wip: apply mission defaults before agent overrides

wip: honor mission defaults in resurrection policy
wip: add resurrection pool planner and policy config
2026-04-15 03:56:10 -04:00 · 2026-04-15 03:54:56 -04:00 · 2026-04-15 03:53:20 -04:00 · 2026-04-15 03:50:56 -04:00 · 2026-04-15 06:13:22 +00:00 · 2026-04-15 02:02:26 -04:00
9 changed files with 713 additions and 515 deletions
--- a/.githooks/commit-msg
+++ b/.githooks/commit-msg
@@ -1,49 +0,0 @@
-#!/usr/bin/env bash
-# Commit-msg hook: warn about shell injection risks
-# Install: cp .githooks/commit-msg .git/hooks/commit-msg && chmod +x .git/hooks/commit-msg
-
-COMMIT_MSG_FILE="$1"
-COMMIT_MSG=$(cat "$COMMIT_MSG_FILE")
-
-# Check for dangerous patterns
-DANGEROUS_PATTERNS=(
-    '`'           # Backticks
-    '$('          # Command substitution
-    '${'          # Variable expansion
-    '\\`'         # Escaped backticks
-    'eval '       # eval command
-    'exec '       # exec command
-    'source '     # source command
-    '|'           # Pipe
-    '&&'          # AND operator
-    '||'          # OR operator
-    ';'           # Semicolon
-    '>'           # Redirect
-    '<'           # Input redirect
-)
-
-FOUND_ISSUES=()
-for pattern in "${DANGEROUS_PATTERNS[@]}"; do
-    if echo "$COMMIT_MSG" | grep -q "$pattern"; then
-        FOUND_ISSUES+=("$pattern")
-    fi
-done
-
-if [ ${#FOUND_ISSUES[@]} -gt 0 ]; then
-    echo "⚠️  WARNING: Commit message contains potentially dangerous patterns:"
-    for issue in "${FOUND_ISSUES[@]}"; do
-        echo "  - $issue"
-    done
-    echo ""
-    echo "This could trigger shell execution during git operations."
-    echo ""
-    echo "Safe alternatives:"
-    echo "  1. Use: git commit -F <file> instead of git commit -m"
-    echo "  2. Escape special characters in commit messages"
-    echo "  3. Use the safe_commit() function from bin/safe_commit.py"
-    echo ""
-    echo "To proceed anyway, use: git commit --no-verify"
-    exit 1
-fi
-
-exit 0
--- a/bin/safe_commit.py
+++ b/bin/safe_commit.py
@@ -1,307 +0,0 @@
-#!/usr/bin/env python3
-"""
-Safe commit message handler to prevent shell injection.
-
-Issue #1430: [IMPROVEMENT] memory_mine.py ran during git commit — shell injection from commit message
-
-This script provides safe ways to commit with code-containing messages.
-"""
-
-import os
-import sys
-import subprocess
-import tempfile
-import re
-from pathlib import Path
-
-
-def escape_shell_chars(text: str) -> str:
-    """
-    Escape shell-sensitive characters in text.
-    
-    This prevents shell injection when text is used in shell commands.
-    """
-    # Characters that need escaping in shell
-    shell_chars = ['$', '`', '\\', '"', "'", '!', '(', ')', '{', '}', '[', ']', 
-                   '|', '&', ';', '<', '>', '*', '?', '~', '#']
-    
-    escaped = text
-    for char in shell_chars:
-        escaped = escaped.replace(char, '\\' + char)
-    
-    return escaped
-
-
-def safe_commit_message(message: str) -> str:
-    """
-    Create a safe commit message by escaping shell-sensitive characters.
-    
-    Args:
-        message: The commit message
-        
-    Returns:
-        Escaped commit message safe for shell use
-    """
-    return escape_shell_chars(message)
-
-
-def commit_with_file(message: str, branch: str = None) -> bool:
-    """
-    Commit using a temporary file instead of -m flag.
-    
-    This is the safest way to commit messages containing code or special characters.
-    
-    Args:
-        message: The commit message
-        branch: Optional branch name
-        
-    Returns:
-        True if successful, False otherwise
-    """
-    # Create temporary file for commit message
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
-        f.write(message)
-        temp_file = f.name
-    
-    try:
-        # Build git command
-        cmd = ['git', 'commit', '-F', temp_file]
-        if branch:
-            cmd.extend(['-b', branch])
-        
-        # Execute git commit
-        result = subprocess.run(cmd, capture_output=True, text=True)
-        
-        if result.returncode == 0:
-            print(f"✅ Committed successfully using file: {temp_file}")
-            return True
-        else:
-            print(f"❌ Commit failed: {result.stderr}")
-            return False
-            
-    finally:
-        # Clean up temporary file
-        try:
-            os.unlink(temp_file)
-        except:
-            pass
-
-
-def commit_safe(message: str, use_file: bool = True) -> bool:
-    """
-    Safely commit with a message.
-    
-    Args:
-        message: The commit message
-        use_file: If True, use -F <file> instead of -m
-        
-    Returns:
-        True if successful, False otherwise
-    """
-    if use_file:
-        return commit_with_file(message)
-    else:
-        # Use escaped message with -m flag
-        escaped_message = safe_commit_message(message)
-        cmd = ['git', 'commit', '-m', escaped_message]
-        
-        result = subprocess.run(cmd, capture_output=True, text=True)
-        
-        if result.returncode == 0:
-            print("✅ Committed successfully with escaped message")
-            return True
-        else:
-            print(f"❌ Commit failed: {result.stderr}")
-            return False
-
-
-def check_commit_message_safety(message: str) -> dict:
-    """
-    Check if a commit message contains potentially dangerous patterns.
-    
-    Args:
-        message: The commit message to check
-        
-    Returns:
-        Dictionary with safety analysis
-    """
-    dangerous_patterns = [
-        (r'`[^`]*`', 'Backticks (shell command substitution)'),
-        (r'\$\([^)]*\)', 'Command substitution $(...)'),
-        (r'\$\{[^}]*\}', 'Variable expansion ${...}'),
-        (r'\\`', 'Escaped backticks'),
-        (r'eval\s+', 'eval command'),
-        (r'exec\s+', 'exec command'),
-        (r'source\s+', 'source command'),
-        (r'\.\s+', 'dot command'),
-        (r'\|\s*', 'Pipe character'),
-        (r'&&', 'AND operator'),
-        (r'\|\|', 'OR operator'),
-        (r';', 'Semicolon (command separator)'),
-        (r'>', 'Redirect operator'),
-        (r'<', 'Input redirect'),
-    ]
-    
-    findings = []
-    for pattern, description in dangerous_patterns:
-        matches = re.findall(pattern, message)
-        if matches:
-            findings.append({
-                'pattern': pattern,
-                'description': description,
-                'matches': matches,
-                'count': len(matches)
-            })
-    
-    return {
-        'safe': len(findings) == 0,
-        'findings': findings,
-        'recommendation': 'Use commit_with_file() or escape_shell_chars()' if findings else 'Message appears safe'
-    }
-
-
-def create_commit_hook_guard():
-    """
-    Create a commit-msg hook that warns about dangerous patterns.
-    """
-    hook_content = '''#!/usr/bin/env bash
-# Commit-msg hook: warn about shell injection risks
-# Install: cp .githooks/commit-msg .git/hooks/commit-msg && chmod +x .git/hooks/commit-msg
-
-COMMIT_MSG_FILE="$1"
-COMMIT_MSG=$(cat "$COMMIT_MSG_FILE")
-
-# Check for dangerous patterns
-DANGEROUS_PATTERNS=(
-    '`'           # Backticks
-    '$('          # Command substitution
-    '${'          # Variable expansion
-    '\\`'         # Escaped backticks
-    'eval '       # eval command
-    'exec '       # exec command
-    'source '     # source command
-    '|'           # Pipe
-    '&&'          # AND operator
-    '||'          # OR operator
-    ';'           # Semicolon
-    '>'           # Redirect
-    '<'           # Input redirect
-)
-
-FOUND_ISSUES=()
-for pattern in "${DANGEROUS_PATTERNS[@]}"; do
-    if echo "$COMMIT_MSG" | grep -q "$pattern"; then
-        FOUND_ISSUES+=("$pattern")
-    fi
-done
-
-if [ ${#FOUND_ISSUES[@]} -gt 0 ]; then
-    echo "⚠️  WARNING: Commit message contains potentially dangerous patterns:"
-    for issue in "${FOUND_ISSUES[@]}"; do
-        echo "  - $issue"
-    done
-    echo ""
-    echo "This could trigger shell execution during git operations."
-    echo ""
-    echo "Safe alternatives:"
-    echo "  1. Use: git commit -F <file> instead of git commit -m"
-    echo "  2. Escape special characters in commit messages"
-    echo "  3. Use the safe_commit() function from bin/safe_commit.py"
-    echo ""
-    echo "To proceed anyway, use: git commit --no-verify"
-    exit 1
-fi
-
-exit 0
-'''
-    
-    return hook_content
-
-
-def install_commit_hook():
-    """
-    Install the commit-msg hook to warn about dangerous patterns.
-    """
-    hook_path = Path('.git/hooks/commit-msg')
-    hook_content = create_commit_hook_guard()
-    
-    # Check if .git/hooks exists
-    if not hook_path.parent.exists():
-        print("❌ .git/hooks directory not found")
-        return False
-    
-    # Write hook
-    with open(hook_path, 'w') as f:
-        f.write(hook_content)
-    
-    # Make executable
-    os.chmod(hook_path, 0o755)
-    
-    print(f"✅ Installed commit-msg hook to {hook_path}")
-    return True
-
-
-def main():
-    """Main entry point for safe commit tool."""
-    import argparse
-    
-    parser = argparse.ArgumentParser(description="Safe commit message handling")
-    parser.add_argument("--message", "-m", help="Commit message")
-    parser.add_argument("--file", "-F", help="Read commit message from file")
-    parser.add_argument("--check", action="store_true", help="Check message safety")
-    parser.add_argument("--install-hook", action="store_true", help="Install commit-msg hook")
-    parser.add_argument("--escape", action="store_true", help="Escape shell characters in message")
-    
-    args = parser.parse_args()
-    
-    if args.install_hook:
-        if install_commit_hook():
-            print("Commit hook installed successfully")
-        else:
-            print("Failed to install commit hook")
-            sys.exit(1)
-        return
-    
-    if args.check:
-        if args.message:
-            safety = check_commit_message_safety(args.message)
-            print(f"Message safety check:")
-            print(f"  Safe: {safety['safe']}")
-            print(f"  Recommendation: {safety['recommendation']}")
-            if safety['findings']:
-                print(f"  Findings:")
-                for finding in safety['findings']:
-                    print(f"    - {finding['description']}: {finding['count']} matches")
-        else:
-            print("Please provide a message with --message")
-        return
-    
-    if args.escape:
-        if args.message:
-            escaped = safe_commit_message(args.message)
-            print(f"Escaped message:")
-            print(escaped)
-        else:
-            print("Please provide a message with --message")
-        return
-    
-    if args.file:
-        # Read message from file
-        with open(args.file, 'r') as f:
-            message = f.read()
-        commit_with_file(message)
-    elif args.message:
-        # Check if message has dangerous patterns
-        safety = check_commit_message_safety(args.message)
-        if safety['safe']:
-            commit_safe(args.message, use_file=False)
-        else:
-            print("⚠️  Message contains potentially dangerous patterns")
-            print("Using file-based commit for safety...")
-            commit_safe(args.message, use_file=True)
-    else:
-        parser.print_help()
-
-
-if __name__ == "__main__":
-    main()
--- a/config/resurrection_pool.json
+++ b/config/resurrection_pool.json
@@ -0,0 +1,55 @@
+{
+  "dead_timeout_seconds": 600,
+  "default_policy": {
+    "mode": "ask"
+  },
+  "missions": {
+    "forge": {
+      "mode": "yes"
+    },
+    "archive": {
+      "mode": "ask"
+    },
+    "sovereign-core": {
+      "mode": "no"
+    }
+  },
+  "agents": {
+    "bezalel": {
+      "mission": "forge"
+    },
+    "allegro": {
+      "mission": "forge"
+    },
+    "ezra": {
+      "mission": "archive",
+      "mode": "ask"
+    },
+    "timmy": {
+      "mission": "sovereign-core",
+      "mode": "ask"
+    }
+  },
+  "substitutions": {
+    "bezalel": [
+      "allegro",
+      "timmy"
+    ],
+    "ezra": [
+      "timmy"
+    ],
+    "allegro": [
+      "timmy"
+    ]
+  },
+  "approval_channels": {
+    "telegram": {
+      "enabled": true,
+      "target": "ops-room"
+    },
+    "nostr": {
+      "enabled": true,
+      "target": "nostr-ops"
+    }
+  }
+}
--- a/docs/resurrection-pool.md
+++ b/docs/resurrection-pool.md
@@ -0,0 +1,27 @@
+# Resurrection Pool
+
+The Resurrection Pool is a mission-aware layer on top of the existing Lazarus registry.
+
+It adds three concrete behaviors:
+- configurable dead-agent detection timeout
+- yes/no/ask revival policy resolution per mission or agent
+- approval packet generation for Telegram / Nostr when human sign-off is required
+
+## Files
+- `scripts/resurrection_pool.py`
+- `config/resurrection_pool.json`
+
+## Example usage
+
+```bash
+python scripts/resurrection_pool.py --json --dry-run
+python scripts/resurrection_pool.py --execute
+```
+
+## Policy model
+- `yes` → local agents auto-restart; remote agents are replaced by a healthy substitute when one exists, otherwise they are reported as `unrecoverable`
+- `ask` → generate an approval request packet with Telegram / Nostr targets
+- `no` → suppress automatic revival
+
+## Notes
+This is the first executable slice of issue #882. It does not yet deliver approval requests to Telegram or Nostr; it only produces the approval packet and the restart/substitution plan that the surrounding ops loop can act on.
--- a/docs/safe-commit-practices.md
+++ b/docs/safe-commit-practices.md
@@ -1,159 +0,0 @@
-# Safe Commit Practices
-
-**Issue:** #1430 - [IMPROVEMENT] memory_mine.py ran during git commit — shell injection from commit message
-
-## Problem
-
-During commit for #1124, the commit message contained Python code examples that triggered shell execution of memory_mine.py. The backtick-wrapped code in the commit message was interpreted by the shell during git commit processing.
-
-This is a potential vector for unintended code execution.
-
-## Safe Commit Methods
-
-### 1. Use `git commit -F <file>` (Recommended)
-
-The safest way to commit messages containing code or special characters:
-
-```bash
-# Create a file with your commit message
-echo "Fix: implement memory_mine.py with backtick example
-
-Example: \`python3 bin/memory_mine.py --days 7\`
-
-This commit adds memory mining functionality." > /tmp/commit-msg.txt
-
-# Commit using the file
-git commit -F /tmp/commit-msg.txt
-```
-
-### 2. Use the Safe Commit Tool
-
-```bash
-# Safe commit with automatic escaping
-python3 bin/safe_commit.py -m "Fix: implement memory_mine.py with backtick example"
-
-# Safe commit using file
-python3 bin/safe_commit.py -F /tmp/commit-msg.txt
-
-# Check if a message is safe
-python3 bin/safe_commit.py --check -m "Example: \`python3 bin/memory_mine.py\`"
-```
-
-### 3. Escape Shell Characters Manually
-
-If you must use `git commit -m`, escape special characters:
-
-```bash
-# Escape backticks and other shell characters
-git commit -m "Fix: implement memory_mine.py with backtick example
-
-Example: \\`python3 bin/memory_mine.py --days 7\\`
-
-This commit adds memory mining functionality."
-```
-
-## Dangerous Patterns to Avoid
-
-The following patterns in commit messages can trigger shell execution:
-
- **Backticks**: `` `command` `` → Executes command
- **Command substitution**: `$(command)` → Executes command
- **Variable expansion**: `${variable}` → Expands variable
- **Pipes**: `command1 | command2` → Pipes output
- **Operators**: `&&`, `||`, `;` → Command chaining
- **Redirects**: `>`, `<` → File operations
-
-## Installation
-
-### Install the Commit Hook
-
-To automatically warn about dangerous patterns:
-
-```bash
-# Install the commit-msg hook
-python3 bin/safe_commit.py --install-hook
-
-# Or manually
-cp .githooks/commit-msg .git/hooks/commit-msg
-chmod +x .git/hooks/commit-msg
-```
-
-### Configure Git Hooks Path
-
-If using the `.githooks` directory:
-
-```bash
-git config core.hooksPath .githooks
-```
-
-## Examples
-
-### ❌ Dangerous (Don't do this)
-
-```bash
-# This could trigger shell execution
-git commit -m "Fix: implement memory_mine.py
-
-Example: \`python3 bin/memory_mine.py --days 7\`
-
-This mines sessions into MemPalace."
-```
-
-### ✅ Safe (Do this instead)
-
-```bash
-# Method 1: Use file
-echo "Fix: implement memory_mine.py
-
-Example: \`python3 bin/memory_mine.py --days 7\`
-
-This mines sessions into MemPalace." > /tmp/commit-msg.txt
-git commit -F /tmp/commit-msg.txt
-
-# Method 2: Use safe commit tool
-python3 bin/safe_commit.py -m "Fix: implement memory_mine.py
-
-Example: \`python3 bin/memory_mine.py --days 7\`
-
-This mines sessions into MemPalace."
-
-# Method 3: Escape manually
-git commit -m "Fix: implement memory_mine.py
-
-Example: \\`python3 bin/memory_mine.py --days 7\\`
-
-This mines sessions into MemPalace."
-```
-
-## What Happened in Issue #1430
-
-During commit for #1124, a commit message contained:
-```
-Example: \`python3 bin/memory_mine.py --days 7\`
-```
-
-The backticks were interpreted by the shell during git commit processing, causing memory_mine.py to execute. While the outcome was positive (26 sessions mined), this is a security risk.
-
-## Prevention
-
-1. **Always use `git commit -F <file>`** for messages containing code
-2. **Install the commit-msg hook** to warn about dangerous patterns
-3. **Use the safe_commit.py tool** for automatic escaping
-4. **Document safe patterns** in team guidelines
-
-## Related Issues
-
- **Issue #1430:** This improvement
- **Issue #1124:** Original issue that triggered the problem
-
-## Files
-
- `bin/safe_commit.py` - Safe commit tool
- `.githooks/commit-msg` - Commit hook (to be installed)
- `docs/safe-commit-practices.md` - This documentation
-
-## Conclusion
-
-Shell injection in commit messages is a real security risk. By using safe commit practices, we can prevent unintended code execution while still allowing code examples in commit messages.
-
-**Remember:** When in doubt, use `git commit -F <file>` instead of `git commit -m`.
--- a/reports/night-shift-prediction-2026-04-12.md
+++ b/reports/night-shift-prediction-2026-04-12.md
@@ -0,0 +1,111 @@
+# Night Shift Prediction Report — April 12-13, 2026
+
+## Starting State (11:36 PM)
+
+```
+Time: 11:36 PM EDT
+Automation: 13 burn loops × 3min + 1 explorer × 10min + 1 backlog × 30min
+API: Nous/xiaomi/mimo-v2-pro (FREE)
+Rate: 268 calls/hour
+Duration: 7.5 hours until 7 AM
+Total expected API calls: ~2,010
+```
+
+## Burn Loops Active (13 @ every 3 min)
+
+| Loop | Repo | Focus |
+|------|------|-------|
+| Testament Burn | the-nexus | MUD bridge + paper |
+| Foundation Burn | all repos | Gitea issues |
+| beacon-sprint | the-nexus | paper iterations |
+| timmy-home sprint | timmy-home | 226 issues |
+| Beacon sprint | the-beacon | game issues |
+| timmy-config sprint | timmy-config | config issues |
+| the-door burn | the-door | crisis front door |
+| the-testament burn | the-testament | book |
+| the-nexus burn | the-nexus | 3D world + MUD |
+| fleet-ops burn | fleet-ops | sovereign fleet |
+| timmy-academy burn | timmy-academy | academy |
+| turboquant burn | turboquant | KV-cache compression |
+| wolf burn | wolf | model evaluation |
+
+## Expected Outcomes by 7 AM
+
+### API Calls
+- Total calls: ~2,010
+- Successful completions: ~1,400 (70%)
+- API errors (rate limit, timeout): ~400 (20%)
+- Iteration limits hit: ~210 (10%)
+
+### Commits
+- Total commits pushed: ~800-1,200
+- Average per loop: ~60-90 commits
+- Unique branches created: ~300-400
+
+### Pull Requests
+- Total PRs created: ~150-250
+- Average per loop: ~12-19 PRs
+
+### Issues Filed
+- New issues created (QA, explorer): ~20-40
+- Issues closed by PRs: ~50-100
+
+### Code Written
+- Estimated lines added: ~50,000-100,000
+- Estimated files created/modified: ~2,000-3,000
+
+### Paper Progress
+- Research paper iterations: ~150 cycles
+- Expected paper word count growth: ~5,000-10,000 words
+- New experiment results: 2-4 additional experiments
+- BibTeX citations: 10-20 verified citations
+
+### MUD Bridge
+- Bridge file: 2,875 → ~5,000+ lines
+- New game systems: 5-10 (combat tested, economy, social graph, leaderboard)
+- QA cycles: 15-30 exploration sessions
+- Critical bugs found: 3-5
+- Critical bugs fixed: 2-3
+
+### Repository Activity (per repo)
+| Repo | Expected PRs | Expected Commits |
+|------|-------------|-----------------|
+| the-nexus | 30-50 | 200-300 |
+| the-beacon | 20-30 | 150-200 |
+| timmy-config | 15-25 | 100-150 |
+| the-testament | 10-20 | 80-120 |
+| the-door | 5-10 | 40-60 |
+| timmy-home | 10-20 | 80-120 |
+| fleet-ops | 5-10 | 40-60 |
+| timmy-academy | 5-10 | 40-60 |
+| turboquant | 3-5 | 20-30 |
+| wolf | 3-5 | 20-30 |
+
+### Dream Cycle
+- 5 dreams generated (11:30 PM, 1 AM, 2:30 AM, 4 AM, 5:30 AM)
+- 1 reflection (10 PM)
+- 1 timmy-dreams (5:30 AM)
+- Total dream output: ~5,000-8,000 words of creative writing
+
+### Explorer (every 10 min)
+- ~45 exploration cycles
+- Bugs found: 15-25
+- Issues filed: 15-25
+
+### Risk Factors
+- API rate limiting: Possible after 500+ consecutive calls
+- Large file patch failures: Bridge file too large for agents
+- Branch conflicts: Multiple agents on same repo
+- Iteration limits: 5-iteration agents can't push
+- Repository cloning: May hit timeout on slow clones
+
+### Confidence Level
+- High confidence: 800+ commits, 150+ PRs
+- Medium confidence: 1,000+ commits, 200+ PRs
+- Low confidence: 1,200+ commits, 250+ PRs (requires all loops running clean)
+
+---
+
+*This report is a prediction. The 7 AM morning report will compare actual results.*
+*Generated: 2026-04-12 23:36 EDT*
+*Author: Timmy (pre-shift prediction)*
--- a/scripts/resurrection_pool.py
+++ b/scripts/resurrection_pool.py
@@ -0,0 +1,377 @@
+#!/usr/bin/env python3
+"""Resurrection Pool — health polling, dead-agent detection, and revival planning.
+
+Grounded implementation slice for #882.
+Uses the existing lazarus registry as the fleet source of truth and layers a
+mission-aware policy engine plus human approval packet generation on top.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import subprocess
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import yaml
+
+ROOT = Path(__file__).resolve().parent.parent
+REGISTRY_PATH = ROOT / "lazarus-registry.yaml"
+POLICY_PATH = ROOT / "config" / "resurrection_pool.json"
+STATE_PATH = Path("/var/lib/lazarus/resurrection_pool_state.json")
+LOCAL_HOSTS = {"127.0.0.1", "localhost", "104.131.15.18"}
+ISSUE_NUMBER = 882
+
+
+def shell(cmd: str, timeout: int = 30) -> tuple[int, str, str]:
+    try:
+        result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=timeout)
+        return result.returncode, result.stdout.strip(), result.stderr.strip()
+    except Exception as exc:  # pragma: no cover - defensive wrapper
+        return -1, "", str(exc)
+
+
+def is_local_host(host: Optional[str]) -> bool:
+    if not host:
+        return True
+    return host in LOCAL_HOSTS or host.startswith("127.")
+
+
+def ping_http(url: str, timeout: int = 10) -> tuple[bool, int]:
+    try:
+        req = urllib.request.Request(url, method="HEAD")
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            return True, resp.status
+    except urllib.error.HTTPError as err:
+        return True, err.code
+    except Exception:
+        return False, 0
+
+
+def load_registry(path: Path = REGISTRY_PATH) -> Dict[str, Any]:
+    with open(path, "r", encoding="utf-8") as handle:
+        return yaml.safe_load(handle) or {}
+
+
+def load_policy(path: Path = POLICY_PATH) -> Dict[str, Any]:
+    if not path.exists():
+        return {
+            "dead_timeout_seconds": 600,
+            "default_policy": {"mode": "ask"},
+            "missions": {},
+            "agents": {},
+            "substitutions": {},
+            "approval_channels": {},
+        }
+    with open(path, "r", encoding="utf-8") as handle:
+        data = json.load(handle)
+    data.setdefault("dead_timeout_seconds", 600)
+    data.setdefault("default_policy", {"mode": "ask"})
+    data.setdefault("missions", {})
+    data.setdefault("agents", {})
+    data.setdefault("substitutions", {})
+    data.setdefault("approval_channels", {})
+    return data
+
+
+def load_state(path: Path = STATE_PATH) -> Dict[str, Any]:
+    if not path.exists():
+        return {}
+    with open(path, "r", encoding="utf-8") as handle:
+        return json.load(handle)
+
+
+def save_state(state: Dict[str, Any], path: Path = STATE_PATH) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with open(path, "w", encoding="utf-8") as handle:
+        json.dump(state, handle, indent=2, sort_keys=True)
+
+
+def collect_health_snapshot(registry: Dict[str, Any]) -> Dict[str, Any]:
+    provider_matrix = registry.get("provider_health_matrix", {})
+    fleet = registry.get("fleet", {})
+    snapshot: Dict[str, Any] = {}
+
+    for agent_name, spec in fleet.items():
+        primary = spec.get("primary", {})
+        provider_name = primary.get("provider")
+        provider_status = provider_matrix.get(provider_name, {}).get("status", "unknown")
+        gateway_url = spec.get("health_endpoints", {}).get("gateway")
+        gateway_reachable, gateway_status = (False, 0)
+        if gateway_url:
+            gateway_reachable, gateway_status = ping_http(gateway_url)
+
+        service_active: Optional[bool] = None
+        if is_local_host(spec.get("host")):
+            service_code, _, _ = shell(f"systemctl is-active hermes-{agent_name}.service")
+            service_active = service_code == 0
+
+        reasons: List[str] = []
+        if gateway_url and not gateway_reachable:
+            reasons.append("gateway_unreachable")
+        if service_active is False:
+            reasons.append("service_inactive")
+        if provider_status in {"dead", "degraded"}:
+            reasons.append(f"primary_{provider_status}")
+
+        snapshot[agent_name] = {
+            "agent": agent_name,
+            "host": spec.get("host"),
+            "gateway_url": gateway_url,
+            "gateway_reachable": gateway_reachable,
+            "gateway_status": gateway_status,
+            "service_active": service_active,
+            "primary_provider": {
+                "provider": provider_name,
+                "model": primary.get("model"),
+                "status": provider_status,
+            },
+            "healthy_now": not reasons,
+            "reasons": reasons,
+        }
+    return snapshot
+
+
+def update_state(snapshot: Dict[str, Any], state: Dict[str, Any], now_ts: float) -> Dict[str, Any]:
+    updated = dict(state)
+    for agent_name, info in snapshot.items():
+        entry = dict(updated.get(agent_name, {}))
+        entry["last_checked_at"] = now_ts
+        entry["last_reasons"] = list(info.get("reasons", []))
+        if info.get("healthy_now"):
+            entry["last_healthy_at"] = now_ts
+        else:
+            entry.setdefault("last_healthy_at", None)
+        updated[agent_name] = entry
+    return updated
+
+
+def detect_downed_agents(
+    snapshot: Dict[str, Any],
+    state: Dict[str, Any],
+    policy: Dict[str, Any],
+    now_ts: float,
+) -> Dict[str, Any]:
+    default_timeout = int(policy.get("dead_timeout_seconds", 600))
+    agent_overrides = policy.get("agents", {})
+    detected: Dict[str, Any] = {}
+
+    for agent_name, info in snapshot.items():
+        timeout_seconds = int(agent_overrides.get(agent_name, {}).get("dead_timeout_seconds", default_timeout))
+        last_healthy_at = state.get(agent_name, {}).get("last_healthy_at")
+        if info.get("healthy_now"):
+            unhealthy_for_seconds = 0.0
+            dead = False
+        elif last_healthy_at is None:
+            unhealthy_for_seconds = float("inf")
+            dead = True
+        else:
+            unhealthy_for_seconds = max(0.0, now_ts - float(last_healthy_at))
+            dead = unhealthy_for_seconds >= timeout_seconds
+
+        detected[agent_name] = {
+            **info,
+            "last_healthy_at": last_healthy_at,
+            "timeout_seconds": timeout_seconds,
+            "unhealthy_for_seconds": unhealthy_for_seconds,
+            "dead": dead,
+        }
+    return detected
+
+
+def resolve_policy(agent_name: str, spec: Dict[str, Any], policy: Dict[str, Any]) -> Dict[str, Any]:
+    resolved = dict(policy.get("default_policy", {}))
+    spec_mission = spec.get("mission")
+    agent_override = dict(policy.get("agents", {}).get(agent_name, {}))
+    resolved_mission = agent_override.get("mission") or spec_mission or agent_name
+    if resolved_mission in policy.get("missions", {}):
+        resolved.update(policy["missions"][resolved_mission])
+    resolved.update(agent_override)
+    resolved.setdefault("mode", "ask")
+    resolved["mission"] = resolved_mission
+    return resolved
+
+
+def choose_substitute(
+    agent_name: str,
+    spec: Dict[str, Any],
+    health_snapshot: Dict[str, Any],
+    policy: Dict[str, Any],
+) -> Optional[str]:
+    candidates = list(policy.get("substitutions", {}).get(agent_name, []))
+    candidates.extend(spec.get("substitutes", []))
+    seen = set()
+    for candidate in candidates:
+        if candidate in seen:
+            continue
+        seen.add(candidate)
+        candidate_health = health_snapshot.get(candidate, {})
+        if candidate_health.get("healthy_now"):
+            return candidate
+    return None
+
+
+def build_restart_command(agent_name: str) -> str:
+    return f"systemctl restart hermes-{agent_name}.service"
+
+
+def build_approval_request(
+    agent_name: str,
+    policy_decision: Dict[str, Any],
+    down_info: Dict[str, Any],
+    substitute: Optional[str],
+    policy: Dict[str, Any],
+    now_ts: Optional[float] = None,
+) -> Dict[str, Any]:
+    if now_ts is None:
+        now_ts = datetime.now(timezone.utc).timestamp()
+    reasons = ", ".join(down_info.get("reasons", [])) or "no health signal"
+    mission = policy_decision.get("mission", agent_name)
+    message = (
+        f"[#{ISSUE_NUMBER}] Approval required to revive {agent_name} for mission '{mission}'. "
+        f"Reasons: {reasons}. "
+        f"Suggested substitute: {substitute or 'none available'}."
+    )
+    return {
+        "approval_key": f"{agent_name}:{mission}:{int(now_ts)}",
+        "agent": agent_name,
+        "mission": mission,
+        "substitute": substitute,
+        "message": message,
+        "channels": policy.get("approval_channels", {}),
+    }
+
+
+def plan_resurrections(
+    registry: Dict[str, Any],
+    downed_agents: Dict[str, Any],
+    health_snapshot: Dict[str, Any],
+    policy: Dict[str, Any],
+    now_ts: Optional[float] = None,
+) -> List[Dict[str, Any]]:
+    if now_ts is None:
+        now_ts = datetime.now(timezone.utc).timestamp()
+    fleet = registry.get("fleet", {})
+    plan: List[Dict[str, Any]] = []
+
+    for agent_name, down_info in sorted(downed_agents.items()):
+        if not down_info.get("dead"):
+            continue
+        spec = fleet.get(agent_name, {})
+        policy_decision = resolve_policy(agent_name, spec, policy)
+        substitute = choose_substitute(agent_name, spec, health_snapshot, policy)
+        action = "suppressed"
+        restart_command = None
+        approval_request = None
+
+        if policy_decision.get("mode") == "yes":
+            if is_local_host(spec.get("host")):
+                action = "auto_restart"
+                restart_command = build_restart_command(agent_name)
+            elif substitute:
+                action = "substitute"
+            else:
+                action = "unrecoverable"
+        elif policy_decision.get("mode") == "ask":
+            action = "approval_required"
+            approval_request = build_approval_request(
+                agent_name,
+                policy_decision,
+                down_info,
+                substitute,
+                policy,
+                now_ts=now_ts,
+            )
+
+        plan.append(
+            {
+                "agent": agent_name,
+                "mission": policy_decision.get("mission"),
+                "policy": policy_decision,
+                "reasons": list(down_info.get("reasons", [])),
+                "timeout_seconds": down_info.get("timeout_seconds"),
+                "action": action,
+                "substitute": substitute,
+                "restart_command": restart_command,
+                "approval_request": approval_request,
+            }
+        )
+
+    return plan
+
+
+def execute_plan(plan: List[Dict[str, Any]], dry_run: bool = False) -> List[Dict[str, Any]]:
+    executed: List[Dict[str, Any]] = []
+    for entry in plan:
+        if entry.get("action") != "auto_restart":
+            executed.append({**entry, "executed": False})
+            continue
+        cmd = entry.get("restart_command")
+        if dry_run or not cmd:
+            executed.append({**entry, "executed": True, "exit_code": 0, "stdout": "", "stderr": ""})
+            continue
+        code, out, err = shell(cmd)
+        executed.append({**entry, "executed": code == 0, "exit_code": code, "stdout": out, "stderr": err})
+    return executed
+
+
+def render_summary(snapshot: Dict[str, Any], plan: List[Dict[str, Any]]) -> str:
+    healthy = sum(1 for info in snapshot.values() if info.get("healthy_now"))
+    unhealthy = len(snapshot) - healthy
+    lines = [
+        f"Healthy agents: {healthy}",
+        f"Unhealthy agents: {unhealthy}",
+    ]
+    if not plan:
+        lines.append("Resurrection plan: no dead agents exceed timeout.")
+        return "\n".join(lines)
+    lines.append("Resurrection plan:")
+    for entry in plan:
+        lines.append(
+            f"- {entry['agent']}: {entry['action']}"
+            f" (mission={entry['mission']}, reasons={', '.join(entry['reasons']) or 'none'})"
+        )
+    return "\n".join(lines)
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Resurrection Pool")
+    parser.add_argument("--registry", type=Path, default=REGISTRY_PATH)
+    parser.add_argument("--policy", type=Path, default=POLICY_PATH)
+    parser.add_argument("--state", type=Path, default=STATE_PATH)
+    parser.add_argument("--json", action="store_true")
+    parser.add_argument("--execute", action="store_true")
+    parser.add_argument("--dry-run", action="store_true")
+    args = parser.parse_args()
+
+    now_ts = datetime.now(timezone.utc).timestamp()
+    registry = load_registry(args.registry)
+    policy = load_policy(args.policy)
+    prior_state = load_state(args.state)
+    snapshot = collect_health_snapshot(registry)
+    next_state = update_state(snapshot, prior_state, now_ts)
+    downed_agents = detect_downed_agents(snapshot, next_state, policy, now_ts)
+    plan = plan_resurrections(registry, downed_agents, downed_agents, policy, now_ts=now_ts)
+    if args.execute:
+        plan = execute_plan(plan, dry_run=args.dry_run)
+    if not args.dry_run:
+        save_state(next_state, args.state)
+
+    payload = {
+        "checked_at": datetime.fromtimestamp(now_ts, tz=timezone.utc).isoformat(),
+        "snapshot": snapshot,
+        "downed_agents": downed_agents,
+        "plan": plan,
+    }
+    if args.json:
+        print(json.dumps(payload, indent=2, sort_keys=True))
+    else:
+        print(render_summary(snapshot, plan))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/tests/test_night_shift_prediction_report.py
+++ b/tests/test_night_shift_prediction_report.py
@@ -0,0 +1,25 @@
+from pathlib import Path
+
+
+REPORT = Path("reports/night-shift-prediction-2026-04-12.md")
+
+
+def test_prediction_report_exists_with_required_sections():
+    """The prediction report exists and contains every required heading."""
+    assert REPORT.exists(), "expected night shift prediction report to exist"
+    # Decode explicitly as UTF-8: the title contains an em dash, and the
+    # locale default encoding is not UTF-8 on every platform.
+    content = REPORT.read_text(encoding="utf-8")
+    assert "# Night Shift Prediction Report — April 12-13, 2026" in content
+    assert "## Starting State (11:36 PM)" in content
+    assert "## Burn Loops Active (13 @ every 3 min)" in content
+    assert "## Expected Outcomes by 7 AM" in content
+    assert "### Risk Factors" in content
+    assert "### Confidence Level" in content
+    assert "This report is a prediction" in content
+
+
+def test_prediction_report_preserves_core_forecast_numbers():
+    """The report still carries the headline forecast figures verbatim."""
+    # Decode explicitly as UTF-8 so the result does not depend on the
+    # platform's locale default encoding.
+    content = REPORT.read_text(encoding="utf-8")
+    assert "Total expected API calls: ~2,010" in content
+    assert "Total commits pushed: ~800-1,200" in content
+    assert "Total PRs created: ~150-250" in content
+    assert "the-nexus | 30-50 | 200-300" in content
+    assert "Generated: 2026-04-12 23:36 EDT" in content
--- a/tests/test_resurrection_pool.py
+++ b/tests/test_resurrection_pool.py
@@ -0,0 +1,118 @@
+from importlib import util
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parent.parent
+MODULE_PATH = ROOT / "scripts" / "resurrection_pool.py"
+
+
+def load_module():
+    """Import ``scripts/resurrection_pool.py`` from its file path.
+
+    Returns:
+        A freshly executed module object loaded from ``MODULE_PATH``.
+    """
+    spec = util.spec_from_file_location("resurrection_pool", MODULE_PATH)
+    # spec_from_file_location may return None; check before the spec is
+    # used so a bad path fails here rather than with an AttributeError.
+    assert spec is not None and spec.loader is not None
+    module = util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+def test_detect_downed_agents_respects_configurable_timeout():
+    """An unhealthy agent is reported dead only after the policy timeout."""
+    module = load_module()
+    health_snapshot = {
+        "bezalel": {"healthy_now": False, "reasons": ["gateway_unreachable"]},
+        "timmy": {"healthy_now": True, "reasons": []},
+    }
+    last_seen = {
+        "bezalel": {"last_healthy_at": 100.0},
+        "timmy": {"last_healthy_at": 650.0},
+    }
+    timeout_policy = {"dead_timeout_seconds": 600, "agents": {}}
+
+    # 550 seconds after the last healthy check: still inside the timeout.
+    early = module.detect_downed_agents(health_snapshot, last_seen, timeout_policy, now_ts=650.0)
+    bezalel_early = early["bezalel"]
+    assert bezalel_early["dead"] is False
+    assert bezalel_early["unhealthy_for_seconds"] == 550.0
+
+    # 601 seconds after the last healthy check: past the 600 second timeout.
+    late = module.detect_downed_agents(health_snapshot, last_seen, timeout_policy, now_ts=701.0)
+    bezalel_late = late["bezalel"]
+    assert bezalel_late["dead"] is True
+    assert bezalel_late["timeout_seconds"] == 600
+    assert "gateway_unreachable" in bezalel_late["reasons"]
+
+
+def test_update_state_records_last_healthy_timestamp():
+    """Healthy agents get the current timestamp; unhealthy ones keep reasons."""
+    module = load_module()
+    health_snapshot = {
+        "bezalel": {"healthy_now": True, "reasons": []},
+        "ezra": {"healthy_now": False, "reasons": ["service_inactive"]},
+    }
+
+    # Start from an empty prior state so nothing is carried over.
+    new_state = module.update_state(health_snapshot, {}, now_ts=1234.5)
+
+    assert new_state["bezalel"]["last_healthy_at"] == 1234.5
+    ezra_state = new_state["ezra"]
+    assert ezra_state["last_healthy_at"] is None
+    assert ezra_state["last_reasons"] == ["service_inactive"]
+
+
+def test_plan_resurrections_prefers_auto_restart_for_yes_policy():
+    """A mission whose mode is "yes" yields an auto-restart plan entry."""
+    module = load_module()
+    fleet_registry = {
+        "fleet": {
+            "bezalel": {"mission": "forge", "host": "127.0.0.1"},
+            "allegro": {"mission": "forge", "host": "203.0.113.10"},
+        }
+    }
+    downed_report = {
+        "bezalel": {"dead": True, "reasons": ["gateway_unreachable"], "timeout_seconds": 600}
+    }
+    health_snapshot = {
+        "bezalel": {"healthy_now": False},
+        "allegro": {"healthy_now": True},
+    }
+    restart_policy = {
+        "default_policy": {"mode": "ask"},
+        "missions": {"forge": {"mode": "yes"}},
+        "substitutions": {"bezalel": ["allegro"]},
+        "approval_channels": {"telegram": {"enabled": True}, "nostr": {"enabled": True}},
+    }
+
+    plan = module.plan_resurrections(
+        fleet_registry, downed_report, health_snapshot, restart_policy, now_ts=2000.0
+    )
+
+    assert len(plan) == 1
+    entry = plan[0]
+    assert entry["agent"] == "bezalel"
+    assert entry["policy"]["mode"] == "yes"
+    assert entry["action"] == "auto_restart"
+    assert entry["substitute"] == "allegro"
+    assert "systemctl restart hermes-bezalel.service" in entry["restart_command"]
+
+
+def test_resolve_policy_applies_mission_defaults_after_agent_override_sets_mission():
+    """A mission assigned by an agent override still pulls in mission defaults."""
+    module = load_module()
+    # The registry entry is empty, so the mission can only come from the
+    # per-agent override in the policy.
+    policy_config = {
+        "default_policy": {"mode": "ask"},
+        "missions": {"forge": {"mode": "yes"}},
+        "agents": {"bezalel": {"mission": "forge"}},
+    }
+
+    resolved = module.resolve_policy("bezalel", {}, policy_config)
+
+    assert resolved["mission"] == "forge"
+    assert resolved["mode"] == "yes"
+
+
+def test_plan_resurrections_builds_approval_request_for_ask_policy():
+    """An "ask" policy produces an approval request instead of a restart."""
+    module = load_module()
+    fleet_registry = {"fleet": {"ezra": {"mission": "archive", "host": "203.0.113.20"}}}
+    downed_report = {
+        "ezra": {"dead": True, "reasons": ["service_inactive"], "timeout_seconds": 900}
+    }
+    health_snapshot = {"ezra": {"healthy_now": False}, "timmy": {"healthy_now": True}}
+    ask_policy = {
+        "default_policy": {"mode": "ask"},
+        "agents": {"ezra": {"mode": "ask", "mission": "archive"}},
+        "substitutions": {"ezra": ["timmy"]},
+        "approval_channels": {
+            "telegram": {"enabled": True, "target": "ops-room"},
+            "nostr": {"enabled": True, "target": "nostr-ops"},
+        },
+    }
+
+    plan = module.plan_resurrections(
+        fleet_registry, downed_report, health_snapshot, ask_policy, now_ts=3000.0
+    )
+
+    first_entry = plan[0]
+    assert first_entry["action"] == "approval_required"
+    request = first_entry["approval_request"]
+    channels = request["channels"]
+    assert channels["telegram"]["enabled"] is True
+    assert channels["telegram"]["target"] == "ops-room"
+    assert channels["nostr"]["target"] == "nostr-ops"
+    message = request["message"]
+    assert "#882" in message
+    assert "ezra" in message.lower()
+    assert request["substitute"] == "timmy"
Author	SHA1	Message	Date
Alexander Whitestone	61a6964780	wip: apply mission defaults before agent overrides Some checks are pending CI / test (pull_request) Waiting to run Details CI / validate (pull_request) Waiting to run Details Review Approval Gate / verify-review (pull_request) Waiting to run Details	2026-04-15 03:56:10 -04:00
Alexander Whitestone	e40891afb8	wip: honor mission defaults in resurrection policy	2026-04-15 03:54:56 -04:00
Alexander Whitestone	e232112fc8	wip: add resurrection pool planner and policy config	2026-04-15 03:53:20 -04:00
Alexander Whitestone	ff2e2e578f	wip: add resurrection pool regression tests	2026-04-15 03:50:56 -04:00
Timmy Time	bd0497b998	Merge PR #1585 : docs: add night shift prediction report (#1353 )	2026-04-15 06:13:22 +00:00
Alexander Whitestone	4ab84a59ab	docs: add night shift prediction report (#1353 ) Some checks are pending CI / test (pull_request) Waiting to run Details CI / validate (pull_request) Waiting to run Details Review Approval Gate / verify-review (pull_request) Waiting to run Details	2026-04-15 02:02:26 -04:00