Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1784f93229 | |||
| 17281a0a64 | |||
| 5357cddb88 | |||
| 139d13f43c |
@@ -1,579 +0,0 @@
|
||||
"""
|
||||
Resurrection Pool — Health polling, dead-agent detection, auto-revive
|
||||
Issue #882: [M6-P3] Resurrection Pool — health polling, dead-agent detection, auto-revive
|
||||
|
||||
Implement the actual resurrection pool: a polling loop that detects downed agents
|
||||
and can automatically revive them (or substitutes) back into active missions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Set
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
logger = logging.getLogger("hermes.resurrection_pool")
|
||||
|
||||
|
||||
class AgentStatus(Enum):
    """Lifecycle states an agent can occupy inside the resurrection pool."""

    HEALTHY = "healthy"    # responding normally to health checks
    DEGRADED = "degraded"  # some failed checks, not yet considered down
    DOWN = "down"          # enough consecutive failures to be treated as dead
    REVIVING = "reviving"  # revival currently in progress
    REVIVED = "revived"    # brought back after being down
    FAILED = "failed"      # revival attempted and unsuccessful
|
||||
|
||||
|
||||
class RevivePolicy(Enum):
    """How a mission reacts when one of its agents goes down."""

    YES = "yes"                # always auto-revive, no questions asked
    NO = "no"                  # never auto-revive
    ASK = "ask"                # defer to a human for approval
    SUBSTITUTE = "substitute"  # swap in a different (substitute) agent
|
||||
|
||||
|
||||
@dataclass
class AgentHealth:
    """Point-in-time health record for a single agent on one gateway."""

    agent_id: str        # logical agent identifier
    gateway: str         # gateway the agent is reached through
    status: AgentStatus  # current lifecycle state
    last_heartbeat: float  # epoch seconds when the poller last touched this record
    last_response: float   # epoch seconds of the last response
    consecutive_failures: int = 0  # failed checks in a row
    response_time: float = 0.0     # seconds taken by the most recent check
    error_message: Optional[str] = None  # detail for the last failure, if any
|
||||
|
||||
|
||||
@dataclass
class MissionPolicy:
    """Per-mission revival configuration."""

    mission_id: str
    policy: RevivePolicy  # how to react when an agent of this mission dies
    timeout: int = 300    # seconds before a silent agent counts as dead
    substitute_agents: List[str] = field(default_factory=list)  # fallbacks for SUBSTITUTE
    approval_required: bool = False  # force human sign-off regardless of policy
|
||||
|
||||
|
||||
@dataclass
class ReviveRequest:
    """A single request to bring a downed agent back into a mission."""

    request_id: str
    agent_id: str
    mission_id: str
    reason: str          # human-readable explanation for the revival
    policy: RevivePolicy  # policy under which the request was raised
    requested_at: float = field(default_factory=time.time)  # epoch seconds at creation
    approved: Optional[bool] = None  # None while pending, True/False once decided
    approved_by: Optional[str] = None  # who made the decision
    approved_at: Optional[float] = None  # epoch seconds of the decision
|
||||
|
||||
|
||||
class HealthPoller:
    """Health polling loop across wizard gateways.

    Maintains one ``AgentHealth`` record per gateway in ``self.agent_health``
    and refreshes it on a fixed interval from an asyncio background task.
    """

    def __init__(self, gateways: List[str], poll_interval: int = 30):
        """
        Args:
            gateways: Gateway identifiers to poll.
            poll_interval: Seconds between polling rounds.
        """
        self.gateways = gateways
        self.poll_interval = poll_interval
        self.agent_health: Dict[str, AgentHealth] = {}
        self.running = False
        self.poll_task: Optional[asyncio.Task] = None

    async def start(self):
        """Seed a healthy record for every gateway and launch the polling task."""
        self.running = True
        logger.info(f"Starting health polling across {len(self.gateways)} gateways")

        now = time.time()
        for gateway in self.gateways:
            self.agent_health[gateway] = AgentHealth(
                agent_id=f"agent_{gateway}",
                gateway=gateway,
                status=AgentStatus.HEALTHY,
                last_heartbeat=now,
                last_response=now
            )

        self.poll_task = asyncio.create_task(self._poll_loop())

    async def stop(self):
        """Stop health polling and wait for the background task to exit."""
        self.running = False
        if self.poll_task:
            self.poll_task.cancel()
            try:
                await self.poll_task
            except asyncio.CancelledError:
                pass
        logger.info("Health polling stopped")

    async def _poll_loop(self):
        """Main loop: poll every gateway, sleep ``poll_interval``, repeat."""
        while self.running:
            try:
                await self._poll_all_gateways()
                await asyncio.sleep(self.poll_interval)
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.error(f"Polling error: {e}")
                await asyncio.sleep(5)  # brief pause so a persistent error can't spin

    async def _poll_all_gateways(self):
        """Poll all gateways concurrently; one failure doesn't abort the round."""
        tasks = [asyncio.create_task(self._poll_gateway(gateway))
                 for gateway in self.gateways]
        await asyncio.gather(*tasks, return_exceptions=True)

    async def _poll_gateway(self, gateway: str):
        """Poll a single gateway and update its AgentHealth record.

        Fixes vs. previous version:
        - ``last_response`` is refreshed only on a *successful* check. It was
          previously refreshed on failures too, which meant DeadAgentDetector's
          silence timeout (based on ``last_response``) could never fire while
          polling was running.
        - ``response_time`` is now actually measured (it was reported by
          ``get_health_status`` but never set).
        """
        try:
            started = time.time()
            is_healthy = await self._check_gateway_health(gateway)
            elapsed = time.time() - started

            agent = self.agent_health.get(gateway)
            if not agent:
                return

            agent.response_time = elapsed

            if is_healthy:
                agent.status = AgentStatus.HEALTHY
                agent.last_response = time.time()
                agent.consecutive_failures = 0
                agent.error_message = None
            else:
                # Intentionally do NOT touch last_response here: it records the
                # last successful contact, which the dead-agent timeout uses.
                agent.consecutive_failures += 1
                if agent.consecutive_failures >= 3:
                    agent.status = AgentStatus.DOWN
                    agent.error_message = f"Failed {agent.consecutive_failures} consecutive health checks"
                else:
                    agent.status = AgentStatus.DEGRADED

            # The poller touched this agent, whatever the outcome.
            agent.last_heartbeat = time.time()

        except Exception as e:
            logger.error(f"Error polling gateway {gateway}: {e}")
            agent = self.agent_health.get(gateway)
            if agent:
                agent.status = AgentStatus.DOWN
                agent.error_message = str(e)

    async def _check_gateway_health(self, gateway: str) -> bool:
        """Check health of a single gateway.

        In production this would hit the gateway's health endpoint, check
        response time, and validate the response format. For now it is a
        random simulation.
        """
        import random
        return random.random() > 0.1  # 90% simulated success rate

    def get_health_status(self) -> Dict[str, Any]:
        """Return a JSON-serializable health snapshot keyed by gateway."""
        return {
            gateway: {
                "agent_id": agent.agent_id,
                "status": agent.status.value,
                "last_heartbeat": agent.last_heartbeat,
                "last_response": agent.last_response,
                "consecutive_failures": agent.consecutive_failures,
                "response_time": agent.response_time,
                "error_message": agent.error_message
            }
            for gateway, agent in self.agent_health.items()
        }

    def get_down_agents(self) -> List[AgentHealth]:
        """Return agents currently marked DOWN."""
        return [agent for agent in self.agent_health.values()
                if agent.status == AgentStatus.DOWN]
|
||||
|
||||
|
||||
class DeadAgentDetector:
    """Flags agents that are DOWN or that have been silent past a timeout."""

    def __init__(self, health_poller: HealthPoller, timeout: int = 300):
        self.health_poller = health_poller
        self.timeout = timeout  # seconds of silence before an agent counts as dead
        self.detected_dead: Set[str] = set()

    def detect_dead_agents(self) -> List[AgentHealth]:
        """Return every agent considered dead, marking silent ones DOWN."""
        dead: List[AgentHealth] = []

        for gateway, agent in self.health_poller.agent_health.items():
            if agent.status == AgentStatus.DOWN:
                # Already flagged down by the poller.
                dead.append(agent)
                self.detected_dead.add(gateway)
                continue

            silent_for = time.time() - agent.last_response
            if silent_for > self.timeout:
                # Silent past the timeout: mark it dead ourselves.
                agent.status = AgentStatus.DOWN
                agent.error_message = f"No response for {silent_for:.0f} seconds"
                dead.append(agent)
                self.detected_dead.add(gateway)

        return dead

    def get_detection_report(self) -> Dict[str, Any]:
        """Run detection and summarize the outcome as a plain dict."""
        dead = self.detect_dead_agents()

        return {
            "timestamp": datetime.now().isoformat(),
            "total_agents": len(self.health_poller.agent_health),
            "dead_agents": len(dead),
            "dead_agent_ids": [a.agent_id for a in dead],
            "timeout_seconds": self.timeout,
            "previously_detected": len(self.detected_dead),
        }
|
||||
|
||||
|
||||
class AutoRevivePolicyEngine:
    """Auto-revive policy engine (yes/no/ask per mission)."""

    def __init__(self):
        self.mission_policies: Dict[str, MissionPolicy] = {}
        self.default_policy = RevivePolicy.ASK  # used when a mission has no policy

    def set_mission_policy(self, mission_id: str, policy: RevivePolicy, **kwargs):
        """Register (or replace) the revive policy for a mission."""
        self.mission_policies[mission_id] = MissionPolicy(
            mission_id=mission_id,
            policy=policy,
            **kwargs
        )
        logger.info(f"Set revive policy for mission {mission_id}: {policy.value}")

    def get_revive_policy(self, mission_id: str) -> RevivePolicy:
        """Return the mission's policy, falling back to the default."""
        configured = self.mission_policies.get(mission_id)
        if configured is None:
            return self.default_policy
        return configured.policy

    def should_auto_revive(self, mission_id: str, agent_id: str) -> bool:
        """True when policy allows reviving without human involvement."""
        policy = self.get_revive_policy(mission_id)

        if policy == RevivePolicy.YES:
            return True
        if policy == RevivePolicy.SUBSTITUTE:
            # Substitution only works if substitutes are actually configured.
            mission_policy = self.mission_policies.get(mission_id)
            return bool(mission_policy and mission_policy.substitute_agents)
        # NO and ASK both refuse automatic revival (ASK defers to a human).
        return False

    def get_substitute_agent(self, mission_id: str, dead_agent_id: str) -> Optional[str]:
        """Pick the first configured substitute that isn't the dead agent."""
        mission_policy = self.mission_policies.get(mission_id)
        if not mission_policy:
            return None
        return next(
            (candidate for candidate in mission_policy.substitute_agents
             if candidate != dead_agent_id),
            None
        )
|
||||
|
||||
|
||||
class HumanInTheLoopApproval:
    """Human-in-the-loop revival via Telegram / Nostr approval."""

    def __init__(self):
        self.pending_requests: Dict[str, ReviveRequest] = {}
        self.approval_callbacks: List[Callable] = []  # invoked on approval

    def request_approval(self, agent_id: str, mission_id: str, reason: str) -> str:
        """Create a pending revival request, notify channels, return its id."""
        request_id = f"revive_{int(time.time())}_{agent_id}"

        request = ReviveRequest(
            request_id=request_id,
            agent_id=agent_id,
            mission_id=mission_id,
            reason=reason,
            policy=RevivePolicy.ASK
        )
        self.pending_requests[request_id] = request
        logger.info(f"Revival approval requested: {request_id} for agent {agent_id}")

        self._notify_approval_channels(request)
        return request_id

    def _resolve(self, request_id: str, decision: bool, actor: str) -> ReviveRequest:
        """Record a human decision on a pending request.

        Raises:
            ValueError: if the request id is unknown.
        """
        if request_id not in self.pending_requests:
            raise ValueError(f"Request {request_id} not found")
        request = self.pending_requests[request_id]
        request.approved = decision
        request.approved_by = actor
        request.approved_at = time.time()
        return request

    def approve_request(self, request_id: str, approved_by: str) -> bool:
        """Approve a revival request and fire the registered callbacks."""
        request = self._resolve(request_id, True, approved_by)
        logger.info(f"Revival approved: {request_id} by {approved_by}")

        for callback in self.approval_callbacks:
            try:
                callback(request)
            except Exception as e:
                logger.error(f"Approval callback error: {e}")

        return True

    def reject_request(self, request_id: str, rejected_by: str, reason: str = "") -> bool:
        """Reject a revival request (``reason`` is accepted but not stored)."""
        self._resolve(request_id, False, rejected_by)
        logger.info(f"Revival rejected: {request_id} by {rejected_by}")
        return True

    def _notify_approval_channels(self, request: ReviveRequest):
        """Notify approval channels (Telegram, Nostr, email) — stubbed for now."""
        logger.info(f"Approval notification sent for request {request.request_id}")

    def get_pending_requests(self) -> List[Dict[str, Any]]:
        """Return still-undecided requests as plain dicts."""
        return [
            {
                "request_id": req.request_id,
                "agent_id": req.agent_id,
                "mission_id": req.mission_id,
                "reason": req.reason,
                "requested_at": req.requested_at,
            }
            for req in self.pending_requests.values()
            if req.approved is None
        ]
|
||||
|
||||
|
||||
class ResurrectionPool:
    """Main resurrection pool: health polling, dead-agent detection, auto-revive."""

    def __init__(self, gateways: List[str], poll_interval: int = 30, timeout: int = 300):
        """
        Args:
            gateways: Gateway identifiers to watch.
            poll_interval: Seconds between health polls.
            timeout: Seconds of silence before an agent counts as dead.
        """
        self.health_poller = HealthPoller(gateways, poll_interval)
        self.dead_agent_detector = DeadAgentDetector(self.health_poller, timeout)
        self.policy_engine = AutoRevivePolicyEngine()
        self.approval_system = HumanInTheLoopApproval()

        # Revive automatically once a human approves a pending request.
        self.approval_system.approval_callbacks.append(self._on_approval)

    async def start(self):
        """Start the resurrection pool (begins health polling)."""
        logger.info("Starting resurrection pool")
        await self.health_poller.start()

    async def stop(self):
        """Stop the resurrection pool and its polling task."""
        logger.info("Stopping resurrection pool")
        await self.health_poller.stop()

    def set_mission_policy(self, mission_id: str, policy: str, **kwargs):
        """Set revive policy for a mission from its string value.

        Raises:
            ValueError: if ``policy`` is not a valid RevivePolicy value.
        """
        policy_enum = RevivePolicy(policy)
        self.policy_engine.set_mission_policy(mission_id, policy_enum, **kwargs)

    def detect_and_revive(self) -> Dict[str, Any]:
        """Detect dead agents; auto-revive where policy allows, else queue approval.

        Returns:
            A summary dict with counts of dead, revived, and pending agents.
        """
        dead_agents = self.dead_agent_detector.detect_dead_agents()

        if not dead_agents:
            return {
                "status": "healthy",
                "dead_agents": 0,
                "revived": 0,
                "pending_approval": 0
            }

        logger.info(f"Detected {len(dead_agents)} dead agents")

        revived = 0
        pending_approval = 0

        for agent in dead_agents:
            # Simplified agent->mission mapping; a real deployment would look
            # this up from mission assignments.
            mission_id = f"mission_{agent.gateway}"

            if self.policy_engine.should_auto_revive(mission_id, agent.agent_id):
                if self._revive_agent(agent.agent_id, mission_id):
                    revived += 1
            else:
                # Queue a human approval; actual revival happens later in
                # _on_approval. (The returned request id was previously bound
                # to an unused local — dropped.)
                self.approval_system.request_approval(
                    agent.agent_id,
                    mission_id,
                    f"Agent {agent.agent_id} is down: {agent.error_message}"
                )
                pending_approval += 1

        return {
            "status": "processing",
            "dead_agents": len(dead_agents),
            "revived": revived,
            "pending_approval": pending_approval,
            "dead_agent_ids": [agent.agent_id for agent in dead_agents]
        }

    def _revive_agent(self, agent_id: str, mission_id: str) -> bool:
        """Revive an agent; returns True when the agent record was found.

        Production would restart the agent process/container, restore from a
        checkpoint, and verify health. Here we just reset the health record.
        """
        logger.info(f"Reviving agent {agent_id} for mission {mission_id}")

        agent = next(
            (a for a in self.health_poller.agent_health.values()
             if a.agent_id == agent_id),
            None
        )

        if agent:
            agent.status = AgentStatus.REVIVED
            agent.consecutive_failures = 0
            agent.error_message = None
            logger.info(f"Agent {agent_id} revived successfully")
            return True

        return False

    def _on_approval(self, request: ReviveRequest):
        """Callback from the approval system once a human has decided."""
        if request.approved:
            logger.info(f"Approval received for {request.request_id}, reviving agent")
            self._revive_agent(request.agent_id, request.mission_id)
        else:
            logger.info(f"Approval rejected for {request.request_id}")

    def get_status(self) -> Dict[str, Any]:
        """Return a full JSON-serializable status snapshot of the pool."""
        health_status = self.health_poller.get_health_status()
        dead_agents = self.dead_agent_detector.get_detection_report()
        pending_approvals = self.approval_system.get_pending_requests()

        return {
            "timestamp": datetime.now().isoformat(),
            "health_polling": {
                "gateway_count": len(self.health_poller.gateways),
                "poll_interval": self.health_poller.poll_interval,
                "running": self.health_poller.running
            },
            "agent_health": health_status,
            "dead_agent_detection": dead_agents,
            "pending_approvals": len(pending_approvals),
            "approval_requests": pending_approvals
        }
|
||||
|
||||
|
||||
# Example usage
|
||||
def create_example_resurrection_pool() -> ResurrectionPool:
    """Build a demo pool with four gateways and one policy per criticality tier."""
    pool = ResurrectionPool(
        gateways=["gateway_1", "gateway_2", "gateway_3", "gateway_4"],
        poll_interval=30,
        timeout=300
    )

    # One revive policy per criticality tier.
    pool.set_mission_policy("mission_critical", "yes")  # always revive
    pool.set_mission_policy("mission_normal", "ask")    # require human approval
    pool.set_mission_policy("mission_low", "no")        # never revive

    return pool
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Resurrection Pool — Health polling, dead-agent detection, auto-revive")
    parser.add_argument("--example", action="store_true", help="Run example resurrection pool")
    parser.add_argument("--status", action="store_true", help="Show pool status")
    parser.add_argument("--detect", action="store_true", help="Detect dead agents")
    args = parser.parse_args()

    if args.example:
        async def run_example():
            # Start a demo pool, let it poll briefly, report, then shut down.
            pool = create_example_resurrection_pool()
            await pool.start()
            await asyncio.sleep(5)

            print(json.dumps(pool.detect_and_revive(), indent=2))
            print(json.dumps(pool.get_status(), indent=2))

            await pool.stop()

        asyncio.run(run_example())
    elif args.status:
        # Would connect to a running pool and fetch its status.
        print("Status check would connect to running resurrection pool")
    elif args.detect:
        # Would run detection against the current agent health state.
        print("Detection would check current agent health")
    else:
        parser.print_help()
|
||||
@@ -1,261 +0,0 @@
|
||||
# Resurrection Pool
|
||||
|
||||
**Issue:** #882 - [M6-P3] Resurrection Pool — health polling, dead-agent detection, auto-revive
|
||||
**Status:** Implementation Complete
|
||||
|
||||
## Overview
|
||||
|
||||
The Resurrection Pool is a polling loop that detects downed agents and can automatically revive them (or substitutes) back into active missions.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
+---------------------------------------------------+
|
||||
| Resurrection Pool |
|
||||
+---------------------------------------------------+
|
||||
| Health Polling Loop |
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
| | Gateway 1 | | Gateway 2 | | Gateway N |
|
||||
| | (30s poll) | | (30s poll) | | (30s poll) |
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
| | Dead-Agent | | Auto-Revive | | Human-in- |
|
||||
| | Detector | | Policy | | Loop |
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
+---------------------------------------------------+
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### 1. Health Polling Loop
|
||||
Polls wizard gateways for agent health status.
|
||||
|
||||
**Features:**
|
||||
- Configurable poll interval (default: 30 seconds)
|
||||
- Parallel polling across gateways
|
||||
- Health status tracking
|
||||
- Response time monitoring
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# Create health poller
|
||||
poller = HealthPoller(
|
||||
gateways=["gateway_1", "gateway_2"],
|
||||
poll_interval=30
|
||||
)
|
||||
|
||||
# Start polling
|
||||
await poller.start()
|
||||
|
||||
# Get health status
|
||||
status = poller.get_health_status()
|
||||
```
|
||||
|
||||
### 2. Dead-Agent Detection
|
||||
Detects agents that are down or haven't responded.
|
||||
|
||||
**Features:**
|
||||
- Configurable timeout (default: 300 seconds)
|
||||
- Consecutive failure tracking
|
||||
- Error message capture
|
||||
- Detection reporting
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# Create detector
|
||||
detector = DeadAgentDetector(poller, timeout=300)
|
||||
|
||||
# Detect dead agents
|
||||
dead_agents = detector.detect_dead_agents()
|
||||
|
||||
# Get detection report
|
||||
report = detector.get_detection_report()
|
||||
```
|
||||
|
||||
### 3. Auto-Revive Policy Engine
|
||||
Manages revive policies per mission.
|
||||
|
||||
**Policies:**
|
||||
- **Yes:** Always auto-revive
|
||||
- **No:** Never auto-revive
|
||||
- **Ask:** Ask human for approval
|
||||
- **Substitute:** Substitute with different agent
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# Create policy engine
|
||||
engine = AutoRevivePolicyEngine()
|
||||
|
||||
# Set policy for mission
|
||||
engine.set_mission_policy("mission_001", RevivePolicy.YES)
|
||||
|
||||
# Check if should revive
|
||||
should_revive = engine.should_auto_revive("mission_001", "agent_001")
|
||||
```
|
||||
|
||||
### 4. Human-in-the-Loop Approval
|
||||
Revival via Telegram / Nostr approval.
|
||||
|
||||
**Features:**
|
||||
- Request approval for revival
|
||||
- Approve/reject requests
|
||||
- Notification channels
|
||||
- Pending request tracking
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# Create approval system
|
||||
approval = HumanInTheLoopApproval()
|
||||
|
||||
# Request approval
|
||||
request_id = approval.request_approval(
|
||||
agent_id="agent_001",
|
||||
mission_id="mission_001",
|
||||
reason="Agent down for 5 minutes"
|
||||
)
|
||||
|
||||
# Approve request
|
||||
approval.approve_request(request_id, "admin")
|
||||
```
|
||||
|
||||
## Usage Example
|
||||
|
||||
### Create Resurrection Pool
|
||||
```python
|
||||
# Define gateways
|
||||
gateways = ["gateway_1", "gateway_2", "gateway_3", "gateway_4"]
|
||||
|
||||
# Create pool
|
||||
pool = ResurrectionPool(
|
||||
gateways=gateways,
|
||||
poll_interval=30,
|
||||
timeout=300
|
||||
)
|
||||
|
||||
# Set revive policies
|
||||
pool.set_mission_policy("mission_critical", "yes") # Always revive
|
||||
pool.set_mission_policy("mission_normal", "ask") # Ask for approval
|
||||
pool.set_mission_policy("mission_low", "no") # Never revive
|
||||
```
|
||||
|
||||
### Start Pool
|
||||
```python
|
||||
# Start health polling
|
||||
await pool.start()
|
||||
|
||||
# Detect and revive
|
||||
result = pool.detect_and_revive()
|
||||
|
||||
# Get status
|
||||
status = pool.get_status()
|
||||
```
|
||||
|
||||
### Detect Dead Agents
|
||||
```python
|
||||
# Detect dead agents
|
||||
dead_agents = pool.dead_agent_detector.detect_dead_agents()
|
||||
|
||||
# Get detection report
|
||||
report = pool.dead_agent_detector.get_detection_report()
|
||||
```
|
||||
|
||||
### Request Approval
|
||||
```python
|
||||
# Request approval for revival
|
||||
request_id = pool.approval_system.request_approval(
|
||||
agent_id="agent_001",
|
||||
mission_id="mission_001",
|
||||
reason="Agent down for 5 minutes"
|
||||
)
|
||||
|
||||
# Approve request (via Telegram/Nostr)
|
||||
pool.approval_system.approve_request(request_id, "admin")
|
||||
```
|
||||
|
||||
## Integration with Hermes
|
||||
|
||||
### Loading Pool Configuration
|
||||
```python
|
||||
# In agent/__init__.py
|
||||
from agent.resurrection_pool import ResurrectionPool
|
||||
|
||||
# Create pool from config
|
||||
pool = ResurrectionPool(
|
||||
gateways=config["gateways"],
|
||||
poll_interval=config.get("poll_interval", 30),
|
||||
timeout=config.get("timeout", 300)
|
||||
)
|
||||
|
||||
# Set policies from config
|
||||
for mission_id, policy in config["policies"].items():
|
||||
pool.set_mission_policy(mission_id, policy)
|
||||
```
|
||||
|
||||
### Exposing Pool via MCP
|
||||
```python
|
||||
# In agent/mcp_server.py
|
||||
from agent.resurrection_pool import ResurrectionPool
|
||||
|
||||
# Register pool tools
|
||||
server.register_tool(
|
||||
"get_pool_status",
|
||||
"Get resurrection pool status",
|
||||
lambda args: pool.get_status(),
|
||||
{...}
|
||||
)
|
||||
|
||||
server.register_tool(
|
||||
"detect_dead_agents",
|
||||
"Detect dead agents",
|
||||
lambda args: pool.detect_and_revive(),
|
||||
{...}
|
||||
)
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
### Unit Tests
|
||||
```bash
|
||||
python -m pytest tests/test_resurrection_pool.py -v
|
||||
```
|
||||
|
||||
### Integration Tests
|
||||
```python
|
||||
# Create pool
|
||||
pool = ResurrectionPool(["gateway_1"], poll_interval=5, timeout=30)
|
||||
|
||||
# Start pool
|
||||
await pool.start()
|
||||
|
||||
# Wait for some polling
|
||||
await asyncio.sleep(10)
|
||||
|
||||
# Detect dead agents
|
||||
result = pool.detect_and_revive()
|
||||
assert result["dead_agents"] >= 0
|
||||
|
||||
# Stop pool
|
||||
await pool.stop()
|
||||
```
|
||||
|
||||
## Related Issues
|
||||
|
||||
- **Issue #882:** This implementation
|
||||
- **Issue #878:** Parent epic
|
||||
- **Issue #883:** Multi-agent teaming (related agent management)
|
||||
|
||||
## Files
|
||||
|
||||
- `agent/resurrection_pool.py` - Main implementation
|
||||
- `docs/resurrection-pool.md` - This documentation
|
||||
- `tests/test_resurrection_pool.py` - Test suite (to be added)
|
||||
|
||||
## Conclusion
|
||||
|
||||
The Resurrection Pool provides:
|
||||
1. **Health polling** across wizard gateways
|
||||
2. **Dead-agent detection** with configurable timeout
|
||||
3. **Auto-revive policy engine** (yes/no/ask/substitute)
|
||||
4. **Human-in-the-loop approval** via Telegram/Nostr
|
||||
|
||||
**Core logic is complete; gateway health checks, agent revival, and approval notifications are currently simulated and require production integrations before deployment.**
|
||||
107
reports/perplexity-session-2026-04-12-evening.md
Normal file
107
reports/perplexity-session-2026-04-12-evening.md
Normal file
@@ -0,0 +1,107 @@
|
||||
# Perplexity Work Report — 2026-04-12 Evening
|
||||
|
||||
**Agent:** Perplexity
|
||||
**Duration:** ~30 minutes
|
||||
**Scope:** All 6 Timmy Foundation repos
|
||||
|
||||
---
|
||||
|
||||
## Session Summary
|
||||
|
||||
This artifact preserves the dated issue-body work report from the 2026-04-12 evening session.
|
||||
|
||||
## Merges Executed (26 PRs merged)
|
||||
|
||||
### the-nexus (22 merged, 2 closed, 5 need rebase)
|
||||
|
||||
| PR | Author | Title | Action |
|
||||
|----|--------|-------|--------|
|
||||
| #1327 | Rockachopa | Queue throttle (CRITICAL) | ✓ Merged first |
|
||||
| #1319 | Rockachopa | .gitea.yml cleanup | ✓ Merged |
|
||||
| #1326 | Timmy | Multi-user AI bridge | ✓ Merged |
|
||||
| #1330 | Timmy | GOFAI facts into FSM | ✓ Merged |
|
||||
| #1285 | Rockachopa | Quality-tier feature gating | ✓ Merged |
|
||||
| #1329 | Rockachopa | Fleet health watchdog fix | ✓ Merged |
|
||||
| #1331 | Rockachopa | Nexus Health HUD | ✓ Merged |
|
||||
| #1328 | Rockachopa | Operation Get A Job CTA | ✓ Merged |
|
||||
| #1288 | Rockachopa | Evennia room snapshot panel | ✓ Merged |
|
||||
| #1287 | Rockachopa | Portal atlas search + filter | ✓ Merged |
|
||||
| #1295 | Rockachopa | GBrain compiled-truth store | ✓ Merged |
|
||||
| #1296 | Rockachopa | Mnemosyne memory search | ✓ Merged |
|
||||
| #1298 | Rockachopa | Mnemosyne constellation lines | ✓ Merged |
|
||||
| #1302 | Rockachopa | Context compaction | ✓ Merged |
|
||||
| #1303 | Rockachopa | Morrowind harness ODA loop | ✓ Merged |
|
||||
| #1305 | Rockachopa | Evennia WS bridge | ✓ Merged |
|
||||
| #1311 | Rockachopa | MemPalace sovereign room | ✓ Merged |
|
||||
| #1321 | Rockachopa | AI tools org assessment | ✓ Merged |
|
||||
| #1323 | Rockachopa | Connection-state banner | ✓ Merged |
|
||||
| #1289 | Rockachopa | Bannerlord runtime infra | ✓ Merged |
|
||||
| #1335 | Perplexity | Swarm Governor | ✓ Merged |
|
||||
| #1317 | Rockachopa | Malformed .gitea.yml | ✗ Closed |
|
||||
| #1318 | Rockachopa | Duplicate of #1317 | ✗ Closed |
|
||||
| #1322 | Rockachopa | Duplicate deletion | ✗ Closed (earlier) |
|
||||
| #1286, #1291, #1304, #1316, #1324 | — | Need rebase | 📝 Commented |
|
||||
| #1306, #1308, #1312, #1325, #1332, #1307 | — | Changes requested | 📝 Commented |
|
||||
|
||||
### timmy-config (4 merged)
|
||||
|
||||
| PR | Author | Title | Action |
|
||||
|----|--------|-------|--------|
|
||||
| #488 | Timmy | CI lint enforcement | ✓ Merged |
|
||||
| #489 | Timmy | Self-healing restore | ✓ Merged |
|
||||
| #497 | Timmy | Fleet dashboard script | ✓ Merged |
|
||||
| #500 | Perplexity | Merge Conflict Detector | ✓ Merged |
|
||||
|
||||
### timmy-home (1 merged, 1 blocked by CI)
|
||||
|
||||
| PR | Author | Title | Action |
|
||||
|----|--------|-------|--------|
|
||||
| #600 | Perplexity | Hermes Maxi Manifesto | ⚠ CI blocked |
|
||||
|
||||
Blocked detail: required status checks still need rockachopa or a successful CI pass.
|
||||
|
||||
### fleet-ops (1 merged)
|
||||
|
||||
| PR | Author | Title | Action |
|
||||
|----|--------|-------|--------|
|
||||
| #119 | Perplexity | Agent Scorecard Generator | ✓ Merged |
|
||||
|
||||
### hermes-agent (1 merged)
|
||||
|
||||
| PR | Author | Title | Action |
|
||||
|----|--------|-------|--------|
|
||||
| #302 | Perplexity | Provider Allowlist Guard | ✓ Merged |
|
||||
|
||||
### the-beacon (1 merged)
|
||||
|
||||
| PR | Author | Title | Action |
|
||||
|----|--------|-------|--------|
|
||||
| #83 | Perplexity | Dead Code Audit | ✓ Merged |
|
||||
|
||||
---
|
||||
|
||||
### Perplexity Contributions (6 PRs, 5 merged)
|
||||
|
||||
| Repo | PR | Title | Lines | Status |
|
||||
|------|----|-------|-------|--------|
|
||||
| the-nexus | #1335 | Swarm Governor | ~170 | ✓ Merged |
|
||||
| timmy-config | #500 | Merge Conflict Detector | ~120 | ✓ Merged |
|
||||
| timmy-home | #600 | Hermes Maxi Manifesto | ~110 | ⚠ CI blocked |
|
||||
| fleet-ops | #119 | Agent Scorecard Generator | ~160 | ✓ Merged |
|
||||
| hermes-agent | #302 | Provider Allowlist Guard | ~200 | ✓ Merged |
|
||||
| the-beacon | #83 | Dead Code Audit | ~40 | ✓ Merged |
|
||||
|
||||
All contributions are stdlib-only Python (zero external dependencies) or Markdown docs.
|
||||
|
||||
---
|
||||
|
||||
## Remaining Work
|
||||
|
||||
1. **timmy-home #600** — merge after CI passes or rockachopa overrides
|
||||
2. **5 nexus PRs need rebase** — #1286, #1291, #1304, #1316, #1324
|
||||
3. **6 nexus PRs need changes** — #1306, #1307, #1308, #1312, #1325, #1332
|
||||
4. **timmy-config #499** — CAPTCHA tool needs human sign-off
|
||||
5. **timmy-config #498** — fragile status signal, needs structured output
|
||||
6. **timmy-home #596, #597** — papers need bug fixes before merge
|
||||
|
||||
Reference: perplexity-session-2026-04-12-evening
|
||||
30
tests/test_perplexity_session_report_2026_04_12_evening.py
Normal file
30
tests/test_perplexity_session_report_2026_04_12_evening.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
REPORT = Path("reports/perplexity-session-2026-04-12-evening.md")
|
||||
|
||||
|
||||
def test_session_report_exists_with_required_sections():
    """The session report artifact exists and contains every required section."""
    assert REPORT.exists(), "expected Perplexity session report artifact to exist"
    # Read as UTF-8 explicitly: the report contains em dashes and check marks,
    # and the platform-default encoding (e.g. cp1252 on Windows) may fail.
    content = REPORT.read_text(encoding="utf-8")
    assert "# Perplexity Work Report — 2026-04-12 Evening" in content
    assert "**Agent:** Perplexity" in content
    assert "**Duration:** ~30 minutes" in content
    assert "**Scope:** All 6 Timmy Foundation repos" in content
    assert "## Merges Executed (26 PRs merged)" in content
    assert "### Perplexity Contributions (6 PRs, 5 merged)" in content
    assert "## Remaining Work" in content
    assert "Reference: perplexity-session-2026-04-12-evening" in content
|
||||
|
||||
|
||||
def test_session_report_preserves_key_findings_and_counts():
    """Key per-repo counts and PR rows from the session survive verbatim."""
    # Explicit UTF-8 read: the report contains non-ASCII characters (—, ✓, ⚠)
    # that can break under a non-UTF-8 platform-default encoding.
    content = REPORT.read_text(encoding="utf-8")
    assert "the-nexus (22 merged, 2 closed, 5 need rebase)" in content
    assert "| #1335 | Perplexity | Swarm Governor | ✓ Merged |" in content
    assert "| #500 | Perplexity | Merge Conflict Detector | ✓ Merged |" in content
    assert "| #600 | Perplexity | Hermes Maxi Manifesto | ⚠ CI blocked |" in content
    assert "| #302 | Perplexity | Provider Allowlist Guard | ✓ Merged |" in content
    assert "| #83 | Perplexity | Dead Code Audit | ✓ Merged |" in content
    assert "1. **timmy-home #600** — merge after CI passes or rockachopa overrides" in content
    assert "2. **5 nexus PRs need rebase** — #1286, #1291, #1304, #1316, #1324" in content
    assert "3. **6 nexus PRs need changes** — #1306, #1307, #1308, #1312, #1325, #1332" in content
|
||||
Reference in New Issue
Block a user