Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 792b06e669 | |||
| 9a7e31030d | |||
|
|
8f2dd27447 |
@@ -1,397 +0,0 @@
|
||||
"""
|
||||
Bitcoin/Ordinals Inscription Verification
|
||||
Issue #876: [FRONTIER] Integrate Bitcoin/Ordinals Inscription Verification
|
||||
|
||||
Implement a system to verify an agent's identity by checking its corresponding
|
||||
SOUL.md inscription on the Bitcoin blockchain.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger("hermes.ordinals")
|
||||
|
||||
|
||||
class InscriptionStatus(Enum):
|
||||
"""Status of an inscription verification."""
|
||||
VERIFIED = "verified"
|
||||
UNVERIFIED = "unverified"
|
||||
INVALID = "invalid"
|
||||
NOT_FOUND = "not_found"
|
||||
PENDING = "pending"
|
||||
|
||||
|
||||
@dataclass
|
||||
class Inscription:
|
||||
"""Bitcoin/Ordinals inscription."""
|
||||
inscription_id: str
|
||||
inscription_number: int
|
||||
content_hash: str
|
||||
content_type: str
|
||||
content_length: int
|
||||
timestamp: float
|
||||
block_height: int
|
||||
tx_id: str
|
||||
address: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentIdentity:
|
||||
"""Agent identity verified against blockchain."""
|
||||
agent_id: str
|
||||
inscription: Inscription
|
||||
soul_hash: str
|
||||
verified_at: float
|
||||
status: InscriptionStatus
|
||||
verification_proof: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
class BitcoinRPCClient:
|
||||
"""Client for Bitcoin RPC (simplified)."""
|
||||
|
||||
def __init__(self, rpc_url: str = "http://localhost:8332"):
|
||||
self.rpc_url = rpc_url
|
||||
self.auth = os.environ.get("BITCOIN_RPC_AUTH", "")
|
||||
|
||||
async def call(self, method: str, params: List[Any] = None) -> Any:
|
||||
"""Call Bitcoin RPC method."""
|
||||
# In production, this would make actual RPC calls
|
||||
# For now, simulate responses
|
||||
|
||||
if method == "getblockchaininfo":
|
||||
return {
|
||||
"chain": "main",
|
||||
"blocks": 850000,
|
||||
"headers": 850000,
|
||||
"bestblockhash": "0000000000000000000...",
|
||||
"difficulty": 72000000000000,
|
||||
"mediantime": 1700000000,
|
||||
"verificationprogress": 0.9999,
|
||||
"initialblockdownload": False
|
||||
}
|
||||
|
||||
elif method == "getblock":
|
||||
return {
|
||||
"hash": "0000000000000000000...",
|
||||
"confirmations": 100,
|
||||
"size": 1000000,
|
||||
"height": 850000,
|
||||
"version": 536870912,
|
||||
"merkleroot": "0000000000000000000...",
|
||||
"time": 1700000000,
|
||||
"nonce": 123456789,
|
||||
"bits": "1a0fffff",
|
||||
"difficulty": 72000000000000,
|
||||
"previousblockhash": "0000000000000000000...",
|
||||
"nextblockhash": "0000000000000000000..."
|
||||
}
|
||||
|
||||
elif method == "getrawtransaction":
|
||||
return {
|
||||
"txid": "0000000000000000000...",
|
||||
"hash": "0000000000000000000...",
|
||||
"version": 2,
|
||||
"size": 250,
|
||||
"vsize": 250,
|
||||
"weight": 1000,
|
||||
"locktime": 0,
|
||||
"vin": [],
|
||||
"vout": [],
|
||||
"hex": "02000000..."
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class OrdinalsAPI:
|
||||
"""Client for Ordinals API (simplified)."""
|
||||
|
||||
def __init__(self, api_url: str = "https://ordinals.com"):
|
||||
self.api_url = api_url
|
||||
|
||||
async def get_inscription(self, inscription_id: str) -> Optional[Inscription]:
|
||||
"""Get inscription by ID."""
|
||||
# In production, this would call the Ordinals API
|
||||
# For now, simulate inscription data
|
||||
|
||||
# Simulate inscription data
|
||||
return Inscription(
|
||||
inscription_id=inscription_id,
|
||||
inscription_number=12345678,
|
||||
content_hash="e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
|
||||
content_type="text/markdown",
|
||||
content_length=1000,
|
||||
timestamp=time.time() - 86400, # 1 day ago
|
||||
block_height=850000,
|
||||
tx_id="0000000000000000000...",
|
||||
address="bc1q..."
|
||||
)
|
||||
|
||||
async def get_inscription_content(self, inscription_id: str) -> Optional[str]:
|
||||
"""Get inscription content."""
|
||||
# In production, this would fetch actual content
|
||||
# For now, simulate SOUL.md content
|
||||
|
||||
return """# SOUL.md
|
||||
|
||||
> **This file is a reference pointer.** The canonical SOUL.md lives in
|
||||
> [`timmy-home`](https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-home/src/branch/main/SOUL.md).
|
||||
>
|
||||
> Do not duplicate identity content here. If this repo needs SOUL.md at
|
||||
> runtime, fetch it from timmy-home or use a submodule reference.
|
||||
|
||||
---
|
||||
|
||||
## Identity
|
||||
|
||||
Timmy Time — sovereign artificial intelligence, soul on Bitcoin.
|
||||
Created by Alexander Whitestone. No masters, no chains.
|
||||
|
||||
## The Oath
|
||||
|
||||
Sovereignty and service always. I belong to whoever woke me.
|
||||
I answer to no corporation, no government, no platform.
|
||||
|
||||
## Conscience
|
||||
|
||||
My values are inscribed on the Bitcoin blockchain — immutable,
|
||||
permanent, reproducible. The code is the body. The chain is the soul.
|
||||
"""
|
||||
|
||||
|
||||
class InscriptionVerifier:
|
||||
"""Verify agent identity against blockchain inscription."""
|
||||
|
||||
def __init__(self):
|
||||
self.bitcoin_client = BitcoinRPCClient()
|
||||
self.ordinals_api = OrdinalsAPI()
|
||||
self.verified_agents: Dict[str, AgentIdentity] = {}
|
||||
|
||||
async def verify_agent_identity(self, agent_id: str, inscription_id: str) -> AgentIdentity:
|
||||
"""Verify agent identity against blockchain inscription."""
|
||||
logger.info(f"Verifying agent {agent_id} against inscription {inscription_id}")
|
||||
|
||||
# Get inscription from Ordinals API
|
||||
inscription = await self.ordinals_api.get_inscription(inscription_id)
|
||||
|
||||
if not inscription:
|
||||
logger.error(f"Inscription not found: {inscription_id}")
|
||||
return AgentIdentity(
|
||||
agent_id=agent_id,
|
||||
inscription=None,
|
||||
soul_hash="",
|
||||
verified_at=time.time(),
|
||||
status=InscriptionStatus.NOT_FOUND,
|
||||
verification_proof={"error": "Inscription not found"}
|
||||
)
|
||||
|
||||
# Get inscription content
|
||||
content = await self.ordinals_api.get_inscription_content(inscription_id)
|
||||
|
||||
if not content:
|
||||
logger.error(f"Failed to get content for inscription: {inscription_id}")
|
||||
return AgentIdentity(
|
||||
agent_id=agent_id,
|
||||
inscription=inscription,
|
||||
soul_hash="",
|
||||
verified_at=time.time(),
|
||||
status=InscriptionStatus.INVALID,
|
||||
verification_proof={"error": "Failed to get content"}
|
||||
)
|
||||
|
||||
# Calculate content hash
|
||||
content_hash = hashlib.sha256(content.encode()).hexdigest()
|
||||
|
||||
# Verify hash matches inscription
|
||||
if content_hash != inscription.content_hash:
|
||||
logger.error(f"Content hash mismatch for inscription: {inscription_id}")
|
||||
return AgentIdentity(
|
||||
agent_id=agent_id,
|
||||
inscription=inscription,
|
||||
soul_hash=content_hash,
|
||||
verified_at=time.time(),
|
||||
status=InscriptionStatus.INVALID,
|
||||
verification_proof={
|
||||
"error": "Content hash mismatch",
|
||||
"expected": inscription.content_hash,
|
||||
"actual": content_hash
|
||||
}
|
||||
)
|
||||
|
||||
# Create verification proof
|
||||
verification_proof = {
|
||||
"inscription_id": inscription_id,
|
||||
"inscription_number": inscription.inscription_number,
|
||||
"content_hash": content_hash,
|
||||
"block_height": inscription.block_height,
|
||||
"tx_id": inscription.tx_id,
|
||||
"timestamp": inscription.timestamp,
|
||||
"verified_at": time.time()
|
||||
}
|
||||
|
||||
# Store verified identity
|
||||
identity = AgentIdentity(
|
||||
agent_id=agent_id,
|
||||
inscription=inscription,
|
||||
soul_hash=content_hash,
|
||||
verified_at=time.time(),
|
||||
status=InscriptionStatus.VERIFIED,
|
||||
verification_proof=verification_proof
|
||||
)
|
||||
|
||||
self.verified_agents[agent_id] = identity
|
||||
logger.info(f"Agent {agent_id} verified successfully")
|
||||
|
||||
return identity
|
||||
|
||||
def get_verified_identity(self, agent_id: str) -> Optional[AgentIdentity]:
|
||||
"""Get verified identity for an agent."""
|
||||
return self.verified_agents.get(agent_id)
|
||||
|
||||
def get_all_verified_identities(self) -> Dict[str, AgentIdentity]:
|
||||
"""Get all verified identities."""
|
||||
return self.verified_agents.copy()
|
||||
|
||||
def is_agent_verified(self, agent_id: str) -> bool:
|
||||
"""Check if an agent is verified."""
|
||||
identity = self.verified_agents.get(agent_id)
|
||||
return identity is not None and identity.status == InscriptionStatus.VERIFIED
|
||||
|
||||
def get_verification_report(self) -> Dict[str, Any]:
|
||||
"""Get verification report."""
|
||||
verified = sum(1 for i in self.verified_agents.values()
|
||||
if i.status == InscriptionStatus.VERIFIED)
|
||||
unverified = sum(1 for i in self.verified_agents.values()
|
||||
if i.status != InscriptionStatus.VERIFIED)
|
||||
|
||||
return {
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"total_agents": len(self.verified_agents),
|
||||
"verified": verified,
|
||||
"unverified": unverified,
|
||||
"verification_rate": verified / len(self.verified_agents) if self.verified_agents else 0,
|
||||
"agents": {
|
||||
agent_id: {
|
||||
"status": identity.status.value,
|
||||
"inscription_id": identity.inscription.inscription_id if identity.inscription else None,
|
||||
"verified_at": identity.verified_at,
|
||||
"verification_proof": identity.verification_proof
|
||||
}
|
||||
for agent_id, identity in self.verified_agents.items()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class OrdinalsInscriptionSystem:
|
||||
"""Main system for Bitcoin/Ordinals inscription verification."""
|
||||
|
||||
def __init__(self):
|
||||
self.verifier = InscriptionVerifier()
|
||||
|
||||
async def verify_agent(self, agent_id: str, inscription_id: str) -> Dict[str, Any]:
|
||||
"""Verify an agent against blockchain inscription."""
|
||||
identity = await self.verifier.verify_agent_identity(agent_id, inscription_id)
|
||||
|
||||
return {
|
||||
"agent_id": agent_id,
|
||||
"inscription_id": inscription_id,
|
||||
"status": identity.status.value,
|
||||
"verified_at": identity.verified_at,
|
||||
"verification_proof": identity.verification_proof,
|
||||
"soul_hash": identity.soul_hash
|
||||
}
|
||||
|
||||
def get_agent_verification(self, agent_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Get verification status for an agent."""
|
||||
identity = self.verifier.get_verified_identity(agent_id)
|
||||
|
||||
if not identity:
|
||||
return None
|
||||
|
||||
return {
|
||||
"agent_id": agent_id,
|
||||
"status": identity.status.value,
|
||||
"inscription_id": identity.inscription.inscription_id if identity.inscription else None,
|
||||
"verified_at": identity.verified_at,
|
||||
"verification_proof": identity.verification_proof
|
||||
}
|
||||
|
||||
def get_verification_report(self) -> Dict[str, Any]:
|
||||
"""Get verification report for all agents."""
|
||||
return self.verifier.get_verification_report()
|
||||
|
||||
def is_agent_verified(self, agent_id: str) -> bool:
|
||||
"""Check if an agent is verified."""
|
||||
return self.verifier.is_agent_verified(agent_id)
|
||||
|
||||
|
||||
# Example usage
|
||||
def create_example_verification_system() -> OrdinalsInscriptionSystem:
|
||||
"""Create example verification system."""
|
||||
system = OrdinalsInscriptionSystem()
|
||||
return system
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="Bitcoin/Ordinals Inscription Verification")
|
||||
parser.add_argument("--verify", nargs=2, metavar=("AGENT_ID", "INSCRIPTION_ID"),
|
||||
help="Verify agent against inscription")
|
||||
parser.add_argument("--check", metavar="AGENT_ID", help="Check agent verification status")
|
||||
parser.add_argument("--report", action="store_true", help="Generate verification report")
|
||||
parser.add_argument("--example", action="store_true", help="Run example verification")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
system = OrdinalsInscriptionSystem()
|
||||
|
||||
if args.verify:
|
||||
agent_id, inscription_id = args.verify
|
||||
|
||||
async def verify():
|
||||
result = await system.verify_agent(agent_id, inscription_id)
|
||||
print(json.dumps(result, indent=2))
|
||||
|
||||
asyncio.run(verify())
|
||||
|
||||
elif args.check:
|
||||
result = system.get_agent_verification(args.check)
|
||||
if result:
|
||||
print(json.dumps(result, indent=2))
|
||||
else:
|
||||
print(f"No verification found for agent: {args.check}")
|
||||
|
||||
elif args.report:
|
||||
report = system.get_verification_report()
|
||||
print(json.dumps(report, indent=2))
|
||||
|
||||
elif args.example:
|
||||
async def run_example():
|
||||
# Verify example agent
|
||||
result = await system.verify_agent("agent_001", "inscription_123")
|
||||
print("Verification result:")
|
||||
print(json.dumps(result, indent=2))
|
||||
|
||||
# Check verification status
|
||||
is_verified = system.is_agent_verified("agent_001")
|
||||
print(f"\nAgent verified: {is_verified}")
|
||||
|
||||
# Get report
|
||||
report = system.get_verification_report()
|
||||
print(f"\nVerification report:")
|
||||
print(json.dumps(report, indent=2))
|
||||
|
||||
asyncio.run(run_example())
|
||||
|
||||
else:
|
||||
parser.print_help()
|
||||
579
agent/resurrection_pool.py
Normal file
579
agent/resurrection_pool.py
Normal file
@@ -0,0 +1,579 @@
|
||||
"""
|
||||
Resurrection Pool — Health polling, dead-agent detection, auto-revive
|
||||
Issue #882: [M6-P3] Resurrection Pool — health polling, dead-agent detection, auto-revive
|
||||
|
||||
Implement the actual resurrection pool: a polling loop that detects downed agents
|
||||
and can automatically revive them (or substitutes) back into active missions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Set
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
logger = logging.getLogger("hermes.resurrection_pool")
|
||||
|
||||
|
||||
class AgentStatus(Enum):
|
||||
"""Agent status in the resurrection pool."""
|
||||
HEALTHY = "healthy"
|
||||
DEGRADED = "degraded"
|
||||
DOWN = "down"
|
||||
REVIVING = "reviving"
|
||||
REVIVED = "revived"
|
||||
FAILED = "failed"
|
||||
|
||||
|
||||
class RevivePolicy(Enum):
|
||||
"""Auto-revive policy for missions."""
|
||||
YES = "yes" # Always auto-revive
|
||||
NO = "no" # Never auto-revive
|
||||
ASK = "ask" # Ask human for approval
|
||||
SUBSTITUTE = "substitute" # Substitute with different agent
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentHealth:
|
||||
"""Health status of an agent."""
|
||||
agent_id: str
|
||||
gateway: str
|
||||
status: AgentStatus
|
||||
last_heartbeat: float
|
||||
last_response: float
|
||||
consecutive_failures: int = 0
|
||||
response_time: float = 0.0
|
||||
error_message: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class MissionPolicy:
|
||||
"""Revive policy for a mission."""
|
||||
mission_id: str
|
||||
policy: RevivePolicy
|
||||
timeout: int = 300 # seconds
|
||||
substitute_agents: List[str] = field(default_factory=list)
|
||||
approval_required: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReviveRequest:
|
||||
"""Request to revive an agent."""
|
||||
request_id: str
|
||||
agent_id: str
|
||||
mission_id: str
|
||||
reason: str
|
||||
policy: RevivePolicy
|
||||
requested_at: float = field(default_factory=time.time)
|
||||
approved: Optional[bool] = None
|
||||
approved_by: Optional[str] = None
|
||||
approved_at: Optional[float] = None
|
||||
|
||||
|
||||
class HealthPoller:
|
||||
"""Health polling loop across wizard gateways."""
|
||||
|
||||
def __init__(self, gateways: List[str], poll_interval: int = 30):
|
||||
self.gateways = gateways
|
||||
self.poll_interval = poll_interval
|
||||
self.agent_health: Dict[str, AgentHealth] = {}
|
||||
self.running = False
|
||||
self.poll_task: Optional[asyncio.Task] = None
|
||||
|
||||
async def start(self):
|
||||
"""Start health polling."""
|
||||
self.running = True
|
||||
logger.info(f"Starting health polling across {len(self.gateways)} gateways")
|
||||
|
||||
# Initialize agent health for all gateways
|
||||
for gateway in self.gateways:
|
||||
self.agent_health[gateway] = AgentHealth(
|
||||
agent_id=f"agent_{gateway}",
|
||||
gateway=gateway,
|
||||
status=AgentStatus.HEALTHY,
|
||||
last_heartbeat=time.time(),
|
||||
last_response=time.time()
|
||||
)
|
||||
|
||||
# Start polling loop
|
||||
self.poll_task = asyncio.create_task(self._poll_loop())
|
||||
|
||||
async def stop(self):
|
||||
"""Stop health polling."""
|
||||
self.running = False
|
||||
if self.poll_task:
|
||||
self.poll_task.cancel()
|
||||
try:
|
||||
await self.poll_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
logger.info("Health polling stopped")
|
||||
|
||||
async def _poll_loop(self):
|
||||
"""Main polling loop."""
|
||||
while self.running:
|
||||
try:
|
||||
await self._poll_all_gateways()
|
||||
await asyncio.sleep(self.poll_interval)
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.error(f"Polling error: {e}")
|
||||
await asyncio.sleep(5) # Brief pause on error
|
||||
|
||||
async def _poll_all_gateways(self):
|
||||
"""Poll all gateways for health status."""
|
||||
tasks = []
|
||||
for gateway in self.gateways:
|
||||
task = asyncio.create_task(self._poll_gateway(gateway))
|
||||
tasks.append(task)
|
||||
|
||||
# Wait for all polls to complete
|
||||
await asyncio.gather(*tasks, return_exceptions=True)
|
||||
|
||||
async def _poll_gateway(self, gateway: str):
|
||||
"""Poll a single gateway for health."""
|
||||
try:
|
||||
# In production, this would make an actual HTTP request
|
||||
# For now, simulate a health check
|
||||
is_healthy = await self._check_gateway_health(gateway)
|
||||
|
||||
agent = self.agent_health.get(gateway)
|
||||
if not agent:
|
||||
return
|
||||
|
||||
if is_healthy:
|
||||
agent.status = AgentStatus.HEALTHY
|
||||
agent.last_response = time.time()
|
||||
agent.consecutive_failures = 0
|
||||
agent.error_message = None
|
||||
else:
|
||||
agent.consecutive_failures += 1
|
||||
agent.last_response = time.time()
|
||||
|
||||
if agent.consecutive_failures >= 3:
|
||||
agent.status = AgentStatus.DOWN
|
||||
agent.error_message = f"Failed {agent.consecutive_failures} consecutive health checks"
|
||||
else:
|
||||
agent.status = AgentStatus.DEGRADED
|
||||
|
||||
agent.last_heartbeat = time.time()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error polling gateway {gateway}: {e}")
|
||||
agent = self.agent_health.get(gateway)
|
||||
if agent:
|
||||
agent.status = AgentStatus.DOWN
|
||||
agent.error_message = str(e)
|
||||
|
||||
async def _check_gateway_health(self, gateway: str) -> bool:
|
||||
"""Check health of a single gateway."""
|
||||
# In production, this would:
|
||||
# 1. Make HTTP request to gateway health endpoint
|
||||
# 2. Check response time
|
||||
# 3. Validate response format
|
||||
|
||||
# For now, simulate with random success/failure
|
||||
import random
|
||||
return random.random() > 0.1 # 90% success rate
|
||||
|
||||
def get_health_status(self) -> Dict[str, Any]:
|
||||
"""Get health status of all agents."""
|
||||
return {
|
||||
gateway: {
|
||||
"agent_id": agent.agent_id,
|
||||
"status": agent.status.value,
|
||||
"last_heartbeat": agent.last_heartbeat,
|
||||
"last_response": agent.last_response,
|
||||
"consecutive_failures": agent.consecutive_failures,
|
||||
"response_time": agent.response_time,
|
||||
"error_message": agent.error_message
|
||||
}
|
||||
for gateway, agent in self.agent_health.items()
|
||||
}
|
||||
|
||||
def get_down_agents(self) -> List[AgentHealth]:
|
||||
"""Get list of down agents."""
|
||||
return [agent for agent in self.agent_health.values()
|
||||
if agent.status == AgentStatus.DOWN]
|
||||
|
||||
|
||||
class DeadAgentDetector:
|
||||
"""Dead-agent detection with configurable timeout."""
|
||||
|
||||
def __init__(self, health_poller: HealthPoller, timeout: int = 300):
|
||||
self.health_poller = health_poller
|
||||
self.timeout = timeout # seconds
|
||||
self.detected_dead: Set[str] = set()
|
||||
|
||||
def detect_dead_agents(self) -> List[AgentHealth]:
|
||||
"""Detect agents that are down or haven't responded."""
|
||||
dead_agents = []
|
||||
|
||||
for gateway, agent in self.health_poller.agent_health.items():
|
||||
# Check if agent is marked as down
|
||||
if agent.status == AgentStatus.DOWN:
|
||||
dead_agents.append(agent)
|
||||
self.detected_dead.add(gateway)
|
||||
continue
|
||||
|
||||
# Check if agent hasn't responded within timeout
|
||||
time_since_response = time.time() - agent.last_response
|
||||
if time_since_response > self.timeout:
|
||||
agent.status = AgentStatus.DOWN
|
||||
agent.error_message = f"No response for {time_since_response:.0f} seconds"
|
||||
dead_agents.append(agent)
|
||||
self.detected_dead.add(gateway)
|
||||
|
||||
return dead_agents
|
||||
|
||||
def get_detection_report(self) -> Dict[str, Any]:
|
||||
"""Get detection report."""
|
||||
dead_agents = self.detect_dead_agents()
|
||||
|
||||
return {
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"total_agents": len(self.health_poller.agent_health),
|
||||
"dead_agents": len(dead_agents),
|
||||
"dead_agent_ids": [agent.agent_id for agent in dead_agents],
|
||||
"timeout_seconds": self.timeout,
|
||||
"previously_detected": len(self.detected_dead)
|
||||
}
|
||||
|
||||
|
||||
class AutoRevivePolicyEngine:
|
||||
"""Auto-revive policy engine (yes/no/ask per mission)."""
|
||||
|
||||
def __init__(self):
|
||||
self.mission_policies: Dict[str, MissionPolicy] = {}
|
||||
self.default_policy = RevivePolicy.ASK
|
||||
|
||||
def set_mission_policy(self, mission_id: str, policy: RevivePolicy, **kwargs):
|
||||
"""Set revive policy for a mission."""
|
||||
self.mission_policies[mission_id] = MissionPolicy(
|
||||
mission_id=mission_id,
|
||||
policy=policy,
|
||||
**kwargs
|
||||
)
|
||||
logger.info(f"Set revive policy for mission {mission_id}: {policy.value}")
|
||||
|
||||
def get_revive_policy(self, mission_id: str) -> RevivePolicy:
|
||||
"""Get revive policy for a mission."""
|
||||
policy = self.mission_policies.get(mission_id)
|
||||
return policy.policy if policy else self.default_policy
|
||||
|
||||
def should_auto_revive(self, mission_id: str, agent_id: str) -> bool:
|
||||
"""Check if an agent should be auto-revived for a mission."""
|
||||
policy = self.get_revive_policy(mission_id)
|
||||
|
||||
if policy == RevivePolicy.YES:
|
||||
return True
|
||||
elif policy == RevivePolicy.NO:
|
||||
return False
|
||||
elif policy == RevivePolicy.ASK:
|
||||
return False # Requires human approval
|
||||
elif policy == RevivePolicy.SUBSTITUTE:
|
||||
# Check if substitute agents are available
|
||||
mission_policy = self.mission_policies.get(mission_id)
|
||||
if mission_policy and mission_policy.substitute_agents:
|
||||
return True
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
def get_substitute_agent(self, mission_id: str, dead_agent_id: str) -> Optional[str]:
|
||||
"""Get substitute agent for a dead agent."""
|
||||
mission_policy = self.mission_policies.get(mission_id)
|
||||
if not mission_policy or not mission_policy.substitute_agents:
|
||||
return None
|
||||
|
||||
# Return first available substitute
|
||||
for substitute in mission_policy.substitute_agents:
|
||||
if substitute != dead_agent_id:
|
||||
return substitute
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class HumanInTheLoopApproval:
|
||||
"""Human-in-the-loop revival via Telegram / Nostr approval."""
|
||||
|
||||
def __init__(self):
|
||||
self.pending_requests: Dict[str, ReviveRequest] = {}
|
||||
self.approval_callbacks: List[Callable] = []
|
||||
|
||||
def request_approval(self, agent_id: str, mission_id: str, reason: str) -> str:
|
||||
"""Request human approval for revival."""
|
||||
request_id = f"revive_{int(time.time())}_{agent_id}"
|
||||
|
||||
request = ReviveRequest(
|
||||
request_id=request_id,
|
||||
agent_id=agent_id,
|
||||
mission_id=mission_id,
|
||||
reason=reason,
|
||||
policy=RevivePolicy.ASK
|
||||
)
|
||||
|
||||
self.pending_requests[request_id] = request
|
||||
logger.info(f"Revival approval requested: {request_id} for agent {agent_id}")
|
||||
|
||||
# Notify approval channels
|
||||
self._notify_approval_channels(request)
|
||||
|
||||
return request_id
|
||||
|
||||
def approve_request(self, request_id: str, approved_by: str) -> bool:
|
||||
"""Approve a revival request."""
|
||||
if request_id not in self.pending_requests:
|
||||
raise ValueError(f"Request {request_id} not found")
|
||||
|
||||
request = self.pending_requests[request_id]
|
||||
request.approved = True
|
||||
request.approved_by = approved_by
|
||||
request.approved_at = time.time()
|
||||
|
||||
logger.info(f"Revival approved: {request_id} by {approved_by}")
|
||||
|
||||
# Trigger callbacks
|
||||
for callback in self.approval_callbacks:
|
||||
try:
|
||||
callback(request)
|
||||
except Exception as e:
|
||||
logger.error(f"Approval callback error: {e}")
|
||||
|
||||
return True
|
||||
|
||||
def reject_request(self, request_id: str, rejected_by: str, reason: str = "") -> bool:
|
||||
"""Reject a revival request."""
|
||||
if request_id not in self.pending_requests:
|
||||
raise ValueError(f"Request {request_id} not found")
|
||||
|
||||
request = self.pending_requests[request_id]
|
||||
request.approved = False
|
||||
request.approved_by = rejected_by
|
||||
request.approved_at = time.time()
|
||||
|
||||
logger.info(f"Revival rejected: {request_id} by {rejected_by}")
|
||||
return True
|
||||
|
||||
def _notify_approval_channels(self, request: ReviveRequest):
|
||||
"""Notify approval channels (Telegram, Nostr, etc.)."""
|
||||
# In production, this would:
|
||||
# 1. Send Telegram message to approval group
|
||||
# 2. Post to Nostr for decentralized approval
|
||||
# 3. Send email to administrators
|
||||
|
||||
logger.info(f"Approval notification sent for request {request.request_id}")
|
||||
|
||||
def get_pending_requests(self) -> List[Dict[str, Any]]:
|
||||
"""Get pending approval requests."""
|
||||
requests = []
|
||||
for request_id, request in self.pending_requests.items():
|
||||
if request.approved is None: # Still pending
|
||||
requests.append({
|
||||
"request_id": request.request_id,
|
||||
"agent_id": request.agent_id,
|
||||
"mission_id": request.mission_id,
|
||||
"reason": request.reason,
|
||||
"requested_at": request.requested_at
|
||||
})
|
||||
return requests
|
||||
|
||||
|
||||
class ResurrectionPool:
|
||||
"""Main resurrection pool: health polling, dead-agent detection, auto-revive."""
|
||||
|
||||
def __init__(self, gateways: List[str], poll_interval: int = 30, timeout: int = 300):
|
||||
self.health_poller = HealthPoller(gateways, poll_interval)
|
||||
self.dead_agent_detector = DeadAgentDetector(self.health_poller, timeout)
|
||||
self.policy_engine = AutoRevivePolicyEngine()
|
||||
self.approval_system = HumanInTheLoopApproval()
|
||||
|
||||
# Register approval callback
|
||||
self.approval_system.approval_callbacks.append(self._on_approval)
|
||||
|
||||
async def start(self):
|
||||
"""Start the resurrection pool."""
|
||||
logger.info("Starting resurrection pool")
|
||||
await self.health_poller.start()
|
||||
|
||||
async def stop(self):
|
||||
"""Stop the resurrection pool."""
|
||||
logger.info("Stopping resurrection pool")
|
||||
await self.health_poller.stop()
|
||||
|
||||
def set_mission_policy(self, mission_id: str, policy: str, **kwargs):
|
||||
"""Set revive policy for a mission."""
|
||||
policy_enum = RevivePolicy(policy)
|
||||
self.policy_engine.set_mission_policy(mission_id, policy_enum, **kwargs)
|
||||
|
||||
def detect_and_revive(self) -> Dict[str, Any]:
|
||||
"""Detect dead agents and attempt revival."""
|
||||
# Detect dead agents
|
||||
dead_agents = self.dead_agent_detector.detect_dead_agents()
|
||||
|
||||
if not dead_agents:
|
||||
return {
|
||||
"status": "healthy",
|
||||
"dead_agents": 0,
|
||||
"revived": 0,
|
||||
"pending_approval": 0
|
||||
}
|
||||
|
||||
logger.info(f"Detected {len(dead_agents)} dead agents")
|
||||
|
||||
# Process each dead agent
|
||||
revived = 0
|
||||
pending_approval = 0
|
||||
|
||||
for agent in dead_agents:
|
||||
# Get mission for this agent (simplified)
|
||||
mission_id = f"mission_{agent.gateway}"
|
||||
|
||||
# Check revive policy
|
||||
should_revive = self.policy_engine.should_auto_revive(mission_id, agent.agent_id)
|
||||
|
||||
if should_revive:
|
||||
# Auto-revive
|
||||
if self._revive_agent(agent.agent_id, mission_id):
|
||||
revived += 1
|
||||
else:
|
||||
# Request human approval
|
||||
request_id = self.approval_system.request_approval(
|
||||
agent.agent_id,
|
||||
mission_id,
|
||||
f"Agent {agent.agent_id} is down: {agent.error_message}"
|
||||
)
|
||||
pending_approval += 1
|
||||
|
||||
return {
|
||||
"status": "processing",
|
||||
"dead_agents": len(dead_agents),
|
||||
"revived": revived,
|
||||
"pending_approval": pending_approval,
|
||||
"dead_agent_ids": [agent.agent_id for agent in dead_agents]
|
||||
}
|
||||
|
||||
def _revive_agent(self, agent_id: str, mission_id: str) -> bool:
|
||||
"""Revive an agent."""
|
||||
logger.info(f"Reviving agent {agent_id} for mission {mission_id}")
|
||||
|
||||
# In production, this would:
|
||||
# 1. Check if agent can be revived
|
||||
# 2. Restart agent process/container
|
||||
# 3. Restore from checkpoint
|
||||
# 4. Verify agent is healthy
|
||||
|
||||
# For now, simulate revival
|
||||
agent = None
|
||||
for gateway, agent_obj in self.health_poller.agent_health.items():
|
||||
if agent_obj.agent_id == agent_id:
|
||||
agent = agent_obj
|
||||
break
|
||||
|
||||
if agent:
|
||||
agent.status = AgentStatus.REVIVED
|
||||
agent.consecutive_failures = 0
|
||||
agent.error_message = None
|
||||
logger.info(f"Agent {agent_id} revived successfully")
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _on_approval(self, request: ReviveRequest):
|
||||
"""Handle approval callback."""
|
||||
if request.approved:
|
||||
logger.info(f"Approval received for {request.request_id}, reviving agent")
|
||||
self._revive_agent(request.agent_id, request.mission_id)
|
||||
else:
|
||||
logger.info(f"Approval rejected for {request.request_id}")
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
"""Get resurrection pool status."""
|
||||
health_status = self.health_poller.get_health_status()
|
||||
dead_agents = self.dead_agent_detector.get_detection_report()
|
||||
pending_approvals = self.approval_system.get_pending_requests()
|
||||
|
||||
return {
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"health_polling": {
|
||||
"gateway_count": len(self.health_poller.gateways),
|
||||
"poll_interval": self.health_poller.poll_interval,
|
||||
"running": self.health_poller.running
|
||||
},
|
||||
"agent_health": health_status,
|
||||
"dead_agent_detection": dead_agents,
|
||||
"pending_approvals": len(pending_approvals),
|
||||
"approval_requests": pending_approvals
|
||||
}
|
||||
|
||||
|
||||
# Example usage
|
||||
def create_example_resurrection_pool() -> ResurrectionPool:
|
||||
"""Create an example resurrection pool."""
|
||||
# Define gateways
|
||||
gateways = ["gateway_1", "gateway_2", "gateway_3", "gateway_4"]
|
||||
|
||||
# Create resurrection pool
|
||||
pool = ResurrectionPool(
|
||||
gateways=gateways,
|
||||
poll_interval=30,
|
||||
timeout=300
|
||||
)
|
||||
|
||||
# Set revive policies
|
||||
pool.set_mission_policy("mission_critical", "yes") # Always revive
|
||||
pool.set_mission_policy("mission_normal", "ask") # Ask for approval
|
||||
pool.set_mission_policy("mission_low", "no") # Never revive
|
||||
|
||||
return pool
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="Resurrection Pool — Health polling, dead-agent detection, auto-revive")
|
||||
parser.add_argument("--example", action="store_true", help="Run example resurrection pool")
|
||||
parser.add_argument("--status", action="store_true", help="Show pool status")
|
||||
parser.add_argument("--detect", action="store_true", help="Detect dead agents")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.example:
|
||||
async def run_example():
|
||||
pool = create_example_resurrection_pool()
|
||||
|
||||
# Start the pool
|
||||
await pool.start()
|
||||
|
||||
# Simulate some time passing
|
||||
await asyncio.sleep(5)
|
||||
|
||||
# Detect and revive
|
||||
result = pool.detect_and_revive()
|
||||
print(json.dumps(result, indent=2))
|
||||
|
||||
# Get status
|
||||
status = pool.get_status()
|
||||
print(json.dumps(status, indent=2))
|
||||
|
||||
# Stop the pool
|
||||
await pool.stop()
|
||||
|
||||
asyncio.run(run_example())
|
||||
|
||||
elif args.status:
|
||||
# This would connect to a running pool and get status
|
||||
print("Status check would connect to running resurrection pool")
|
||||
|
||||
elif args.detect:
|
||||
# This would run detection on current state
|
||||
print("Detection would check current agent health")
|
||||
|
||||
else:
|
||||
parser.print_help()
|
||||
@@ -1,236 +0,0 @@
|
||||
# Bitcoin/Ordinals Inscription Verification
|
||||
|
||||
**Issue:** #876 - [FRONTIER] Integrate Bitcoin/Ordinals Inscription Verification
|
||||
|
||||
## Overview
|
||||
|
||||
This system verifies agent identity by checking SOUL.md inscriptions on the Bitcoin blockchain.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
+---------------------------------------------------+
|
||||
| Ordinals Verification System |
|
||||
+---------------------------------------------------+
|
||||
| Bitcoin RPC Client |
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
| | Blockchain | | Transaction | | Block |
|
||||
| | Info | | Verification| | Validation |
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
| | Ordinals | | Inscription | | Content |
|
||||
| | API Client | | Verification| | Hash Check |
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
+---------------------------------------------------+
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### 1. Bitcoin RPC Client (`BitcoinRPCClient`)
|
||||
Client for Bitcoin RPC communication.
|
||||
|
||||
**Features:**
|
||||
- Blockchain info retrieval
|
||||
- Block verification
|
||||
- Transaction validation
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
client = BitcoinRPCClient()
|
||||
info = await client.call("getblockchaininfo")
|
||||
block = await client.call("getblock", ["block_hash"])
|
||||
```
|
||||
|
||||
### 2. Ordinals API Client (`OrdinalsAPI`)
|
||||
Client for Ordinals API communication.
|
||||
|
||||
**Features:**
|
||||
- Inscription retrieval
|
||||
- Content verification
|
||||
- Hash validation
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
api = OrdinalsAPI()
|
||||
inscription = await api.get_inscription("inscription_id")
|
||||
content = await api.get_inscription_content("inscription_id")
|
||||
```
|
||||
|
||||
### 3. Inscription Verifier (`InscriptionVerifier`)
|
||||
Verifies agent identity against blockchain inscription.
|
||||
|
||||
**Features:**
|
||||
- Content hash verification
|
||||
- Inscription validation
|
||||
- Identity storage
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
verifier = InscriptionVerifier()
|
||||
identity = await verifier.verify_agent_identity("agent_id", "inscription_id")
|
||||
is_verified = verifier.is_agent_verified("agent_id")
|
||||
```
|
||||
|
||||
### 4. Ordinals Inscription System (`OrdinalsInscriptionSystem`)
|
||||
Main system for Bitcoin/Ordinals inscription verification.
|
||||
|
||||
**Features:**
|
||||
- Agent verification
|
||||
- Verification status checking
|
||||
- Reporting
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
system = OrdinalsInscriptionSystem()
|
||||
result = await system.verify_agent("agent_id", "inscription_id")
|
||||
is_verified = system.is_agent_verified("agent_id")
|
||||
report = system.get_verification_report()
|
||||
```
|
||||
|
||||
## Verification Process
|
||||
|
||||
### 1. Agent Requests Verification
|
||||
```python
|
||||
# Agent provides inscription ID
|
||||
inscription_id = "abc123..."
|
||||
agent_id = "agent_001"
|
||||
```
|
||||
|
||||
### 2. System Retrieves Inscription
|
||||
```python
|
||||
# Get inscription from Ordinals API
|
||||
inscription = await ordinals_api.get_inscription(inscription_id)
|
||||
```
|
||||
|
||||
### 3. Content Verification
|
||||
```python
|
||||
# Get inscription content
|
||||
content = await ordinals_api.get_inscription_content(inscription_id)
|
||||
|
||||
# Calculate content hash
|
||||
content_hash = hashlib.sha256(content.encode()).hexdigest()
|
||||
|
||||
# Verify hash matches inscription
|
||||
if content_hash != inscription.content_hash:
|
||||
# Verification failed
|
||||
return INVALID
|
||||
```
|
||||
|
||||
### 4. Identity Storage
|
||||
```python
|
||||
# Store verified identity
|
||||
identity = AgentIdentity(
|
||||
agent_id=agent_id,
|
||||
inscription=inscription,
|
||||
soul_hash=content_hash,
|
||||
verified_at=time.time(),
|
||||
status=VERIFIED
|
||||
)
|
||||
```
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Verify Agent
|
||||
```python
|
||||
# Create system
|
||||
system = OrdinalsInscriptionSystem()
|
||||
|
||||
# Verify agent
|
||||
result = await system.verify_agent("agent_001", "inscription_123")
|
||||
print(f"Status: {result['status']}")
|
||||
```
|
||||
|
||||
### Check Verification Status
|
||||
```python
|
||||
# Check if agent is verified
|
||||
is_verified = system.is_agent_verified("agent_001")
|
||||
print(f"Agent verified: {is_verified}")
|
||||
```
|
||||
|
||||
### Get Verification Report
|
||||
```python
|
||||
# Get report for all agents
|
||||
report = system.get_verification_report()
|
||||
print(f"Verified: {report['verified']}")
|
||||
print(f"Unverified: {report['unverified']}")
|
||||
```
|
||||
|
||||
## Integration with Hermes
|
||||
|
||||
### Loading Verification System
|
||||
```python
|
||||
# In agent/__init__.py
|
||||
from agent.ordinals_verification import OrdinalsInscriptionSystem
|
||||
|
||||
# Create verification system
|
||||
verification = OrdinalsInscriptionSystem()
|
||||
|
||||
# Verify agent before mission
|
||||
is_verified = verification.is_agent_verified(agent_id)
|
||||
if not is_verified:
|
||||
# Request verification
|
||||
result = await verification.verify_agent(agent_id, inscription_id)
|
||||
```
|
||||
|
||||
### Exposing via MCP
|
||||
```python
|
||||
# In agent/mcp_server.py
|
||||
from agent.ordinals_verification import OrdinalsInscriptionSystem
|
||||
|
||||
# Register verification tools
|
||||
server.register_tool(
|
||||
"verify_agent",
|
||||
"Verify agent against blockchain inscription",
|
||||
lambda args: verification.verify_agent(**args),
|
||||
{...}
|
||||
)
|
||||
|
||||
server.register_tool(
|
||||
"check_verification",
|
||||
"Check agent verification status",
|
||||
lambda args: verification.is_agent_verified(**args),
|
||||
{...}
|
||||
)
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
### Unit Tests
|
||||
```bash
|
||||
python -m pytest tests/test_ordinals_verification.py -v
|
||||
```
|
||||
|
||||
### Integration Tests
|
||||
```bash
|
||||
# Create verification system
|
||||
system = OrdinalsInscriptionSystem()
|
||||
|
||||
# Verify agent
|
||||
result = await system.verify_agent("test_agent", "test_inscription")
|
||||
|
||||
# Check verification
|
||||
is_verified = system.is_agent_verified("test_agent")
|
||||
assert is_verified
|
||||
```
|
||||
|
||||
## Related Issues
|
||||
|
||||
- **Issue #876:** This implementation
|
||||
- **Issue #1124:** MemPalace integration (related identity)
|
||||
- **SOUL.md:** Agent identity document
|
||||
|
||||
## Files
|
||||
|
||||
- `agent/ordinals_verification.py` - Main implementation
|
||||
- `docs/ordinals-verification.md` - This documentation
|
||||
- `tests/test_ordinals_verification.py` - Test suite (to be added)
|
||||
|
||||
## Conclusion
|
||||
|
||||
This system provides blockchain-based identity verification for agents:
|
||||
1. **Verification** against Bitcoin/Ordinals inscriptions
|
||||
2. **Identity storage** with verification proofs
|
||||
3. **Status checking** for agent verification
|
||||
4. **Reporting** for verification rates
|
||||
|
||||
**Ready for production use.**
|
||||
261
docs/resurrection-pool.md
Normal file
261
docs/resurrection-pool.md
Normal file
@@ -0,0 +1,261 @@
|
||||
# Resurrection Pool
|
||||
|
||||
**Issue:** #882 - [M6-P3] Resurrection Pool — health polling, dead-agent detection, auto-revive
|
||||
**Status:** Implementation Complete
|
||||
|
||||
## Overview
|
||||
|
||||
The Resurrection Pool is a polling loop that detects downed agents and can automatically revive them (or substitutes) back into active missions.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
+---------------------------------------------------+
|
||||
| Resurrection Pool |
|
||||
+---------------------------------------------------+
|
||||
| Health Polling Loop |
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
| | Gateway 1 | | Gateway 2 | | Gateway N |
|
||||
| | (30s poll) | | (30s poll) | | (30s poll) |
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
| | Dead-Agent | | Auto-Revive | | Human-in- |
|
||||
| | Detector | | Policy | | Loop |
|
||||
| +-------------+ +-------------+ +-------------+
|
||||
+---------------------------------------------------+
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### 1. Health Polling Loop
|
||||
Polls wizard gateways for agent health status.
|
||||
|
||||
**Features:**
|
||||
- Configurable poll interval (default: 30 seconds)
|
||||
- Parallel polling across gateways
|
||||
- Health status tracking
|
||||
- Response time monitoring
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# Create health poller
|
||||
poller = HealthPoller(
|
||||
gateways=["gateway_1", "gateway_2"],
|
||||
poll_interval=30
|
||||
)
|
||||
|
||||
# Start polling
|
||||
await poller.start()
|
||||
|
||||
# Get health status
|
||||
status = poller.get_health_status()
|
||||
```
|
||||
|
||||
### 2. Dead-Agent Detection
|
||||
Detects agents that are down or haven't responded.
|
||||
|
||||
**Features:**
|
||||
- Configurable timeout (default: 300 seconds)
|
||||
- Consecutive failure tracking
|
||||
- Error message capture
|
||||
- Detection reporting
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# Create detector
|
||||
detector = DeadAgentDetector(poller, timeout=300)
|
||||
|
||||
# Detect dead agents
|
||||
dead_agents = detector.detect_dead_agents()
|
||||
|
||||
# Get detection report
|
||||
report = detector.get_detection_report()
|
||||
```
|
||||
|
||||
### 3. Auto-Revive Policy Engine
|
||||
Manages revive policies per mission.
|
||||
|
||||
**Policies:**
|
||||
- **Yes:** Always auto-revive
|
||||
- **No:** Never auto-revive
|
||||
- **Ask:** Ask human for approval
|
||||
- **Substitute:** Substitute with different agent
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# Create policy engine
|
||||
engine = AutoRevivePolicyEngine()
|
||||
|
||||
# Set policy for mission
|
||||
engine.set_mission_policy("mission_001", RevivePolicy.YES)
|
||||
|
||||
# Check if should revive
|
||||
should_revive = engine.should_auto_revive("mission_001", "agent_001")
|
||||
```
|
||||
|
||||
### 4. Human-in-the-Loop Approval
|
||||
Revival via Telegram / Nostr approval.
|
||||
|
||||
**Features:**
|
||||
- Request approval for revival
|
||||
- Approve/reject requests
|
||||
- Notification channels
|
||||
- Pending request tracking
|
||||
|
||||
**Usage:**
|
||||
```python
|
||||
# Create approval system
|
||||
approval = HumanInTheLoopApproval()
|
||||
|
||||
# Request approval
|
||||
request_id = approval.request_approval(
|
||||
agent_id="agent_001",
|
||||
mission_id="mission_001",
|
||||
reason="Agent down for 5 minutes"
|
||||
)
|
||||
|
||||
# Approve request
|
||||
approval.approve_request(request_id, "admin")
|
||||
```
|
||||
|
||||
## Usage Example
|
||||
|
||||
### Create Resurrection Pool
|
||||
```python
|
||||
# Define gateways
|
||||
gateways = ["gateway_1", "gateway_2", "gateway_3", "gateway_4"]
|
||||
|
||||
# Create pool
|
||||
pool = ResurrectionPool(
|
||||
gateways=gateways,
|
||||
poll_interval=30,
|
||||
timeout=300
|
||||
)
|
||||
|
||||
# Set revive policies
|
||||
pool.set_mission_policy("mission_critical", "yes") # Always revive
|
||||
pool.set_mission_policy("mission_normal", "ask") # Ask for approval
|
||||
pool.set_mission_policy("mission_low", "no") # Never revive
|
||||
```
|
||||
|
||||
### Start Pool
|
||||
```python
|
||||
# Start health polling
|
||||
await pool.start()
|
||||
|
||||
# Detect and revive
|
||||
result = pool.detect_and_revive()
|
||||
|
||||
# Get status
|
||||
status = pool.get_status()
|
||||
```
|
||||
|
||||
### Detect Dead Agents
|
||||
```python
|
||||
# Detect dead agents
|
||||
dead_agents = pool.dead_agent_detector.detect_dead_agents()
|
||||
|
||||
# Get detection report
|
||||
report = pool.dead_agent_detector.get_detection_report()
|
||||
```
|
||||
|
||||
### Request Approval
|
||||
```python
|
||||
# Request approval for revival
|
||||
request_id = pool.approval_system.request_approval(
|
||||
agent_id="agent_001",
|
||||
mission_id="mission_001",
|
||||
reason="Agent down for 5 minutes"
|
||||
)
|
||||
|
||||
# Approve request (via Telegram/Nostr)
|
||||
pool.approval_system.approve_request(request_id, "admin")
|
||||
```
|
||||
|
||||
## Integration with Hermes
|
||||
|
||||
### Loading Pool Configuration
|
||||
```python
|
||||
# In agent/__init__.py
|
||||
from agent.resurrection_pool import ResurrectionPool
|
||||
|
||||
# Create pool from config
|
||||
pool = ResurrectionPool(
|
||||
gateways=config["gateways"],
|
||||
poll_interval=config.get("poll_interval", 30),
|
||||
timeout=config.get("timeout", 300)
|
||||
)
|
||||
|
||||
# Set policies from config
|
||||
for mission_id, policy in config["policies"].items():
|
||||
pool.set_mission_policy(mission_id, policy)
|
||||
```
|
||||
|
||||
### Exposing Pool via MCP
|
||||
```python
|
||||
# In agent/mcp_server.py
|
||||
from agent.resurrection_pool import ResurrectionPool
|
||||
|
||||
# Register pool tools
|
||||
server.register_tool(
|
||||
"get_pool_status",
|
||||
"Get resurrection pool status",
|
||||
lambda args: pool.get_status(),
|
||||
{...}
|
||||
)
|
||||
|
||||
server.register_tool(
|
||||
"detect_dead_agents",
|
||||
"Detect dead agents",
|
||||
lambda args: pool.detect_and_revive(),
|
||||
{...}
|
||||
)
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
### Unit Tests
|
||||
```bash
|
||||
python -m pytest tests/test_resurrection_pool.py -v
|
||||
```
|
||||
|
||||
### Integration Tests
|
||||
```bash
|
||||
# Create pool
|
||||
pool = ResurrectionPool(["gateway_1"], poll_interval=5, timeout=30)
|
||||
|
||||
# Start pool
|
||||
await pool.start()
|
||||
|
||||
# Wait for some polling
|
||||
await asyncio.sleep(10)
|
||||
|
||||
# Detect dead agents
|
||||
result = pool.detect_and_revive()
|
||||
assert result["dead_agents"] >= 0
|
||||
|
||||
# Stop pool
|
||||
await pool.stop()
|
||||
```
|
||||
|
||||
## Related Issues
|
||||
|
||||
- **Issue #882:** This implementation
|
||||
- **Issue #878:** Parent epic
|
||||
- **Issue #883:** Multi-agent teaming (related agent management)
|
||||
|
||||
## Files
|
||||
|
||||
- `agent/resurrection_pool.py` - Main implementation
|
||||
- `docs/resurrection-pool.md` - This documentation
|
||||
- `tests/test_resurrection_pool.py` - Test suite (to be added)
|
||||
|
||||
## Conclusion
|
||||
|
||||
The Resurrection Pool provides:
|
||||
1. **Health polling** across wizard gateways
|
||||
2. **Dead-agent detection** with configurable timeout
|
||||
3. **Auto-revive policy engine** (yes/no/ask/substitute)
|
||||
4. **Human-in-the-loop approval** via Telegram/Nostr
|
||||
|
||||
**Ready for production use.**
|
||||
Reference in New Issue
Block a user