460 lines
15 KiB
Python
460 lines
15 KiB
Python
#!/usr/bin/env python3
|
|
"""
Father Agent - Capability Coordinator

Coordinates capability discovery and task routing between father and child agents.
"""
|
|
|
|
from __future__ import annotations

import json
import logging
import threading
from dataclasses import dataclass
from datetime import datetime, timezone
from enum import Enum
from typing import Any, Callable, Dict, List, Optional

from father_registry import (
    AgentCapabilities,
    AgentStatus,
    CapabilityInfo,
    CapabilityType,
    FatherCapabilityRegistry,
    get_father_registry,
)
|
|
|
|
|
|
# Module-level logger shared by everything in this file; it is registered
# under the fixed name "father-coordinator" rather than the module name.
logger = logging.getLogger("father-coordinator")
|
|
|
|
|
|
class TaskStatus(Enum):
    """Lifecycle states a task can be in.

    Each member's value is the lowercase string form of its name, which is
    what gets exposed when a status is serialised via ``.value``.
    """

    # Not yet handed to any agent.
    PENDING = "pending"
    # Bound to an agent; this is the initial state of a new assignment.
    ASSIGNED = "assigned"
    IN_PROGRESS = "in_progress"

    # The three states below are the finished ("terminal") outcomes.
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
|
|
|
|
|
|
@dataclass
class TaskRequest:
    """Request for task execution.

    Attributes:
        task_id: Identifier of the task; used as the key for all tracking.
        task_type: Free-form category label for the task.
        description: Human-readable description.
        required_capabilities: Capability names an agent must offer.
        parameters: Arbitrary task parameters.
        priority: 1-10, higher is more urgent.
        timeout_seconds: Time budget for the task, in seconds.
        requested_at: ISO-8601 timestamp (naive, UTC). Filled in with the
            current time when omitted or ``None``.
    """

    task_id: str
    task_type: str
    description: str
    required_capabilities: List[str]
    parameters: Dict[str, Any]
    priority: int = 5  # 1-10, higher is more urgent
    timeout_seconds: int = 300
    # Optional because None is the "not supplied" marker resolved below.
    requested_at: Optional[str] = None

    def __post_init__(self):
        """Stamp the request with the current UTC time if none was given."""
        if self.requested_at is None:
            # datetime.utcnow() is deprecated; take an aware "now" and drop
            # the tzinfo so the string keeps the original naive-UTC format
            # (no "+00:00" suffix) that existing consumers already parse.
            self.requested_at = (
                datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
            )
|
|
|
|
|
|
@dataclass
class TaskAssignment:
    """Record binding one task to the agent chosen to run it.

    Attributes:
        task_id: Identifier of the assigned task.
        agent_id: Identifier of the agent the task was given to.
        assigned_at: Timestamp string recorded when the binding was made.
        status: Current state; a fresh assignment starts as ``ASSIGNED``.
        result: Payload reported for the task, if any.
        error: Error text reported for the task, if any.
        completed_at: Timestamp string set once the task has finished.
    """

    # Required: supplied by whoever creates the assignment.
    task_id: str
    agent_id: str
    assigned_at: str

    # Optional: filled in as the task progresses.
    status: TaskStatus = TaskStatus.ASSIGNED
    result: Optional[Any] = None
    error: Optional[str] = None
    completed_at: Optional[str] = None
|
|
|
|
|
|
class CapabilityCoordinator:
    """
    Coordinates capabilities across the agent ecosystem.

    Responsibilities:
    - Route tasks to capable agents
    - Monitor agent health
    - Load balance across agents
    - Handle agent failures

    Bookkeeping:
    ``_agent_load[agent_id]`` is kept equal to the number of that agent's
    assignments whose status is not terminal (COMPLETED / FAILED /
    CANCELLED). All task, assignment and load state is guarded by
    ``_task_lock``, which is re-entrant so internal helpers can be called
    while the lock is already held by the same thread.
    """

    def __init__(self, registry: Optional[FatherCapabilityRegistry] = None):
        """Create a coordinator.

        Args:
            registry: Registry to use. When ``None`` the shared registry
                returned by ``get_father_registry()`` is used.
        """
        # Compare with None explicitly: a registry object that happens to be
        # falsy (e.g. defines __len__ and is empty) must not be swapped out.
        self.registry = registry if registry is not None else get_father_registry()

        # Task tracking
        self._tasks: Dict[str, TaskRequest] = {}
        self._assignments: Dict[str, TaskAssignment] = {}
        self._task_lock = threading.RLock()

        # Callbacks
        self._task_callbacks: List[Callable[[TaskAssignment], None]] = []
        self._health_check_callbacks: List[Callable[[str, AgentStatus], None]] = []

        # Load tracking
        self._agent_load: Dict[str, int] = {}  # agent_id -> active task count

        # Configuration
        # NOTE(review): neither value is read anywhere in this class; the
        # interval is presumably in seconds - confirm against callers.
        self.max_retries = 3
        self.health_check_interval = 30

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as a naive ISO-8601 string."""
        # datetime.utcnow() is deprecated; dropping tzinfo keeps the exact
        # string format (no "+00:00" suffix) previously produced.
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    @staticmethod
    def _is_terminal(status: TaskStatus) -> bool:
        """Return True for statuses that mean the task has finished."""
        return status in (
            TaskStatus.COMPLETED,
            TaskStatus.FAILED,
            TaskStatus.CANCELLED,
        )

    def _release_agent_slot(self, agent_id: str) -> None:
        """Decrement an agent's active-task count (caller holds the lock).

        Agents without a load entry (e.g. already unregistered) are left
        alone so that no stale zero entry is re-created for them.
        """
        if agent_id in self._agent_load:
            self._agent_load[agent_id] = max(0, self._agent_load[agent_id] - 1)

    def _notify_task_callbacks(self, assignment: TaskAssignment) -> None:
        """Invoke every task callback, isolating failures from each other."""
        # Iterate over a snapshot so a callback may register further
        # callbacks without affecting this notification round.
        for callback in list(self._task_callbacks):
            try:
                callback(assignment)
            except Exception as e:  # one bad callback must not break the rest
                logger.exception("Task callback error: %s", e)

    # ------------------------------------------------------------------
    # Agent management
    # ------------------------------------------------------------------

    def register_agent(
        self,
        agent_id: str,
        endpoint: str,
        manifest: Dict[str, Any],
        capabilities: List[Dict[str, Any]]
    ) -> bool:
        """
        Register a child agent.

        Args:
            agent_id: Unique agent identifier
            endpoint: Agent endpoint URL
            manifest: Agent manifest
            capabilities: List of capability dictionaries. ``name`` is
                required; ``type`` defaults to ``"skill"``, ``version`` to
                ``"1.0.0"``, ``description`` to ``""``, ``parameters`` and
                ``returns`` to ``{}`` and ``tags`` to ``[]``.

        Returns:
            True if registered successfully

        Raises:
            KeyError: If a capability dictionary has no ``"name"`` key.
            Exception: Whatever ``CapabilityType(...)`` raises for a type
                value it does not recognise is propagated unchanged.
        """
        # Convert capability dicts to CapabilityInfo objects
        cap_objects = [
            CapabilityInfo(
                name=cap_dict["name"],
                type=CapabilityType(cap_dict.get("type", "skill")),
                version=cap_dict.get("version", "1.0.0"),
                description=cap_dict.get("description", ""),
                agent_id=agent_id,
                parameters=cap_dict.get("parameters", {}),
                returns=cap_dict.get("returns", {}),
                tags=cap_dict.get("tags", []),
            )
            for cap_dict in capabilities
        ]

        agent_info = AgentCapabilities(
            agent_id=agent_id,
            endpoint=endpoint,
            manifest=manifest,
            capabilities=cap_objects,
            status=AgentStatus.ONLINE,
        )

        is_new = self.registry.register_agent(agent_info)

        # Re-registration keeps the existing load count.
        with self._task_lock:
            self._agent_load.setdefault(agent_id, 0)

        logger.info("Registered agent %s (new=%s)", agent_id, is_new)
        return True

    def unregister_agent(self, agent_id: str) -> bool:
        """Unregister an agent and reassign its tasks.

        Only tasks still in ``ASSIGNED`` state are moved; tasks in any other
        state keep their current assignment record.

        Args:
            agent_id: Identifier of the agent to remove.

        Returns:
            The result of the registry's ``unregister_agent`` call.
        """
        # Remove the agent from the registry BEFORE reassigning, so the
        # departing agent is not a candidate for its own tasks.
        success = self.registry.unregister_agent(agent_id)

        with self._task_lock:
            self._agent_load.pop(agent_id, None)

            # Collect first: reassignment mutates self._assignments.
            orphaned = [
                task_id
                for task_id, assignment in self._assignments.items()
                if assignment.agent_id == agent_id
                and assignment.status == TaskStatus.ASSIGNED
            ]
            for task_id in orphaned:
                logger.info("Reassigning task %s from %s", task_id, agent_id)
                # Exclude the agent explicitly as well, in case the registry
                # still reports it as capable after unregistration.
                self._reassign_task(task_id, exclude_agent_id=agent_id)

        if success:
            logger.info("Unregistered agent %s", agent_id)

        return success

    def update_agent_heartbeat(self, agent_id: str) -> bool:
        """Update agent heartbeat and status.

        Returns:
            False if the registry does not know the agent, True otherwise.
        """
        agent = self.registry.get_agent(agent_id)
        if not agent:
            return False

        agent.update_last_seen()
        # A heartbeat from an agent not marked online brings it back online.
        if agent.status != AgentStatus.ONLINE:
            self.registry.update_agent_status(agent_id, AgentStatus.ONLINE)

        return True

    # ------------------------------------------------------------------
    # Task management
    # ------------------------------------------------------------------

    def submit_task(self, task: TaskRequest) -> Optional[TaskAssignment]:
        """
        Submit a task for execution.

        The task is recorded even when no agent can take it; in that case it
        has no assignment and reports as pending.

        Args:
            task: Task request

        Returns:
            Task assignment or None if no capable agent found
        """
        return self._assign_task(task)

    def _assign_task(
        self,
        task: TaskRequest,
        exclude_agent_id: Optional[str] = None
    ) -> Optional[TaskAssignment]:
        """Record ``task`` and bind it to the best available agent.

        Args:
            task: Task request.
            exclude_agent_id: Agent that must not be selected, if any.

        Returns:
            The new assignment, or None if no capable agent was found.
        """
        with self._task_lock:
            self._tasks[task.task_id] = task

            # Find capable agent
            agent_id = self._select_agent_for_task(task, exclude_agent_id)
            if not agent_id:
                logger.warning("No capable agent found for task %s", task.task_id)
                return None

            # Resubmitting a task id that still has an active assignment
            # would otherwise leak a load slot on the previous agent.
            previous = self._assignments.get(task.task_id)
            if previous is not None and not self._is_terminal(previous.status):
                self._release_agent_slot(previous.agent_id)

            # Create assignment
            assignment = TaskAssignment(
                task_id=task.task_id,
                agent_id=agent_id,
                assigned_at=self._utc_timestamp(),
            )
            self._assignments[task.task_id] = assignment
            self._agent_load[agent_id] = self._agent_load.get(agent_id, 0) + 1

            logger.info("Assigned task %s to agent %s", task.task_id, agent_id)

            # Notify callbacks (lock is re-entrant, so a callback may call
            # back into the coordinator from this thread).
            self._notify_task_callbacks(assignment)

            return assignment

    def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Get status of a task.

        Returns:
            None for an unknown task id. Otherwise a dict with the keys
            ``task_id``, ``task_type``, ``status``, ``agent_id``,
            ``assigned_at``, ``completed_at``, ``result`` and ``error``. A
            task without an assignment reports status ``"pending"`` and
            ``None`` for every assignment-derived field.
        """
        with self._task_lock:
            task = self._tasks.get(task_id)
            assignment = self._assignments.get(task_id)

            if not task:
                return None

            if assignment is None:
                return {
                    "task_id": task_id,
                    "task_type": task.task_type,
                    "status": TaskStatus.PENDING.value,
                    "agent_id": None,
                    "assigned_at": None,
                    "completed_at": None,
                    "result": None,
                    "error": None,
                }

            return {
                "task_id": task_id,
                "task_type": task.task_type,
                "status": assignment.status.value,
                "agent_id": assignment.agent_id,
                "assigned_at": assignment.assigned_at,
                "completed_at": assignment.completed_at,
                "result": assignment.result,
                "error": assignment.error,
            }

    def update_task_status(
        self,
        task_id: str,
        status: TaskStatus,
        result: Optional[Any] = None,
        error: Optional[str] = None
    ) -> bool:
        """Update status of a task.

        Args:
            task_id: Identifier of the task to update.
            status: New status.
            result: Result payload; stored only when not None.
            error: Error text; stored only when not None.

        Returns:
            False if the task has no assignment, True otherwise.
        """
        with self._task_lock:
            assignment = self._assignments.get(task_id)
            if not assignment:
                return False

            was_terminal = self._is_terminal(assignment.status)
            now_terminal = self._is_terminal(status)

            assignment.status = status

            if result is not None:
                assignment.result = result

            if error is not None:
                assignment.error = error

            if now_terminal:
                assignment.completed_at = self._utc_timestamp()

            # Adjust load only on an actual active<->terminal transition, so
            # repeated terminal updates cannot decrement the counter twice.
            if now_terminal and not was_terminal:
                self._release_agent_slot(assignment.agent_id)
            elif was_terminal and not now_terminal:
                # Task re-opened: it occupies a slot again, provided the
                # agent is still being tracked.
                if assignment.agent_id in self._agent_load:
                    self._agent_load[assignment.agent_id] += 1

            logger.info("Task %s status updated to %s", task_id, status.value)

            # Notify callbacks
            self._notify_task_callbacks(assignment)

            return True

    def _select_agent_for_task(
        self,
        task: TaskRequest,
        exclude_agent_id: Optional[str] = None
    ) -> Optional[str]:
        """Select the best agent for a task.

        Score = ``100 - 10 * current_load`` plus ``50 * accuracy_score`` when
        the agent manifest carries a numeric
        ``performance["accuracy_score"]``. The highest score wins; ties go
        to the agent the registry listed first.

        Args:
            task: Task whose ``required_capabilities`` drive the lookup.
            exclude_agent_id: Agent to leave out of consideration, if any.

        Returns:
            The chosen agent id, or None if no candidate exists.
        """
        # Find capable agents
        capable = self.registry.find_capable_agents(task.required_capabilities)
        if not capable:
            return None

        candidates = [
            agent for agent in capable if agent.agent_id != exclude_agent_id
        ]
        if not candidates:
            return None

        def score(agent) -> float:
            # Prefer agents with lower load
            value = 100 - self._agent_load.get(agent.agent_id, 0) * 10

            # Bonus for agents with specific expertise. Manifests are
            # supplied by child agents, so tolerate a missing or malformed
            # "performance" section instead of failing the whole selection.
            manifest = agent.manifest
            perf = manifest.get("performance") if isinstance(manifest, dict) else None
            if isinstance(perf, dict):
                accuracy = perf.get("accuracy_score", 0)
                if isinstance(accuracy, (int, float)):
                    value += accuracy * 50
            return value

        # max() returns the first maximal element, matching a stable
        # descending sort followed by taking the head.
        return max(candidates, key=score).agent_id

    def _reassign_task(
        self,
        task_id: str,
        exclude_agent_id: Optional[str] = None
    ) -> bool:
        """Reassign a task to a different agent.

        Args:
            task_id: Identifier of the task to move.
            exclude_agent_id: Agent that must not receive the task, if any.

        Returns:
            True if a new assignment was created, False if the task is
            unknown or no capable agent was found (the task is then left
            without an assignment).
        """
        with self._task_lock:
            task = self._tasks.get(task_id)
            if not task:
                return False

            # Remove old assignment; its slot is only still held if the
            # assignment had not already reached a terminal state.
            old_assignment = self._assignments.pop(task_id, None)
            if old_assignment is not None and not self._is_terminal(old_assignment.status):
                self._release_agent_slot(old_assignment.agent_id)

            # Find new agent
            new_assignment = self._assign_task(task, exclude_agent_id)

        if new_assignment:
            logger.info("Reassigned task %s to %s", task_id, new_assignment.agent_id)
            return True

        logger.error("Failed to reassign task %s", task_id)
        return False

    # ------------------------------------------------------------------
    # Introspection
    # ------------------------------------------------------------------

    def get_system_overview(self) -> Dict[str, Any]:
        """Get overview of the entire system.

        Returns:
            Dict with ``timestamp``, ``registry`` (the registry's own
            stats), ``tasks`` (counts per status plus ``total_tasks``) and
            ``agent_loads`` (a copy of the per-agent active-task counts).
            ``pending`` includes tasks that were submitted but have no
            assignment, consistent with ``get_task_status``.
        """
        registry_stats = self.registry.get_stats()

        with self._task_lock:
            # Single pass over the assignments instead of one scan per status.
            counts: Dict[Any, int] = {}
            for assignment in self._assignments.values():
                counts[assignment.status] = counts.get(assignment.status, 0) + 1

            # Tasks that never received an assignment are pending too.
            unassigned = sum(
                1 for task_id in self._tasks if task_id not in self._assignments
            )

            task_stats = {
                "total_tasks": len(self._tasks),
                "pending": counts.get(TaskStatus.PENDING, 0) + unassigned,
                "assigned": counts.get(TaskStatus.ASSIGNED, 0),
                "in_progress": counts.get(TaskStatus.IN_PROGRESS, 0),
                "completed": counts.get(TaskStatus.COMPLETED, 0),
                "failed": counts.get(TaskStatus.FAILED, 0),
                "cancelled": counts.get(TaskStatus.CANCELLED, 0),
            }

            agent_loads = dict(self._agent_load)

        return {
            "timestamp": self._utc_timestamp(),
            "registry": registry_stats,
            "tasks": task_stats,
            "agent_loads": agent_loads,
        }

    def on_task_update(self, callback: Callable[[TaskAssignment], None]):
        """Register callback for task updates.

        The callback receives the affected assignment whenever a task is
        assigned or its status is updated.
        """
        self._task_callbacks.append(callback)

    def on_health_check(self, callback: Callable[[str, AgentStatus], None]):
        """Register callback for health checks.

        NOTE(review): nothing in this class invokes these callbacks yet.
        """
        self._health_check_callbacks.append(callback)

    def discover_capabilities(self, query: Optional[str] = None) -> Dict[str, Any]:
        """
        Discover available capabilities.

        Args:
            query: Optional search query. Matched case-insensitively as a
                substring of capability names and tags. An empty string is
                treated like no query.

        Returns:
            Capability catalog. With a query, ``by_name`` and ``by_tag`` are
            filtered while ``by_type`` is passed through unfiltered.
        """
        catalog = self.registry.get_capability_catalog()

        if not query:
            return catalog

        needle = query.lower()  # hoisted: invariant across both scans

        return {
            "by_name": {
                name: info
                for name, info in catalog.get("by_name", {}).items()
                if needle in name.lower()
            },
            "by_type": catalog.get("by_type", {}),
            "by_tag": {
                tag: caps
                for tag, caps in catalog.get("by_tag", {}).items()
                if needle in tag.lower()
            },
        }
|
|
|
|
|
|
# Module-level slot holding the shared coordinator; stays None until the
# first get_coordinator() call fills it.
_coordinator_instance: Optional[CapabilityCoordinator] = None


def get_coordinator() -> CapabilityCoordinator:
    """Return the shared coordinator, constructing it on first use."""
    global _coordinator_instance

    instance = _coordinator_instance
    if instance is None:
        # First call: build the coordinator and remember it for later calls.
        instance = CapabilityCoordinator()
        _coordinator_instance = instance
    return instance
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo: register two agents, print the overview, route one task, then
    # print the capability catalog.
    coordinator = CapabilityCoordinator()

    # (agent_id, endpoint, manifest, capabilities) for each demo agent.
    # Each agent gets its own capability dicts; nothing is shared.
    demo_agents = [
        (
            "ap-1",
            "http://localhost:9001",
            {"name": "AP Agent 1", "version": "1.0"},
            [
                {"name": "code_generation", "type": "skill", "version": "1.0"},
                {"name": "file_reader", "type": "tool", "version": "1.0", "tags": ["filesystem"]},
            ],
        ),
        (
            "ap-2",
            "http://localhost:9002",
            {"name": "AP Agent 2", "version": "1.0"},
            [
                {"name": "code_analysis", "type": "skill", "version": "2.0"},
                {"name": "file_reader", "type": "tool", "version": "1.0", "tags": ["filesystem"]},
            ],
        ),
    ]

    # Register agents
    for agent_spec in demo_agents:
        coordinator.register_agent(*agent_spec)

    overview = coordinator.get_system_overview()
    print("System overview:", json.dumps(overview, indent=2))

    # Submit a task
    task = TaskRequest(
        task_id="task-001",
        task_type="code_gen",
        description="Generate Python function",
        required_capabilities=["code_generation"],
        parameters={"language": "python"},
    )
    assignment = coordinator.submit_task(task)

    if assignment:
        print(f"\nTask assigned to: {assignment.agent_id}")
        print(f"Task status: {coordinator.get_task_status(task.task_id)}")

    # Discover capabilities
    catalog = coordinator.discover_capabilities()
    print("\nCapability catalog:", json.dumps(catalog, indent=2))
|