FLEET-010: Cross-agent task delegation protocol - Keyword-based heuristic assigns unassigned issues to agents - Supports: claw-code, gemini, ezra, bezalel, timmy - Delegation logging and status dashboard - Auto-comments on assigned issues FLEET-011: Local model pipeline and fallback chain - Checks Ollama reachability and model availability - 4-model chain: hermes4:14b -> qwen2.5:7b -> phi3:3.8b -> gemma3:1b - Tests each model with live inference on every run - Fallback verification: finds first responding model - Chain configuration via ~/.local/timmy/fleet-resources/model-chain.json FLEET-012: Agent lifecycle manager - Full lifecycle: provision -> deploy -> monitor -> retire - Heartbeat detection with 24h idle threshold - Task completion/failure tracking - Agent Fleet Status dashboard Fixes timmy-home#563 (delegation), #564 (model pipeline), #565 (lifecycle)
123 lines
4.1 KiB
Python
123 lines
4.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
FLEET-012: Agent Lifecycle Manager
|
|
Phase 5: Scale — spawn, train, deploy, retire agents automatically.
|
|
|
|
Manages the full lifecycle:
|
|
1. PROVISION: Clone template, install deps, configure, test
|
|
2. DEPLOY: Add to active rotation, start accepting issues
|
|
3. MONITOR: Track performance, quality, heartbeat
|
|
4. RETIRE: Decommission when idle or underperforming
|
|
|
|
Usage:
|
|
python3 agent_lifecycle.py provision <name> <vps> [--model model]
|
|
python3 agent_lifecycle.py deploy <name>
|
|
python3 agent_lifecycle.py retire <name>
|
|
python3 agent_lifecycle.py status
|
|
python3 agent_lifecycle.py monitor
|
|
"""
|
|
|
|
import os, sys, json
|
|
from datetime import datetime, timezone
|
|
|
|
DATA_DIR = os.path.expanduser("~/.local/timmy/fleet-agents")
|
|
DB_FILE = os.path.join(DATA_DIR, "agents.json")
|
|
LOG_FILE = os.path.join(DATA_DIR, "lifecycle.log")
|
|
|
|
def ensure():
    """Create the fleet-agents data directory if it is not already present."""
    # exist_ok=True makes repeated calls harmless.
    os.makedirs(name=DATA_DIR, exist_ok=True)
|
|
|
|
def log(msg, level="INFO"):
    """Append a timestamped entry to the lifecycle log and echo it to stdout.

    Args:
        msg: Text of the log entry.
        level: Severity label shown in brackets after the timestamp.
    """
    now_utc = datetime.now(timezone.utc)
    ts = now_utc.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{ts}] [{level}] {msg}"

    # Append mode: earlier entries are preserved across runs.
    with open(LOG_FILE, "a") as log_handle:
        log_handle.write(entry + "\n")

    print(f" {entry}")
|
|
|
|
def load(path=None):
    """Read the agent registry from disk.

    Args:
        path: JSON file to read. Defaults to DB_FILE when omitted.

    Returns:
        The parsed registry, or an empty dict when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON. This is deliberately not swallowed, so a damaged registry
            is never mistaken for an empty one and then overwritten.
    """
    if path is None:
        path = DB_FILE
    # Open directly instead of checking os.path.exists first: avoids the
    # check-then-open race, and the context manager closes the handle.
    try:
        with open(path, encoding="utf-8") as fh:
            return json.load(fh)
    except FileNotFoundError:
        return {}
|
|
|
|
def save(db, path=None):
    """Write the agent registry to disk as indented JSON.

    The data is serialized before any file is opened, written to a sibling
    temporary file, and then moved over the target with os.replace. A
    serialization error or an interrupted write therefore leaves the
    previous registry intact instead of a truncated file.

    Args:
        db: JSON-serializable registry (agent name -> record).
        path: Destination file. Defaults to DB_FILE when omitted.

    Raises:
        TypeError: If db contains values json.dumps cannot serialize.
    """
    if path is None:
        path = DB_FILE
    # Serialize first: failing here must not touch the existing file.
    payload = json.dumps(db, indent=2)
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as fh:
        fh.write(payload)
    os.replace(tmp_path, path)
|
|
|
|
def status():
    """Print a one-line summary for every agent in the registry.

    Each row shows the agent's state, VPS, model, completed-task count and
    last heartbeat. Prints a placeholder line when the registry is empty.
    """
    agents = load()
    print("\n=== Agent Fleet ===")
    if not agents:
        print(" No agents registered.")
        return
    for name, info in agents.items():
        # Coerce to str: the fixed-width "s" format specs below raise
        # TypeError for None or numeric values.
        state = str(info.get("state", "?"))
        vps = str(info.get("vps", "?"))
        model = str(info.get("model", "?"))
        tasks = info.get("tasks_completed", 0)
        # provision() stores last_heartbeat as None, so the key exists and a
        # .get() default would never apply; fall back on any falsy value.
        hb = info.get("last_heartbeat") or "never"
        print(f" {name:15s} state={state:12s} vps={vps:5s} model={model:15s} tasks={tasks} hb={hb}")
|
|
|
|
def provision(name, vps, model="hermes4:14b"):
    """Register a new agent record in the 'provisioning' state.

    If an agent called `name` is already registered, a notice with its
    current state is printed and the registry is left unchanged.

    Args:
        name: Registry key for the agent.
        vps: Host identifier stored on the record.
        model: Model identifier stored on the record.
    """
    registry = load()

    if name in registry:
        current_state = registry[name].get('state')
        print(f" '{name}' already exists (state={current_state})")
        return

    # Key order is kept stable because it determines the order in the saved JSON.
    record = {}
    record["name"] = name
    record["vps"] = vps
    record["model"] = model
    record["state"] = "provisioning"
    record["created_at"] = datetime.now(timezone.utc).isoformat()
    record["tasks_completed"] = 0
    record["tasks_failed"] = 0
    record["last_heartbeat"] = None

    registry[name] = record
    save(registry)
    log(f"Provisioned '{name}' on {vps} with {model}")
|
|
|
|
def deploy(name):
    """Mark a registered agent as 'deployed' and stamp the deployment time.

    Prints a notice and changes nothing when `name` is not in the registry.
    """
    registry = load()

    if name in registry:
        record = registry[name]
        record["state"] = "deployed"
        record["deployed_at"] = datetime.now(timezone.utc).isoformat()
        save(registry)
        log(f"Deployed '{name}'")
    else:
        print(f" '{name}' not found")
|
|
|
|
def retire(name):
    """Mark a registered agent as 'retired' and stamp the retirement time.

    Prints a notice and changes nothing when `name` is not in the registry.
    The log entry includes the agent's completed-task count.
    """
    registry = load()

    if name not in registry:
        print(f" '{name}' not found")
        return

    record = registry[name]
    record.update(
        state="retired",
        retired_at=datetime.now(timezone.utc).isoformat(),
    )
    save(registry)

    completed = record.get('tasks_completed', 0)
    log(f"Retired '{name}'. Completed {completed} tasks.")
|
|
|
|
def _parse_heartbeat(raw):
    """Parse an ISO-8601 heartbeat string into an aware datetime, or None if unusable."""
    if not isinstance(raw, str):
        return None
    text = raw.strip()
    # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11 on.
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        # A naive value cannot be subtracted from an aware "now".
        # NOTE(review): assumes naive heartbeats are UTC, matching the
        # timestamps this module writes; confirm against the producer.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def monitor(idle_hours=24):
    """Flag deployed agents whose last heartbeat is older than the threshold.

    Every agent in the 'deployed' state with a heartbeat older than
    `idle_hours` is moved to 'idle' and stamped with 'idle_since'. The
    registry is saved only if something changed, and a summary line is
    printed either way.

    Deployed agents with no heartbeat recorded are left untouched.
    Heartbeats that cannot be parsed are logged at WARN level and skipped
    rather than silently ignored.

    Args:
        idle_hours: Hours without a heartbeat before an agent counts as idle.
    """
    agents = load()
    now = datetime.now(timezone.utc)
    changes = 0
    for name, agent in agents.items():
        if agent.get("state") != "deployed":
            continue
        raw_hb = agent.get("last_heartbeat")
        if not raw_hb:
            continue
        hb_time = _parse_heartbeat(raw_hb)
        if hb_time is None:
            log(f"'{name}' has unreadable last_heartbeat {raw_hb!r}; skipped", "WARN")
            continue
        hours = (now - hb_time).total_seconds() / 3600
        if hours > idle_hours:
            agent["state"] = "idle"
            agent["idle_since"] = now.isoformat()
            log(f"'{name}' idle for {hours:.1f}h")
            changes += 1
    if changes:
        save(agents)
    print(f"Monitor: {changes} state changes" if changes else "Monitor: all healthy")
|
|
|
|
def _parse_provision_args(args):
    """Split provision's CLI arguments into (name, vps, model-or-None); None if malformed."""
    positional = []
    model = None
    remaining = iter(args)
    for arg in remaining:
        if arg == "--model":
            model = next(remaining, None)
            if model is None:
                # "--model" given without a value.
                return None
        elif arg.startswith("--model="):
            model = arg.split("=", 1)[1] or None
        else:
            positional.append(arg)
    if len(positional) < 2:
        return None
    if model is None and len(positional) >= 3:
        # Legacy positional form: provision <name> <vps> <model>
        model = positional[2]
    return positional[0], positional[1], model


if __name__ == "__main__":
    # Command-line entry point. With no command, a monitor pass is run.
    ensure()
    cmd = sys.argv[1] if len(sys.argv) > 1 else "monitor"
    rest = sys.argv[2:]
    usage = "Usage: agent_lifecycle.py [provision|deploy|retire|status|monitor]"

    if cmd == "status":
        status()
    elif cmd == "provision":
        parsed = _parse_provision_args(rest)
        if parsed is None:
            print(usage)
        else:
            agent_name, agent_vps, agent_model = parsed
            if agent_model is None:
                # Let provision() apply its own default model.
                provision(agent_name, agent_vps)
            else:
                provision(agent_name, agent_vps, agent_model)
    elif cmd == "deploy" and rest:
        deploy(rest[0])
    elif cmd == "retire" and rest:
        retire(rest[0])
    elif cmd in ("monitor", "run"):
        # "run" is an alias for "monitor".
        monitor()
    else:
        print(usage)
|