#!/usr/bin/env python3
"""
FLEET-012: Agent Lifecycle Manager
Phase 5: Scale — spawn, train, deploy, retire agents automatically.

Manages the full lifecycle:
  1. PROVISION: Clone template, install deps, configure, test
  2. DEPLOY: Add to active rotation, start accepting issues
  3. MONITOR: Track performance, quality, heartbeat
  4. RETIRE: Decommission when idle or underperforming

Usage:
    python3 agent_lifecycle.py provision <name> <vps> [--model model]
    python3 agent_lifecycle.py deploy <name>
    python3 agent_lifecycle.py retire <name>
    python3 agent_lifecycle.py status
    python3 agent_lifecycle.py monitor
"""

import json
import os
import sys
from datetime import datetime, timezone

DATA_DIR = os.path.expanduser("~/.local/timmy/fleet-agents")
DB_FILE = os.path.join(DATA_DIR, "agents.json")
LOG_FILE = os.path.join(DATA_DIR, "lifecycle.log")

# Model recorded for an agent when the caller does not name one.
DEFAULT_MODEL = "hermes4:14b"
# A deployed agent whose last heartbeat is older than this is flagged idle.
IDLE_AFTER_HOURS = 24


def ensure():
    """Create the data directory if it does not exist yet."""
    os.makedirs(DATA_DIR, exist_ok=True)


def log(msg, level="INFO"):
    """Append a UTC-timestamped entry to the lifecycle log and echo it.

    Args:
        msg: Text of the log entry.
        level: Severity label placed in the entry (default ``"INFO"``).
    """
    # Make the function usable when the module is imported and ensure()
    # has not been called by the CLI entry point.
    ensure()
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{ts}] [{level}] {msg}"
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(entry + "\n")
    print(f" {entry}")


def load():
    """Return the agent registry as a dict, or ``{}`` if no file exists.

    Raises:
        json.JSONDecodeError: If the registry file is not valid JSON. This
            is deliberately not swallowed: returning ``{}`` here would let
            the next ``save`` overwrite a damaged-but-recoverable file.
    """
    if not os.path.exists(DB_FILE):
        return {}
    with open(DB_FILE, encoding="utf-8") as f:
        return json.loads(f.read())


def save(db):
    """Write the agent registry to disk as indented JSON.

    The data is written to a sibling temporary file and then moved over
    the registry with ``os.replace`` so an interrupted write cannot leave
    a truncated ``agents.json`` behind.

    Args:
        db: Mapping of agent name to its record dict.
    """
    ensure()
    tmp_path = DB_FILE + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(db, indent=2))
    os.replace(tmp_path, DB_FILE)


def _field(record, key, default):
    """Return ``record[key]`` as a string, or *default* if missing/None."""
    value = record.get(key)
    return default if value is None else str(value)


def status():
    """Print a one-line summary for every registered agent."""
    agents = load()
    print("\n=== Agent Fleet ===")
    if not agents:
        print(" No agents registered.")
        return
    for name, a in agents.items():
        # provision() stores last_heartbeat as an explicit None, so a plain
        # .get(key, default) would never fall back to the placeholder.
        state = _field(a, "state", "?")
        vps = _field(a, "vps", "?")
        model = _field(a, "model", "?")
        tasks = a.get("tasks_completed", 0)
        hb = _field(a, "last_heartbeat", "never")
        print(f" {name:15s} state={state:12s} vps={vps:5s} model={model:15s} tasks={tasks} hb={hb}")


def provision(name, vps, model=DEFAULT_MODEL):
    """Register a new agent in the ``provisioning`` state.

    Does nothing except print a notice if *name* is already registered.

    Args:
        name: Unique agent name; used as the registry key.
        vps: Identifier of the host the agent is assigned to.
        model: Model name recorded for the agent.
    """
    agents = load()
    if name in agents:
        print(f" '{name}' already exists (state={agents[name].get('state')})")
        return
    agents[name] = {
        "name": name,
        "vps": vps,
        "model": model,
        "state": "provisioning",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "tasks_completed": 0,
        "tasks_failed": 0,
        "last_heartbeat": None,
    }
    save(agents)
    log(f"Provisioned '{name}' on {vps} with {model}")


def deploy(name):
    """Mark a registered agent as ``deployed`` and stamp ``deployed_at``.

    Prints a notice and changes nothing if *name* is not registered.
    """
    agents = load()
    if name not in agents:
        print(f" '{name}' not found")
        return
    agents[name]["state"] = "deployed"
    agents[name]["deployed_at"] = datetime.now(timezone.utc).isoformat()
    save(agents)
    log(f"Deployed '{name}'")


def retire(name):
    """Mark a registered agent as ``retired`` and stamp ``retired_at``.

    Prints a notice and changes nothing if *name* is not registered.
    """
    agents = load()
    if name not in agents:
        print(f" '{name}' not found")
        return
    agents[name]["state"] = "retired"
    agents[name]["retired_at"] = datetime.now(timezone.utc).isoformat()
    save(agents)
    log(f"Retired '{name}'. Completed {agents[name].get('tasks_completed', 0)} tasks.")


def _parse_heartbeat(raw):
    """Parse an ISO-8601 heartbeat string into a timezone-aware datetime.

    A timestamp without an offset is assumed to be UTC, so it can be
    subtracted from an aware "now" instead of raising ``TypeError``.

    Raises:
        ValueError, TypeError: If *raw* is not a parseable ISO string.
    """
    parsed = datetime.fromisoformat(raw)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def monitor():
    """Flag deployed agents whose heartbeat is older than the idle limit.

    Agents that are not ``deployed``, or that have never sent a heartbeat,
    are left untouched. Heartbeats that cannot be parsed are logged as a
    warning and skipped. The registry is rewritten only if something
    changed.
    """
    agents = load()
    now = datetime.now(timezone.utc)
    changes = 0
    for name, a in agents.items():
        if a.get("state") != "deployed":
            continue
        hb = a.get("last_heartbeat")
        if not hb:
            continue
        try:
            hb_t = _parse_heartbeat(hb)
        except (ValueError, TypeError):
            # Surface bad data instead of silently treating it as healthy.
            log(f"'{name}' has unparseable heartbeat {hb!r}", level="WARN")
            continue
        hours = (now - hb_t).total_seconds() / 3600
        if hours > IDLE_AFTER_HOURS:
            a["state"] = "idle"
            a["idle_since"] = now.isoformat()
            log(f"'{name}' idle for {hours:.1f}h")
            changes += 1
    if changes:
        save(agents)
    print(f"Monitor: {changes} state changes" if changes else "Monitor: all healthy")


def _model_from_args(extra):
    """Pick the model out of the arguments that follow ``<name> <vps>``.

    Accepts ``--model NAME``, ``--model=NAME`` and, for backward
    compatibility, a bare positional ``NAME``. Falls back to the default
    model when nothing usable is supplied.
    """
    if not extra:
        return DEFAULT_MODEL
    head = extra[0]
    if head.startswith("--model="):
        return head.split("=", 1)[1] or DEFAULT_MODEL
    if head == "--model":
        return extra[1] if len(extra) > 1 else DEFAULT_MODEL
    return head


def main(argv=None):
    """Dispatch a CLI command.

    Args:
        argv: Arguments without the program name; defaults to
            ``sys.argv[1:]``. With no arguments the ``monitor`` command
            is run.
    """
    if argv is None:
        argv = sys.argv[1:]
    ensure()
    cmd = argv[0] if argv else "monitor"
    if cmd == "status":
        status()
    elif cmd == "provision" and len(argv) >= 3:
        provision(argv[1], argv[2], _model_from_args(argv[3:]))
    elif cmd == "deploy" and len(argv) >= 2:
        deploy(argv[1])
    elif cmd == "retire" and len(argv) >= 2:
        retire(argv[1])
    elif cmd in ("monitor", "run"):
        # "run" is kept as an alias for "monitor".
        monitor()
    else:
        print("Usage: agent_lifecycle.py [provision|deploy|retire|status|monitor]")


if __name__ == "__main__":
    main()