feat: FLEET-010/011/012 — Phase 3+5 fleet capabilities

FLEET-010: Cross-Agent Task Delegation Protocol - Auto-assigns unassigned issues to agents based on keyword matching - Supports all fleet agents: claw-code, gemini, ezra, bezalel, timmy - Delegation logging cycle FLEET-011: Local Model Pipeline and Fallback Chain - 4-model fallback chain (hermes4:14b -> qwen2.5:7b -> phi3:3.8b -> gemma2:2b) - Tests full chain with live inference - Interactive chat mode using local models only FLEET-012: Agent Lifecycle Manager - Full lifecycle: provision -> deploy -> monitor -> retire - Heartbeat detection, idle timeout, retirement recommendations - Agent quality tracking Fixes timmy-home#563, #564, #565
2026-04-07 12:39:17 -04:00
3 changed files with 469 additions and 0 deletions
--- a/fleet/agent_lifecycle.py
+++ b/fleet/agent_lifecycle.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+# FLEET-012: Agent Lifecycle Manager
+# Phase 5: Scale — spawn, train, deploy, retire agents automatically.
+#
+# Manages the full lifecycle of AI agents in the fleet:
+# 1. PROVISION: Clone template, install deps, configure, test
+# 2. TRAIN: Run initial tasks, measure quality, score
+# 3. DEPLOY: Add to active rotation, start accepting issues
+# 4. MONITOR: Track performance, quality, uptime
+# 5. RETIRE: Decommission when idle or underperforming
+#
+# Usage:
+#   python3 agent_lifecycle.py provision <agent_name> <vps_ip> [--model <model>]
+#   python3 agent_lifecycle.py deploy <agent_name>
+#   python3 agent_lifecycle.py status
+#   python3 agent_lifecycle.py retire <agent_name>
+#   python3 agent_lifecycle.py monitor
+
+import os, sys, json, subprocess, time
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Per-user state directory for the lifecycle manager ("~" is expanded at import time).
+DATA_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-agents"))
+# JSON registry of agents; read by load_agents() and rewritten by save_agents().
+AGENTS_DB = DATA_DIR / "agents.json"
+# Text log that log() appends one line to per call.
+LIFECYCLE_LOG = DATA_DIR / "lifecycle.log"
+
+def ensure_dirs():
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+
+def log(msg, level="INFO"):
+    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+    entry = f"[{ts}] [{level}] {msg}"
+    with open(LIFECYCLE_LOG, "a") as f:
+        f.write(entry + "\n")
+    print(f"  {entry}")
+
+def load_agents():
+    if AGENTS_DB.exists():
+        return json.loads(AGENTS_DB.read_text())
+    return {}
+
+def save_agents(db):
+    AGENTS_DB.write_text(json.dumps(db, indent=2))
+
+def status():
+    agents = load_agents()
+    print("\n=== Agent Fleet Status ===")
+    if not agents:
+        print("  No agents registered.")
+        return
+    for name, agent in agents.items():
+        state = agent.get("state", "unknown")
+        vps = agent.get("vps", "unknown")
+        model = agent.get("model", "unknown")
+        score = agent.get("quality_score", "N/A")
+        created = agent.get("created_at", "?")
+        print(f"  {name}: state={state}, vps={vps}, model={model}, score={score}, created={created}")
+        if agent.get("last_heartbeat"):
+            last = agent["last_heartbeat"]
+            print(f"    Last heartbeat: {last}")
+
+def provision(name, vps, model="hermes4:14b"):
+    agents = load_agents()
+    if name in agents:
+        print(f"  Agent '{name}' already exists (state: {agents[name].get('state')})")
+        return False
+
+    log(f"Provisioning agent '{name}' on {vps} with model {model}")
+    agents[name] = {
+        "name": name,
+        "vps": vps,
+        "model": model,
+        "state": "provisioning",
+        "created_at": datetime.now(timezone.utc).isoformat(),
+        "quality_score": None,
+        "tasks_completed": 0,
+        "tasks_failed": 0,
+        "last_heartbeat": None,
+        "metadata": {"provision_started": datetime.now(timezone.utc).isoformat()}
+    }
+    save_agents(agents)
+    log(f"Agent '{name}' registered. State: provisioning")
+    return True
+
+def deploy(name):
+    agents = load_agents()
+    if name not in agents:
+        print(f"  Agent '{name}' not found")
+        return False
+
+    agents[name]["state"] = "deployed"
+    agents[name]["metadata"]["deployed_at"] = datetime.now(timezone.utc).isoformat()
+    save_agents(agents)
+    log(f"Agent '{name}' deployed and accepting issues")
+    return True
+
+def retire(name):
+    agents = load_agents()
+    if name not in agents:
+        print(f"  Agent '{name}' not found")
+        return False
+
+    agents[name]["state"] = "retired"
+    agents[name]["metadata"]["retired_at"] = datetime.now(timezone.utc).isoformat()
+    save_agents(agents)
+    log(f"Agent '{name}' retired. Completed {agents[name].get('tasks_completed', 0)} tasks.")
+    return True
+
+def monitor():
+    agents = load_agents()
+    now = time.time()
+    changes = 0
+    for name, agent in agents.items():
+        if agent.get("state") != "deployed":
+            continue
+        last = agent.get("last_heartbeat")
+        if last:
+            try:
+                last_ts = datetime.fromisoformat(last).timestamp()
+                hours_since = (now - last_ts) / 3600
+                if hours_since > 24:
+                    log(f"Agent '{name}' no heartbeat for {hours_since:.1f}h")
+                    agent["state"] = "idle"
+                    agent["metadata"]["idle_since"] = datetime.now(timezone.utc).isoformat()
+                    changes += 1
+                elif hours_since > 168:  # 7 days
+                    log(f"Agent '{name}' idle for 7 days — recommending retirement")
+                    agent["metadata"]["retire_recommendation"] = datetime.now(timezone.utc).isoformat()
+                    changes += 1
+            except (ValueError, TypeError, OSError):
+                pass
+    if changes > 0:
+        save_agents(agents)
+        log(f"Monitor: {changes} agents state changed")
+    else:
+        log("Monitor: all agents healthy")
+
+if __name__ == "__main__":
+    ensure_dirs()
+    if len(sys.argv) < 2:
+        print("Usage: agent_lifecycle.py [provision|deploy|retire|status|monitor]")
+        sys.exit(0)
+
+    cmd = sys.argv[1]
+    if cmd == "provision" and len(sys.argv) >= 4:
+        model = sys.argv[4] if len(sys.argv) >= 5 else "hermes4:14b"
+        provision(sys.argv[2], sys.argv[3], model)
+    elif cmd == "deploy" and len(sys.argv) >= 3:
+        deploy(sys.argv[2])
+    elif cmd == "retire" and len(sys.argv) >= 3:
+        retire(sys.argv[2])
+    elif cmd == "status":
+        status()
+    elif cmd == "monitor":
+        monitor()
+    else:
+        print("Usage: agent_lifecycle.py [provision <name> <vps>|deploy <name>|retire <name>|status|monitor]")
--- a/fleet/delegation.py
+++ b/fleet/delegation.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+# Cross-Agent Task Delegation - The Timmy Foundation
+# Phase 3: Orchestration capability.
+# Agents create issues, assign to other agents, review PRs automatically.
+
+import os, sys, json, time, urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Base URL of the Gitea REST API; api() appends the request path to it.
+GITEA_BASE = "https://forge.alexanderwhitestone.com/api/v1"
+# Token files read by get_token(): TOKEN_FILE is tried first, then ALT_TOKEN.
+TOKEN_FILE = Path(os.path.expanduser("~/.config/gitea/timmy-token"))
+ALT_TOKEN = Path(os.path.expanduser("~/.config/gitea/token"))
+# Directory holding the delegation log; created on demand before writing.
+DATA_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-resources"))
+DELEGATION_LOG = DATA_DIR / "delegation.log"
+
+# Static roster of fleet agents, keyed by the username used for assignment.
+# show_status() iterates this dict and reads each entry's "active" flag.
+# NOTE(review): "models", "caps" and "max" are not read anywhere in this file;
+# "max" looks like a per-agent concurrency cap — confirm before relying on it.
+AGENTS = {
+    "claw-code": {"models": ["qwen3.6-plus:free"], "caps": ["small-patches","config","docs","repo-hygiene"], "max": 2, "active": True},
+    "gemini": {"models": ["gemini-2.5-flash"], "caps": ["research","heavy-impl","architecture","debugging"], "max": 5, "active": True},
+    "ezra": {"models": ["hermes4:14b","local-ollama"], "caps": ["contracting","formalization","ops","vps"], "max": 3, "active": True},
+    "bezalel": {"models": ["local-llm"], "caps": ["evennia","art","creative","visualization"], "max": 3, "active": True},
+    "timmy": {"models": ["qwen3.6-plus:free","hermes4:14b","local-ollama"], "caps": ["orchestration","review","deploy","fleet"], "max": 5, "active": True},
+}
+
+# Repositories ("owner/name") whose open issues are fetched by run_cycle()
+# and get_my_issues().
+MONITORED_REPOS = [
+    "Timmy_Foundation/timmy-home",
+    "Timmy_Foundation/timmy-config",
+    "Timmy_Foundation/the-nexus",
+    "Timmy_Foundation/hermes-agent",
+]
+
+# Heuristic keyword matching
+# Maps agent name -> lowercase keywords. suggest_agent() walks the agents in
+# this dict's order and returns the first one with a matching keyword, so the
+# order of entries doubles as the tie-break priority.
+KEYWORDS = {
+    "claw-code": ["patch","typo","config","gitignore","docs update","readme","cleanup","format"],
+    "gemini": ["research","investigate","analyze","compare","benchmark","survey","evaluate"],
+    "ezra": ["vps","ssh","deploy","infrastructure","server","cron","resurrection","provision"],
+    "bezalel": ["evennia","art","creative","music","visualization","diagram"],
+    "timmy": ["orchestrate","review","merge","fleet","pipeline","health","monitor"],
+}
+
+def get_token():
+    if TOKEN_FILE.exists(): return TOKEN_FILE.read_text().strip()
+    if ALT_TOKEN.exists(): return ALT_TOKEN.read_text().strip()
+    return ""
+
+def api(path, method="GET", data=None):
+    token = get_token()
+    url = f"{GITEA_BASE}{path}"
+    headers = {"Authorization": f"token {token}"}
+    body = json.dumps(data).encode() if data else None
+    if data: headers["Content-Type"] = "application/json"
+    req = urllib.request.Request(url, data=body, headers=headers, method=method)
+    try:
+        resp = urllib.request.urlopen(req, timeout=15)
+        raw = resp.read().decode()
+        return json.loads(raw) if raw.strip() else {}
+    except urllib.error.HTTPError as e:
+        err = e.read().decode()
+        print(f"  API error {e.code}: {err[:200]}")
+        return None
+    except Exception as e:
+        print(f"  API error: {e}")
+        return None
+
+def log_delegation(msg, level="INFO"):
+    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+    entry = f"[{ts}] [{level}] {msg}"
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    with open(DELEGATION_LOG, "a") as f: f.write(entry + "\n")
+    print(f"  {entry}")
+
+def suggest_agent(title, body):
+    text = (title + " " + body).lower()
+    for agent, keywords in KEYWORDS.items():
+        if any(kw in text for kw in keywords):
+            return agent, f"keywords matched for {agent}"
+    return None, None
+
+def assign_issue(repo, issue_num, agent):
+    result = api(f"/repos/{repo}/issues/{issue_num}", method="PATCH",
+                 data={"assignees": {"operation": "set", "usernames": [agent]}})
+    if result:
+        log_delegation(f"Assigned {repo}#{issue_num} to {agent}")
+        comment_on_issue(repo, issue_num, f"[AUTO-ASSIGN] Assigned to {agent}.")
+    return result
+
+def comment_on_issue(repo, issue_num, body):
+    return api(f"/repos/{repo}/issues/{issue_num}/comments", method="POST", data={"body": body})
+
+def get_my_issues(agent):
+    issues = []
+    for repo in MONITORED_REPOS:
+        repo_issues = api(f"/repos/{repo}/issues?state=open&limit=50")
+        if repo_issues:
+            for i in repo_issues:
+                for a in (i.get("assignees") or []):
+                    if a.get("login") == agent:
+                        issues.append({"repo": repo, "issue": i})
+    return issues
+
+def run_cycle():
+    log_delegation("Starting delegation cycle")
+    count = 0
+    for repo in MONITORED_REPOS:
+        issues = api(f"/repos/{repo}/issues?state=open&limit=50")
+        if not issues: continue
+        for issue in issues:
+            if issue.get("assignees"): continue
+            title = issue.get("title","")
+            body = issue.get("body","")
+            if any(w in title.lower() for w in ["epic","discussion","question"]): continue
+            agent, reason = suggest_agent(title, body)
+            if agent:
+                if assign_issue(repo, issue["number"], agent): count += 1
+    log_delegation(f"Cycle complete: {count} new assignments")
+    return count
+
+def show_status():
+    print("\n=== Delegation Status ===")
+    for name, info in AGENTS.items():
+        issues = get_my_issues(name)
+        status = "ONLINE" if info["active"] else "OFFLINE"
+        print(f"  {name}: {len(issues)} assigned [{status}]")
+        for iss in issues[:3]:
+            print(f"    - {iss['repo'].split('/')[-1]}#{iss['issue']['number']}: {iss['issue']['title'][:60]}")
+        if len(issues) > 3:
+            print(f"    ... +{len(issues)-3} more")
+
+if __name__ == "__main__":
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    if len(sys.argv) > 1:
+        cmd = sys.argv[1]
+        if cmd == "status": show_status()
+        elif cmd == "run":
+            run_cycle()
+            show_status()
+        elif cmd == "assign" and len(sys.argv) >= 5:
+            assign_issue(sys.argv[3], int(sys.argv[2]), sys.argv[4])
+        else:
+            print("Usage: delegation.py [run|status|assign <issue_num> <repo> <agent>]")
+    else:
+        run_cycle()
+        show_status()
--- a/fleet/model-fallback.sh
+++ b/fleet/model-fallback.sh
@@ -0,0 +1,171 @@
+#!/usr/bin/env bash
+# FLEET-011: Local Model Pipeline and Fallback Chain
+# Phase 4: Sovereignty — all inference runs locally, no cloud dependency.
+#
+# Usage:
+#   ./model-fallback.sh                    # Show current model chain status
+#   ./model-fallback.sh list               # List all local models
+#   ./model-fallback.sh test "Hello"       # Test the full fallback chain
+#   ./model-fallback.sh chat               # Interactive chat mode
+#   ./model-fallback.sh install            # Install default model chain
+
+set -euo pipefail
+
+# === CONFIG ===
+# CHAIN_FILE: saved chain definition; load_chain prefers it over DEFAULT_CHAIN.
+# LOG_DIR:    defined but not referenced anywhere else in this script.
+# OLLAMA_URL: base URL of the Ollama HTTP API queried by status().
+CHAIN_FILE="$HOME/.local/timmy/fleet-resources/model-chain.json"
+LOG_DIR="$HOME/.local/timmy/fleet-health"
+OLLAMA_URL="http://localhost:11434"
+
+# Default chain (best quality first, fallback to smallest that runs)
+# Stored as a JSON string; the quoted 'EOF' delimiter keeps the shell from
+# expanding anything inside the document.
+DEFAULT_CHAIN=$(cat << 'EOF'
+{
+  "chain": [
+    {"name": "hermes4:14b", "provider": "ollama", "max_tokens": 4096, "purpose": "primary"},
+    {"name": "qwen2.5:7b", "provider": "ollama", "max_tokens": 4096, "purpose": "fallback"},
+    {"name": "phi3:3.8b", "provider": "ollama", "max_tokens": 2048, "purpose": "emergency"},
+    {"name": "gemma2:2b", "provider": "ollama", "max_tokens": 2048, "purpose": "minimal"}
+  ]
+}
+EOF
+)
+
+load_chain() {
+  # Print the active chain as JSON: the saved file when present, otherwise
+  # the built-in default.
+  if [ ! -f "$CHAIN_FILE" ]; then
+    echo "$DEFAULT_CHAIN"
+  else
+    cat "$CHAIN_FILE"
+  fi
+}
+
+save_chain() {
+  # Write the chain JSON given as $1 to CHAIN_FILE.
+  # The parent directory is created first; on a fresh machine it does not
+  # exist and the redirect would abort the script under `set -e`.
+  mkdir -p "$(dirname "$CHAIN_FILE")"
+  # printf writes the data verbatim; echo may treat a leading "-n"/"-e" or
+  # backslashes specially.
+  printf '%s\n' "$1" > "$CHAIN_FILE"
+  echo "Model chain saved to $CHAIN_FILE"
+}
+
+install_chain() {
+  # Save the default chain to CHAIN_FILE and pull every model in it that
+  # Ollama does not already list.
+  echo "Installing default model chain..."
+  # The directory may not exist yet; without it the redirect below fails.
+  mkdir -p "$(dirname "$CHAIN_FILE")"
+  echo "$DEFAULT_CHAIN" > "$CHAIN_FILE"
+
+  # Collect installed model names once (first column, header row skipped).
+  # `|| true` keeps a missing or stopped Ollama from aborting under set -e.
+  local installed
+  installed=$(ollama list 2>/dev/null | awk 'NR > 1 {print $1}' || true)
+
+  # Extract model names and install via Ollama
+  echo "$DEFAULT_CHAIN" | python3 -c "
+import json,sys
+for m in json.load(sys.stdin)['chain']:
+    print(m['name'])
+" | while IFS= read -r model; do
+    echo "  Installing $model..."
+    # -x/-F: whole-line, literal comparison, so "." is not a regex wildcard
+    # and a name that merely contains the model name does not count.
+    if grep -qxF -- "$model" <<< "$installed"; then
+      echo "    $model already installed"
+    else
+      # Report a failed pull and carry on with the remaining models.
+      ollama pull "$model" 2>&1 | tail -1 || echo "    Failed to pull $model" >&2
+    fi
+  done
+}
+
+list_models() {
+  # Print the models Ollama reports, then the active fallback chain as a table.
+  echo "=== Local Models (Ollama) ==="
+  # A failing `ollama list` is reported as a message instead of aborting.
+  ollama list 2>/dev/null || echo "Ollama not running or not installed"
+  
+  echo ""
+  echo "=== Active Fallback Chain ==="
+  # The chain JSON is piped to an inline Python script on stdin. The script is
+  # inside a double-quoted shell string, hence the escaped inner quotes.
+  load_chain | python3 -c "
+import json,sys
+data = json.load(sys.stdin)
+print(f'{\"Model\":<25} {\"Purpose\":<12} {\"Max tokens\":>10}')
+print('-' * 50)
+for m in data['chain']:
+    print(f'{m[\"name\"]:<25} {m[\"purpose\"]:<12} {m[\"max_tokens\"]:>10}')
+"
+}
+
+status() {
+  # Report whether the Ollama API answers and how many models it lists,
+  # then print the model list and fallback chain.
+  echo "=== Model Pipeline Status ==="
+
+  local tags_url="$OLLAMA_URL/api/tags"
+  if ! curl -s "$tags_url" >/dev/null 2>&1; then
+    echo "  Ollama: DOWN - no local inference available"
+  else
+    echo "  Ollama: RUNNING at $OLLAMA_URL"
+    # Falls back to "?" when the tags response cannot be fetched or parsed.
+    model_count=$(curl -s "$tags_url" | python3 -c "import json,sys; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
+    echo "  Local models: $model_count"
+  fi
+
+  echo ""
+  echo "=== Fallback Chain ==="
+  list_models
+}
+
+test_chain() {
+  # Send the prompt in $1 to each model of the chain in order and stop at the
+  # first one that answers.
+  local prompt="$1"
+
+  echo "Testing fallback chain with prompt: \"$prompt\""
+  echo ""
+
+  # The prompt and API URL reach Python through the environment, never by
+  # being pasted into the source text: a quote in the prompt can neither
+  # break the script nor inject code. The script is single-quoted so the
+  # shell expands nothing inside it.
+  load_chain | FLEET_PROMPT="$prompt" FLEET_OLLAMA_URL="$OLLAMA_URL" python3 -c '
+import json, os, sys, urllib.request
+
+models = json.load(sys.stdin)["chain"]
+prompt = os.environ["FLEET_PROMPT"]
+url = os.environ["FLEET_OLLAMA_URL"].rstrip("/") + "/api/generate"
+
+for index, m in enumerate(models):
+    name = m["name"]
+    print(f"  Testing {name}...", end=" ")
+    try:
+        body = json.dumps({"model": name, "prompt": prompt, "stream": False}).encode()
+        req = urllib.request.Request(url, data=body,
+            headers={"Content-Type": "application/json"})
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            result = json.loads(resp.read())
+        reply = result.get("response", "")[:80]
+        print(f"OK — \"{reply}\"")
+        # Only the first chain entry is the primary; later ones are fallbacks.
+        role = "Primary" if index == 0 else "Fallback"
+        print(f"  Chain works! {role} model ({name}) is serving.")
+        sys.exit(0)
+    except Exception as e:
+        print(f"FAILED — {str(e)[:60]}")
+
+print("All models failed. No local inference available.")
+'
+}
+
+chat() {
+  # Interactive prompt loop against the local model chain. Each prompt is
+  # tried on the chain's models in order until one answers.
+  echo "=== Beacon Chat Mode ==="
+  echo "Type 'quit' to exit. Using local model chain."
+  echo ""
+
+  # The chain is handed over in the environment rather than piped in: a pipe
+  # would replace stdin, so input() would hit end-of-file straight away and
+  # the chat would exit before the first prompt.
+  FLEET_CHAIN="$(load_chain)" FLEET_OLLAMA_URL="$OLLAMA_URL" python3 -c '
+import json, os, urllib.request
+
+models = json.loads(os.environ["FLEET_CHAIN"])["chain"]
+url = os.environ["FLEET_OLLAMA_URL"].rstrip("/") + "/api/generate"
+
+while True:
+    try:
+        prompt = input("> ")
+    except (EOFError, KeyboardInterrupt):
+        break
+    if prompt.strip().lower() in ("quit", "exit"):
+        break
+    if not prompt.strip():
+        continue
+
+    print("Thinking...")
+    for index, m in enumerate(models):
+        name = m["name"]
+        body = json.dumps({"model": name, "prompt": prompt, "stream": False}).encode()
+        req = urllib.request.Request(url, data=body,
+            headers={"Content-Type": "application/json"})
+        try:
+            with urllib.request.urlopen(req, timeout=120) as resp:
+                result = json.loads(resp.read())
+        except Exception as e:
+            print(f"Model error: {e}")
+            # Actually move on to the next model instead of only saying so.
+            if index + 1 < len(models):
+                print("Trying next model in chain...")
+            continue
+        print(result.get("response", "").strip())
+        break
+    else:
+        print("All models failed. No local inference available.")
+'
+}
+
+case "${1:-status}" in
+  install)  install_chain ;;
+  list)     list_models ;;
+  test)     test_chain "${2:-Hello, are you there?}" ;;
+  chat)     chat ;;
+  status)   status ;;
+  *)        echo "Usage: $0 [install|list|test|chat|status]" ;;
+esac