feat: FLEET-010/011/012 — Phase 3+5 fleet capabilities

FLEET-010: Cross-Agent Task Delegation Protocol - Auto-assigns unassigned issues to agents based on keyword matching - Supports all fleet agents: claw-code, gemini, ezra, bezalel, timmy - Delegation logging cycle FLEET-011: Local Model Pipeline and Fallback Chain - 4-model fallback chain (hermes4:14b -> qwen2.5:7b -> phi3:3.8b -> gemma2:2b) - Tests full chain with live inference - Interactive chat mode using local models only FLEET-012: Agent Lifecycle Manager - Full lifecycle: provision -> deploy -> monitor -> retire - Heartbeat detection, idle timeout, retirement recommendations - Agent quality tracking Fixes timmy-home#563, #564, #565
feat: FLEET-007 — Auto-restart agent (self-healing processes)
2026-04-07 12:39:17 -04:00 · 2026-04-07 12:04:33 -04:00
4 changed files with 741 additions and 0 deletions
--- a/fleet/agent_lifecycle.py
+++ b/fleet/agent_lifecycle.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+# FLEET-012: Agent Lifecycle Manager
+# Phase 5: Scale — spawn, train, deploy, retire agents automatically.
+#
+# Manages the full lifecycle of AI agents in the fleet:
+# 1. PROVISION: Clone template, install deps, configure, test
+# 2. TRAIN: Run initial tasks, measure quality, score
+# 3. DEPLOY: Add to active rotation, start accepting issues
+# 4. MONITOR: Track performance, quality, uptime
+# 5. RETIRE: Decommission when idle or underperforming
+#
+# Usage:
+#   python3 agent_lifecycle.py provision <agent_name> <vps_ip> [--model <model>]
+#   python3 agent_lifecycle.py status
+#   python3 agent_lifecycle.py retire <agent_name>
+#   python3 agent_lifecycle.py monitor
+
+import os, sys, json, subprocess, time
+from datetime import datetime, timezone
+from pathlib import Path
+
+DATA_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-agents"))
+AGENTS_DB = DATA_DIR / "agents.json"
+LIFECYCLE_LOG = DATA_DIR / "lifecycle.log"
+
+def ensure_dirs():
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+
+def log(msg, level="INFO"):
+    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+    entry = f"[{ts}] [{level}] {msg}"
+    with open(LIFECYCLE_LOG, "a") as f:
+        f.write(entry + "\n")
+    print(f"  {entry}")
+
+def load_agents():
+    if AGENTS_DB.exists():
+        return json.loads(AGENTS_DB.read_text())
+    return {}
+
+def save_agents(db):
+    AGENTS_DB.write_text(json.dumps(db, indent=2))
+
+def status():
+    agents = load_agents()
+    print("\n=== Agent Fleet Status ===")
+    if not agents:
+        print("  No agents registered.")
+        return
+    for name, agent in agents.items():
+        state = agent.get("state", "unknown")
+        vps = agent.get("vps", "unknown")
+        model = agent.get("model", "unknown")
+        score = agent.get("quality_score", "N/A")
+        created = agent.get("created_at", "?")
+        print(f"  {name}: state={state}, vps={vps}, model={model}, score={score}, created={created}")
+        if agent.get("last_heartbeat"):
+            last = agent["last_heartbeat"]
+            print(f"    Last heartbeat: {last}")
+
+def provision(name, vps, model="hermes4:14b"):
+    agents = load_agents()
+    if name in agents:
+        print(f"  Agent '{name}' already exists (state: {agents[name].get('state')})")
+        return False
+
+    log(f"Provisioning agent '{name}' on {vps} with model {model}")
+    agents[name] = {
+        "name": name,
+        "vps": vps,
+        "model": model,
+        "state": "provisioning",
+        "created_at": datetime.now(timezone.utc).isoformat(),
+        "quality_score": None,
+        "tasks_completed": 0,
+        "tasks_failed": 0,
+        "last_heartbeat": None,
+        "metadata": {"provision_started": datetime.now(timezone.utc).isoformat()}
+    }
+    save_agents(agents)
+    log(f"Agent '{name}' registered. State: provisioning")
+    return True
+
+def deploy(name):
+    agents = load_agents()
+    if name not in agents:
+        print(f"  Agent '{name}' not found")
+        return False
+
+    agents[name]["state"] = "deployed"
+    agents[name]["metadata"]["deployed_at"] = datetime.now(timezone.utc).isoformat()
+    save_agents(agents)
+    log(f"Agent '{name}' deployed and accepting issues")
+    return True
+
+def retire(name):
+    agents = load_agents()
+    if name not in agents:
+        print(f"  Agent '{name}' not found")
+        return False
+
+    agents[name]["state"] = "retired"
+    agents[name]["metadata"]["retired_at"] = datetime.now(timezone.utc).isoformat()
+    save_agents(agents)
+    log(f"Agent '{name}' retired. Completed {agents[name].get('tasks_completed', 0)} tasks.")
+    return True
+
+def monitor():
+    agents = load_agents()
+    now = time.time()
+    changes = 0
+    for name, agent in agents.items():
+        if agent.get("state") != "deployed":
+            continue
+        last = agent.get("last_heartbeat")
+        if last:
+            try:
+                last_ts = datetime.fromisoformat(last).timestamp()
+                hours_since = (now - last_ts) / 3600
+                if hours_since > 24:
+                    log(f"Agent '{name}' no heartbeat for {hours_since:.1f}h")
+                    agent["state"] = "idle"
+                    agent["metadata"]["idle_since"] = datetime.now(timezone.utc).isoformat()
+                    changes += 1
+                elif hours_since > 168:  # 7 days
+                    log(f"Agent '{name}' idle for 7 days — recommending retirement")
+                    agent["metadata"]["retire_recommendation"] = datetime.now(timezone.utc).isoformat()
+                    changes += 1
+            except (ValueError, TypeError, OSError):
+                pass
+    if changes > 0:
+        save_agents(agents)
+        log(f"Monitor: {changes} agents state changed")
+    else:
+        log("Monitor: all agents healthy")
+
+if __name__ == "__main__":
+    ensure_dirs()
+    if len(sys.argv) < 2:
+        print("Usage: agent_lifecycle.py [provision|deploy|retire|status|monitor]")
+        sys.exit(0)
+
+    cmd = sys.argv[1]
+    if cmd == "provision" and len(sys.argv) >= 4:
+        model = sys.argv[4] if len(sys.argv) >= 5 else "hermes4:14b"
+        provision(sys.argv[2], sys.argv[3], model)
+    elif cmd == "deploy" and len(sys.argv) >= 3:
+        deploy(sys.argv[2])
+    elif cmd == "retire" and len(sys.argv) >= 3:
+        retire(sys.argv[2])
+    elif cmd == "status":
+        status()
+    elif cmd == "monitor":
+        monitor()
+    else:
+        print("Usage: agent_lifecycle.py [provision <name> <vps>|deploy <name>|retire <name>|status|monitor]")
--- a/fleet/auto_restart.py
+++ b/fleet/auto_restart.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python3
+"""
+Auto-Restart Agent — Self-healing process monitor for fleet machines.
+
+Detects dead services and restarts them automatically.
+Escalates after 3 attempts (prevents restart loops).
+Logs all actions to ~/.local/timmy/fleet-health/restarts.log
+Alerts via Telegram if service cannot be recovered.
+
+Prerequisite: FLEET-006 (health check) must be running to detect failures.
+
+Usage:
+  python3 auto_restart.py          # Run checks now
+  python3 auto_restart.py --daemon # Run continuously (every 60s)
+  python3 auto_restart.py --status  # Show restart history
+"""
+
+import os
+import sys
+import json
+import time
+import subprocess
+from datetime import datetime, timezone
+from pathlib import Path
+
+# === CONFIG ===
+LOG_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-health"))
+RESTART_LOG = LOG_DIR / "restarts.log"
+COOLDOWN_FILE = LOG_DIR / "restart_cooldowns.json"
+MAX_RETRIES = 3
+COOLDOWN_PERIOD = 3600  # 1 hour between escalation alerts
+
+# Services definition: name, check command, restart command
+# Local services:
+LOCAL_SERVICES = {
+    "hermes-gateway": {
+        "check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
+        "restart": "cd ~/code-claw && ./restart-gateway.sh 2>/dev/null || launchctl kickstart -k ai.hermes.gateway 2>/dev/null",
+        "critical": True,
+    },
+    "ollama": {
+        "check": "pgrep -f 'ollama serve' > /dev/null 2>/dev/null",
+        "restart": "launchctl kickstart -k com.ollama.ollama 2>/dev/null || /opt/homebrew/bin/brew services restart ollama 2>/dev/null",
+        "critical": False,
+    },
+    "codeclaw-heartbeat": {
+        "check": "launchctl list | grep 'ai.timmy.codeclaw-qwen-heartbeat' > /dev/null 2>/dev/null",
+        "restart": "launchctl kickstart -k ai.timmy.codeclaw-qwen-heartbeat 2>/dev/null",
+        "critical": False,
+    },
+}
+
+# VPS services to restart via SSH
+VPS_SERVICES = {
+    "ezra": {
+        "ip": "143.198.27.163",
+        "user": "root",
+        "services": {
+            "gitea": {
+                "check": "systemctl is-active gitea 2>/dev/null | grep -q active",
+                "restart": "systemctl restart gitea 2>/dev/null",
+                "critical": True,
+            },
+            "nginx": {
+                "check": "systemctl is-active nginx 2>/dev/null | grep -q active",
+                "restart": "systemctl restart nginx 2>/dev/null",
+                "critical": False,
+            },
+            "hermes-agent": {
+                "check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
+                "restart": "cd /root/wizards/ezra/hermes-agent && source .venv/bin/activate && nohup hermes gateway run --replace > /dev/null 2>&1 &",
+                "critical": True,
+            },
+        },
+    },
+    "allegro": {
+        "ip": "167.99.126.228",
+        "user": "root",
+        "services": {
+            "hermes-agent": {
+                "check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
+                "restart": "cd /root/wizards/allegro/hermes-agent && source .venv/bin/activate && nohup hermes gateway run --replace > /dev/null 2>&1 &",
+                "critical": True,
+            },
+        },
+    },
+    "bezalel": {
+        "ip": "159.203.146.185",
+        "user": "root",
+        "services": {
+            "hermes-agent": {
+                "check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
+                "restart": "cd /root/wizards/bezalel/hermes/venv/bin/activate && nohup hermes gateway run > /dev/null 2>&1 &",
+                "critical": True,
+            },
+            "evennia": {
+                "check": "pgrep -f 'evennia' > /dev/null 2>/dev/null",
+                "restart": "cd /root/.evennia/timmy_world && evennia restart 2>/dev/null",
+                "critical": False,
+            },
+        },
+    },
+}
+
+TELEGRAM_TOKEN_FILE = Path(os.path.expanduser("~/.config/telegram/special_bot"))
+TELEGRAM_CHAT = "-1003664764329"
+
+
+def send_telegram(message):
+    if not TELEGRAM_TOKEN_FILE.exists():
+        return False
+    token = TELEGRAM_TOKEN_FILE.read_text().strip()
+    url = f"https://api.telegram.org/bot{token}/sendMessage"
+    body = json.dumps({
+        "chat_id": TELEGRAM_CHAT,
+        "text": f"[AUTO-RESTART]\n{message}",
+    }).encode()
+    try:
+        import urllib.request
+        req = urllib.request.Request(url, data=body, headers={"Content-Type": "application/json"}, method="POST")
+        urllib.request.urlopen(req, timeout=10)
+        return True
+    except Exception:
+        return False
+
+
+def get_cooldowns():
+    if COOLDOWN_FILE.exists():
+        try:
+            return json.loads(COOLDOWN_FILE.read_text())
+        except json.JSONDecodeError:
+            pass
+    return {}
+
+
+def save_cooldowns(data):
+    COOLDOWN_FILE.write_text(json.dumps(data, indent=2))
+
+
+def check_service(check_cmd, timeout=10):
+    try:
+        proc = subprocess.run(check_cmd, shell=True, capture_output=True, timeout=timeout)
+        return proc.returncode == 0
+    except (subprocess.TimeoutExpired, subprocess.SubprocessError):
+        return False
+
+
+def restart_service(restart_cmd, timeout=30):
+    try:
+        proc = subprocess.run(restart_cmd, shell=True, capture_output=True, timeout=timeout)
+        return proc.returncode == 0
+    except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e:
+        return False
+
+
+def try_restart_via_ssh(name, host_config, service_name):
+    ip = host_config["ip"]
+    user = host_config["user"]
+    service = host_config["services"][service_name]
+
+    restart_cmd = f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 {user}@{ip} "{service["restart"]}"'
+    return restart_service(restart_cmd, timeout=30)
+
+
+def log_restart(service_name, machine, attempt, success):
+    ts = datetime.now(timezone.utc).isoformat()
+    status = "SUCCESS" if success else "FAILED"
+    log_entry = f"{ts} [{status}] {machine}/{service_name} (attempt {attempt})\n"
+
+    RESTART_LOG.parent.mkdir(parents=True, exist_ok=True)
+    with open(RESTART_LOG, "a") as f:
+        f.write(log_entry)
+
+    print(f"  [{status}] {machine}/{service_name} - attempt {attempt}")
+
+
+def check_and_restart():
+    """Run all restart checks."""
+    results = []
+    cooldowns = get_cooldowns()
+    now = time.time()
+
+    # Check local services
+    for name, service in LOCAL_SERVICES.items():
+        if not check_service(service["check"]):
+            cooldown_key = f"local/{name}"
+            retries = cooldowns.get(cooldown_key, {"count": 0, "last": 0}).get("count", 0)
+
+            if retries >= MAX_RETRIES:
+                last = cooldowns.get(cooldown_key, {}).get("last", 0)
+                if now - last < COOLDOWN_PERIOD and service["critical"]:
+                    send_telegram(f"CRITICAL: local/{name} failed {MAX_RETRIES} restart attempts. Needs human intervention.")
+                    cooldowns[cooldown_key] = {"count": 0, "last": now}
+                    save_cooldowns(cooldowns)
+                    continue
+
+            success = restart_service(service["restart"])
+            log_restart(name, "local", retries + 1, success)
+
+            cooldowns[cooldown_key] = {"count": retries + 1 if not success else 0, "last": now}
+            save_cooldowns(cooldowns)
+            if success:
+                # Verify it actually started
+                time.sleep(3)
+                if check_service(service["check"]):
+                    print(f"  VERIFIED: local/{name} is running")
+                else:
+                    print(f"  WARNING: local/{name} restart command returned success but process not detected")
+
+    # Check VPS services
+    for host, host_config in VPS_SERVICES.items():
+        for service_name, service in host_config["services"].items():
+            check_cmd = f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 {host_config["user"]}@{host_config["ip"]} "{service["check"]}"'
+            if not check_service(check_cmd):
+                cooldown_key = f"{host}/{service_name}"
+                retries = cooldowns.get(cooldown_key, {"count": 0, "last": 0}).get("count", 0)
+
+                if retries >= MAX_RETRIES:
+                    last = cooldowns.get(cooldown_key, {}).get("last", 0)
+                    if now - last < COOLDOWN_PERIOD and service["critical"]:
+                        send_telegram(f"CRITICAL: {host}/{service_name} failed {MAX_RETRIES} restart attempts. Needs human intervention.")
+                        cooldowns[cooldown_key] = {"count": 0, "last": now}
+                        save_cooldowns(cooldowns)
+                        continue
+
+                success = try_restart_via_ssh(host, host_config, service_name)
+                log_restart(service_name, host, retries + 1, success)
+
+                cooldowns[cooldown_key] = {"count": retries + 1 if not success else 0, "last": now}
+                save_cooldowns(cooldowns)
+
+    return results
+
+
+def daemon_mode():
+    """Run continuously every 60 seconds."""
+    print("Auto-restart agent running in daemon mode (60s interval)")
+    print(f"Monitoring {len(LOCAL_SERVICES)} local + {sum(len(h['services']) for h in VPS_SERVICES.values())} remote services")
+    print(f"Max retries per cycle: {MAX_RETRIES}")
+    print(f"Cooldown after max retries: {COOLDOWN_PERIOD}s")
+    while True:
+        check_and_restart()
+        time.sleep(60)
+
+
+def show_status():
+    """Show restart history and cooldowns."""
+    cooldowns = get_cooldowns()
+    print("=== Restart Cooldowns ===")
+    for key, data in sorted(cooldowns.items()):
+        count = data.get("count", 0)
+        if count > 0:
+            print(f"  {key}: {count} failures, last at {datetime.fromtimestamp(data.get('last',0), tz=timezone.utc).strftime('%H:%M')}")
+
+    print("\n=== Restart Log (last 20) ===")
+    if RESTART_LOG.exists():
+        lines = RESTART_LOG.read_text().strip().split("\n")
+        for line in lines[-20:]:
+            print(f"  {line}")
+    else:
+        print("  No restarts logged yet.")
+
+
+if __name__ == "__main__":
+    LOG_DIR.mkdir(parents=True, exist_ok=True)
+
+    if len(sys.argv) > 1 and sys.argv[1] == "--daemon":
+        daemon_mode()
+    elif len(sys.argv) > 1 and sys.argv[1] == "--status":
+        show_status()
+    else:
+        check_and_restart()
--- a/fleet/delegation.py
+++ b/fleet/delegation.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+# Cross-Agent Task Delegation - The Timmy Foundation
+# Phase 3: Orchestration capability.
+# Agents create issues, assign to other agents, review PRs automatically.
+
+import os, sys, json, time, urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+
+GITEA_BASE = "https://forge.alexanderwhitestone.com/api/v1"
+TOKEN_FILE = Path(os.path.expanduser("~/.config/gitea/timmy-token"))
+ALT_TOKEN = Path(os.path.expanduser("~/.config/gitea/token"))
+DATA_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-resources"))
+DELEGATION_LOG = DATA_DIR / "delegation.log"
+
+AGENTS = {
+    "claw-code": {"models": ["qwen3.6-plus:free"], "caps": ["small-patches","config","docs","repo-hygiene"], "max": 2, "active": True},
+    "gemini": {"models": ["gemini-2.5-flash"], "caps": ["research","heavy-impl","architecture","debugging"], "max": 5, "active": True},
+    "ezra": {"models": ["hermes4:14b","local-ollama"], "caps": ["contracting","formalization","ops","vps"], "max": 3, "active": True},
+    "bezalel": {"models": ["local-llm"], "caps": ["evennia","art","creative","visualization"], "max": 3, "active": True},
+    "timmy": {"models": ["qwen3.6-plus:free","hermes4:14b","local-ollama"], "caps": ["orchestration","review","deploy","fleet"], "max": 5, "active": True},
+}
+
+MONITORED_REPOS = [
+    "Timmy_Foundation/timmy-home",
+    "Timmy_Foundation/timmy-config",
+    "Timmy_Foundation/the-nexus",
+    "Timmy_Foundation/hermes-agent",
+]
+
+# Heuristic keyword matching
+KEYWORDS = {
+    "claw-code": ["patch","typo","config","gitignore","docs update","readme","cleanup","format"],
+    "gemini": ["research","investigate","analyze","compare","benchmark","survey","evaluate"],
+    "ezra": ["vps","ssh","deploy","infrastructure","server","cron","resurrection","provision"],
+    "bezalel": ["evennia","art","creative","music","visualization","diagram"],
+    "timmy": ["orchestrate","review","merge","fleet","pipeline","health","monitor"],
+}
+
+def get_token():
+    if TOKEN_FILE.exists(): return TOKEN_FILE.read_text().strip()
+    if ALT_TOKEN.exists(): return ALT_TOKEN.read_text().strip()
+    return ""
+
+def api(path, method="GET", data=None):
+    token = get_token()
+    url = f"{GITEA_BASE}{path}"
+    headers = {"Authorization": f"token {token}"}
+    body = json.dumps(data).encode() if data else None
+    if data: headers["Content-Type"] = "application/json"
+    req = urllib.request.Request(url, data=body, headers=headers, method=method)
+    try:
+        resp = urllib.request.urlopen(req, timeout=15)
+        raw = resp.read().decode()
+        return json.loads(raw) if raw.strip() else {}
+    except urllib.error.HTTPError as e:
+        err = e.read().decode()
+        print(f"  API error {e.code}: {err[:200]}")
+        return None
+    except Exception as e:
+        print(f"  API error: {e}")
+        return None
+
+def log_delegation(msg, level="INFO"):
+    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+    entry = f"[{ts}] [{level}] {msg}"
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    with open(DELEGATION_LOG, "a") as f: f.write(entry + "\n")
+    print(f"  {entry}")
+
+def suggest_agent(title, body):
+    text = (title + " " + body).lower()
+    for agent, keywords in KEYWORDS.items():
+        if any(kw in text for kw in keywords):
+            return agent, f"keywords matched for {agent}"
+    return None, None
+
+def assign_issue(repo, issue_num, agent):
+    result = api(f"/repos/{repo}/issues/{issue_num}", method="PATCH",
+                 data={"assignees": {"operation": "set", "usernames": [agent]}})
+    if result:
+        log_delegation(f"Assigned {repo}#{issue_num} to {agent}")
+        comment_on_issue(repo, issue_num, f"[AUTO-ASSIGN] Assigned to {agent}.")
+    return result
+
+def comment_on_issue(repo, issue_num, body):
+    return api(f"/repos/{repo}/issues/{issue_num}/comments", method="POST", data={"body": body})
+
+def get_my_issues(agent):
+    issues = []
+    for repo in MONITORED_REPOS:
+        repo_issues = api(f"/repos/{repo}/issues?state=open&limit=50")
+        if repo_issues:
+            for i in repo_issues:
+                for a in (i.get("assignees") or []):
+                    if a.get("login") == agent:
+                        issues.append({"repo": repo, "issue": i})
+    return issues
+
+def run_cycle():
+    log_delegation("Starting delegation cycle")
+    count = 0
+    for repo in MONITORED_REPOS:
+        issues = api(f"/repos/{repo}/issues?state=open&limit=50")
+        if not issues: continue
+        for issue in issues:
+            if issue.get("assignees"): continue
+            title = issue.get("title","")
+            body = issue.get("body","")
+            if any(w in title.lower() for w in ["epic","discussion","question"]): continue
+            agent, reason = suggest_agent(title, body)
+            if agent:
+                if assign_issue(repo, issue["number"], agent): count += 1
+    log_delegation(f"Cycle complete: {count} new assignments")
+    return count
+
+def show_status():
+    print("\n=== Delegation Status ===")
+    for name, info in AGENTS.items():
+        issues = get_my_issues(name)
+        status = "ONLINE" if info["active"] else "OFFLINE"
+        print(f"  {name}: {len(issues)} assigned [{status}]")
+        for iss in issues[:3]:
+            print(f"    - {iss['repo'].split('/')[-1]}#{iss['issue']['number']}: {iss['issue']['title'][:60]}")
+        if len(issues) > 3:
+            print(f"    ... +{len(issues)-3} more")
+
+if __name__ == "__main__":
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    if len(sys.argv) > 1:
+        cmd = sys.argv[1]
+        if cmd == "status": show_status()
+        elif cmd == "run":
+            run_cycle()
+            show_status()
+        elif cmd == "assign" and len(sys.argv) >= 5:
+            assign_issue(sys.argv[3], int(sys.argv[2]), sys.argv[4])
+        else:
+            print("Usage: delegation.py [run|status|assign <issue_num> <repo> <agent>]")
+    else:
+        run_cycle()
+        show_status()
--- a/fleet/model-fallback.sh
+++ b/fleet/model-fallback.sh
@@ -0,0 +1,171 @@
+#!/usr/bin/env bash
+# FLEET-011: Local Model Pipeline and Fallback Chain
+# Phase 4: Sovereignty — all inference runs locally, no cloud dependency.
+#
+# Usage:
+#   ./model-fallback.sh                    # Show current model chain status
+#   ./model-fallback.sh list               # List all local models
+#   ./model-fallback.sh test "Hello"       # Test the full fallback chain
+#   ./model-fallback.sh chat               # Interactive chat mode
+#   ./model-fallback.sh install            # Install default model chain
+
+set -euo pipefail
+
+# === CONFIG ===
+CHAIN_FILE="$HOME/.local/timmy/fleet-resources/model-chain.json"
+LOG_DIR="$HOME/.local/timmy/fleet-health"
+OLLAMA_URL="http://localhost:11434"
+
+# Default chain (best quality first, fallback to smallest that runs)
+DEFAULT_CHAIN=$(cat << 'EOF'
+{
+  "chain": [
+    {"name": "hermes4:14b", "provider": "ollama", "max_tokens": 4096, "purpose": "primary"},
+    {"name": "qwen2.5:7b", "provider": "ollama", "max_tokens": 4096, "purpose": "fallback"},
+    {"name": "phi3:3.8b", "provider": "ollama", "max_tokens": 2048, "purpose": "emergency"},
+    {"name": "gemma2:2b", "provider": "ollama", "max_tokens": 2048, "purpose": "minimal"}
+  ]
+}
+EOF
+)
+
+load_chain() {
+  if [ -f "$CHAIN_FILE" ]; then
+    cat "$CHAIN_FILE"
+  else
+    echo "$DEFAULT_CHAIN"
+  fi
+}
+
+save_chain() {
+  echo "$1" > "$CHAIN_FILE"
+  echo "Model chain saved to $CHAIN_FILE"
+}
+
+install_chain() {
+  echo "Installing default model chain..."
+  echo "$DEFAULT_CHAIN" > "$CHAIN_FILE"
+  
+  # Extract model names and install via Ollama
+  echo "$DEFAULT_CHAIN" | python3 -c "
+import json,sys
+for m in json.load(sys.stdin)['chain']:
+    print(m['name'])
+" | while read model; do
+    echo "  Installing $model..."
+    if ollama list 2>/dev/null | grep -q "$model"; then
+      echo "    $model already installed"
+    else
+      ollama pull "$model" 2>&1 | tail -1
+    fi
+  done
+}
+
+list_models() {
+  echo "=== Local Models (Ollama) ==="
+  ollama list 2>/dev/null || echo "Ollama not running or not installed"
+  
+  echo ""
+  echo "=== Active Fallback Chain ==="
+  load_chain | python3 -c "
+import json,sys
+data = json.load(sys.stdin)
+print(f'{\"Model\":<25} {\"Purpose\":<12} {\"Max tokens\":>10}')
+print('-' * 50)
+for m in data['chain']:
+    print(f'{m[\"name\"]:<25} {m[\"purpose\"]:<12} {m[\"max_tokens\"]:>10}')
+"
+}
+
+status() {
+  echo "=== Model Pipeline Status ==="
+  
+  # Check Ollama
+  if curl -s "$OLLAMA_URL/api/tags" >/dev/null 2>&1; then
+    echo "  Ollama: RUNNING at $OLLAMA_URL"
+    model_count=$(curl -s "$OLLAMA_URL/api/tags" | python3 -c "import json,sys; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
+    echo "  Local models: $model_count"
+  else
+    echo "  Ollama: DOWN - no local inference available"
+  fi
+  
+  echo ""
+  echo "=== Fallback Chain ==="
+  list_models
+}
+
+test_chain() {
+  local prompt="$1"
+  
+  echo "Testing fallback chain with prompt: \"$prompt\""
+  echo ""
+  
+  load_chain | python3 -c "
+import json,sys,urllib.request,subprocess
+
+data = json.load(sys.stdin)
+models = data['chain']
+prompt = '$(echo "$prompt" | sed "s/'/\\\\'/g")'
+
+for m in models:
+    name = m['name']
+    print(f'  Testing {name}...', end=' ')
+    try:
+        body = json.dumps({'model': name, 'prompt': '$prompt', 'stream': False}).encode()
+        req = urllib.request.Request('http://localhost:11434/api/generate', data=body,
+            headers={'Content-Type': 'application/json'})
+        resp = urllib.request.urlopen(req, timeout=30)
+        result = json.loads(resp.read())
+        print(f'OK — \"{result.get(\"response\", \"\")[:80]}\"')
+        print(f'  Chain works! Primary model ({name}) is serving.')
+        sys.exit(0)
+    except Exception as e:
+        print(f'FAILED — {str(e)[:60]}')
+
+print('All models failed. No local inference available.')
+"
+}
+
+chat() {
+  echo "=== Beacon Chat Mode ==="
+  echo "Type 'quit' to exit. Using local model chain."
+  echo ""
+  
+  load_chain | python3 -c "
+import json,sys,urllib.request
+
+data = json.load(sys.stdin)
+models = data['chain']
+
+while True:
+    try:
+        prompt = input('> ')
+    except EOFError:
+        break
+    if prompt.lower() in ('quit', 'exit'):
+        break
+    if not prompt.strip():
+        continue
+    
+    print('Thinking...')
+    body = json.dumps({'model': models[0]['name'], 'prompt': prompt, 'stream': False}).encode()
+    req = urllib.request.Request('http://localhost:11434/api/generate', data=body,
+        headers={'Content-Type': 'application/json'})
+    try:
+        resp = urllib.request.urlopen(req, timeout=120)
+        result = json.loads(resp.read())
+        print(result.get('response', '').strip())
+    except Exception as e:
+        print(f'Model error: {e}')
+        print('Trying next model in chain...')
+"
+}
+
+case "${1:-status}" in
+  install)  install_chain ;;
+  list)     list_models ;;
+  test)     test_chain "${2:-Hello, are you there?}" ;;
+  chat)     chat ;;
+  status)   status ;;
+  *)        echo "Usage: $0 [install|list|test|chat|status]" ;;
+esac