FLEET-010: Cross-agent task delegation protocol - Keyword-based heuristic assigns unassigned issues to agents - Supports: claw-code, gemini, ezra, bezalel, timmy - Delegation logging and status dashboard - Auto-comments on assigned issues FLEET-011: Local model pipeline and fallback chain - Checks Ollama reachability and model availability - 4-model chain: hermes4:14b -> qwen2.5:7b -> phi3:3.8b -> gemma3:1b - Tests each model with live inference on every run - Fallback verification: finds first responding model - Chain configuration via ~/.local/timmy/fleet-resources/model-chain.json FLEET-012: Agent lifecycle manager - Full lifecycle: provision -> deploy -> monitor -> retire - Heartbeat detection with 24h idle threshold - Task completion/failure tracking - Agent Fleet Status dashboard Fixes timmy-home#563 (delegation), #564 (model pipeline), #565 (lifecycle)
127 lines
4.2 KiB
Python
127 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
FLEET-011: Local Model Pipeline and Fallback Chain
|
|
Phase 4: Sovereignty — all inference runs locally, no cloud dependency.
|
|
|
|
Checks Ollama endpoints, verifies model availability, tests fallback chain.
|
|
Logs results. The chain runs: hermes4:14b -> qwen2.5:7b -> phi3:3.8b -> gemma3:1b
|
|
|
|
Usage:
|
|
    python3 model_pipeline.py          # Show current model status (default)
|
|
python3 model_pipeline.py status # Show current model status
|
|
python3 model_pipeline.py list # List all local models
|
|
python3 model_pipeline.py test # Generate test output from each model
|
|
"""
|
|
|
|
import os, sys, json, urllib.request
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
# host:port of the local Ollama HTTP API (overridable via environment).
OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "localhost:11434")

# Directory where model-pipeline.log is appended (created on first log()).
LOG_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-health"))

# Optional JSON override for the fallback chain; expected shape:
# {"chain": [{"model": "<tag>", "role": "<label>"}, ...]}
CHAIN_FILE = Path(os.path.expanduser("~/.local/timmy/fleet-resources/model-chain.json"))

# Fallback order used when CHAIN_FILE is absent: most capable model first,
# degrading to the smallest model that can still respond.
DEFAULT_CHAIN = [
    {"model": "hermes4:14b", "role": "primary"},
    {"model": "qwen2.5:7b", "role": "fallback"},
    {"model": "phi3:3.8b", "role": "emergency"},
    {"model": "gemma3:1b", "role": "minimal"},
]
|
|
|
|
|
|
def log(msg):
    """Append a UTC-timestamped line to the model-pipeline log file.

    Creates LOG_DIR on first use. Console output elsewhere goes through
    print(); this file is the machine-greppable record of each run.
    """
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8 so log content is stable regardless of system locale
    # (the original relied on the platform-default encoding).
    with open(LOG_DIR / "model-pipeline.log", "a", encoding="utf-8") as f:
        f.write(f"[{datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}] {msg}\n")
|
|
|
|
|
|
def check_ollama():
    """Query Ollama's /api/tags endpoint and return the parsed JSON.

    Returns:
        The tags payload (a dict containing a "models" list) on success,
        or {"error": "<message>"} when Ollama is unreachable or returns
        unparseable data. Never raises, so callers branch on "error".
    """
    try:
        # Context manager guarantees the HTTP response is closed even when
        # reading or JSON parsing fails (the original leaked the connection).
        with urllib.request.urlopen(f"http://{OLLAMA_HOST}/api/tags", timeout=5) as resp:
            return json.loads(resp.read())
    except Exception as e:
        return {"error": str(e)}
|
|
|
|
|
|
def list_models():
    """Print each locally installed Ollama model with its size in GiB.

    Returns:
        A list of installed model names, or an empty list when Ollama is
        unreachable (an explanatory line is printed in that case).
    """
    payload = check_ollama()
    if "error" in payload:
        print(f" Ollama not reachable at {OLLAMA_HOST}: {payload['error']}")
        return []

    installed = payload.get("models", [])
    for entry in installed:
        label = entry.get("name", "?")
        gib = entry.get("size", 0) / (1024**3)
        print(f" {label:<25s} {gib:.1f} GB")
    return [entry["name"] for entry in installed]
|
|
|
|
|
|
def test_model(model, prompt="Say 'beacon lit' and nothing else."):
    """Run one non-streaming generation against *model* via Ollama.

    Args:
        model: Ollama model tag, e.g. "qwen2.5:7b".
        prompt: Prompt sent to the model; the default asks for a fixed
            phrase so a sane response is easy to eyeball.

    Returns:
        (True, response_text) on success, or (False, error_summary) on
        any failure; the error string is truncated to 100 characters.
    """
    try:
        body = json.dumps({"model": model, "prompt": prompt, "stream": False}).encode()
        req = urllib.request.Request(f"http://{OLLAMA_HOST}/api/generate", data=body,
                                     headers={"Content-Type": "application/json"})
        # Close the response even if JSON parsing raises (the original
        # left the connection open on that path).
        with urllib.request.urlopen(req, timeout=60) as resp:
            result = json.loads(resp.read())
        return True, result.get("response", "").strip()
    except Exception as e:
        return False, str(e)[:100]
|
|
|
|
|
|
def test_chain():
    """Walk the fallback chain, live-testing each installed model in order.

    Loads the chain from CHAIN_FILE when present (falling back to
    DEFAULT_CHAIN if the file is missing or unparseable — previously a
    corrupt file aborted the entire run), checks which models are
    installed, sends a test prompt to each, and reports the first
    responding model as the primary server. Every result is logged.
    """
    chain = DEFAULT_CHAIN
    if CHAIN_FILE.exists():
        try:
            chain = json.loads(CHAIN_FILE.read_text()).get("chain", DEFAULT_CHAIN)
        except (json.JSONDecodeError, OSError) as e:
            # A broken override file must not break fallback verification.
            log(f"Chain file unreadable, using default chain: {e}")

    available = list_models() or []
    print("\n=== Fallback Chain Test ===")
    first_good = None

    for entry in chain:
        model = entry["model"]
        role = entry.get("role", "unknown")
        if model not in available:
            print(f" [MISS] {model:<25s} ({role}) — not installed")
            continue
        ok, result = test_model(model)
        status = "OK" if ok else "FAIL"
        print(f" [{status}] {model:<25s} ({role}) — {result[:70]}")
        log(f"Fallback test {model}: {status} — {result[:100]}")
        # First healthy model in chain order becomes the serving primary.
        if ok and first_good is None:
            first_good = model

    if first_good:
        print(f"\n Primary serving: {first_good}")
    else:
        print("\n WARNING: No chain model responding. Fallback broken.")
        log("FALLBACK CHAIN BROKEN — no models responding")
|
|
|
|
|
|
def status():
    """Print a one-screen dashboard: Ollama health, local models, chain config."""
    payload = check_ollama()
    if "error" in payload:
        print(f" Ollama: DOWN — {payload['error']}")
    else:
        loaded = payload.get("models", [])
        print(f" Ollama: UP — {len(loaded)} models loaded")

    print("\n=== Local Models ===")
    list_models()

    print("\n=== Chain Configuration ===")
    chain = DEFAULT_CHAIN
    if CHAIN_FILE.exists():
        chain = json.loads(CHAIN_FILE.read_text()).get("chain", DEFAULT_CHAIN)
    for link in chain:
        print(f" {link['model']:<25s} {link.get('role','?')}")
|
|
|
|
|
|
if __name__ == "__main__":
    # No argument defaults to the status dashboard; any unrecognized
    # command runs the full status + chain test sequence.
    command = sys.argv[1] if len(sys.argv) > 1 else "status"
    handlers = {"status": status, "list": list_models, "test": test_chain}
    handler = handlers.get(command)
    if handler is not None:
        handler()
    else:
        status()
        test_chain()
|