#!/usr/bin/env python3
"""
FLEET-011: Local Model Pipeline and Fallback Chain
Phase 4: Sovereignty — all inference runs locally, no cloud dependency.

Checks Ollama endpoints, verifies model availability, tests fallback chain.
Logs results. The built-in default chain runs:
    hermes4:14b -> qwen2.5:7b -> phi3:3.8b -> gemma3:1b
It is replaced by the "chain" list in CHAIN_FILE when that file exists and
is valid.

Usage:
    python3 model_pipeline.py          # Show status, then run full fallback test
    python3 model_pipeline.py status   # Show current model status
    python3 model_pipeline.py list     # List all local models
    python3 model_pipeline.py test     # Generate test output from each model
"""

import json
import os
import sys
import urllib.request
from datetime import datetime, timezone
from pathlib import Path

OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "localhost:11434")
LOG_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-health"))
CHAIN_FILE = Path(
    os.path.expanduser("~/.local/timmy/fleet-resources/model-chain.json")
)

DEFAULT_CHAIN = [
    {"model": "hermes4:14b", "role": "primary"},
    {"model": "qwen2.5:7b", "role": "fallback"},
    {"model": "phi3:3.8b", "role": "emergency"},
    {"model": "gemma3:1b", "role": "minimal"},
]


def log(msg):
    """Append a UTC-timestamped line to LOG_DIR/model-pipeline.log.

    The log directory is created on demand.
    """
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    # Explicit UTF-8: messages contain an em dash, which a non-UTF-8 locale
    # default encoding could fail to encode.
    with open(LOG_DIR / "model-pipeline.log", "a", encoding="utf-8") as f:
        f.write(f"[{stamp}] {msg}\n")


def ollama_url(path, host=None):
    """Build the full URL for an Ollama API path.

    Args:
        path: API path starting with "/", e.g. "/api/tags".
        host: Host specification; defaults to the module-level OLLAMA_HOST.
            May be "host:port" or already carry a scheme ("http://host:port").

    Returns:
        The URL as a string. "http://" is prepended only when the host has
        no scheme, and an empty host falls back to "localhost:11434".
    """
    base = (OLLAMA_HOST if host is None else host).strip().rstrip("/")
    if not base:
        base = "localhost:11434"
    if "://" not in base:
        base = f"http://{base}"
    return f"{base}{path}"


def model_installed(model, available):
    """Return True if chain entry *model* is among the installed names.

    Args:
        model: Model name from the chain, with or without a tag.
        available: Iterable of installed model names as reported by Ollama.

    An exact match always counts. A name without a tag also matches
    "<name>:latest", because Ollama reports untagged pulls under that name.
    """
    if model in available:
        return True
    # Only the last path segment can carry a tag; a registry host such as
    # "host:5000/ns/model" contains ":" without being tagged.
    if ":" not in model.rsplit("/", 1)[-1]:
        return f"{model}:latest" in available
    return False


def load_chain(path=None):
    """Load the fallback chain configuration.

    Args:
        path: Chain file to read; defaults to the module-level CHAIN_FILE.

    Returns:
        The "chain" list from the file, or a copy of DEFAULT_CHAIN when the
        file is missing, has no "chain" key, cannot be read or parsed, or is
        not a list of dicts that each have a "model" key. Unusable files
        produce a printed warning instead of an exception.
    """
    chain_path = CHAIN_FILE if path is None else Path(path)
    if not chain_path.exists():
        return list(DEFAULT_CHAIN)
    try:
        data = json.loads(chain_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        print(f" WARNING: cannot read {chain_path}: {e} — using default chain")
        return list(DEFAULT_CHAIN)
    chain = data.get("chain", DEFAULT_CHAIN) if isinstance(data, dict) else None
    well_formed = isinstance(chain, list) and all(
        isinstance(entry, dict) and "model" in entry for entry in chain
    )
    if not well_formed:
        print(f" WARNING: invalid chain in {chain_path} — using default chain")
        return list(DEFAULT_CHAIN)
    return list(chain)


def check_ollama():
    """Query the Ollama /api/tags endpoint.

    Returns:
        The decoded JSON object on success, or {"error": <message>} when the
        request fails or the response is not a JSON object.
    """
    try:
        with urllib.request.urlopen(ollama_url("/api/tags"), timeout=5) as resp:
            data = json.loads(resp.read())
    except Exception as e:
        # Health-check boundary: any failure is reported as data, not raised.
        return {"error": str(e)}
    if not isinstance(data, dict):
        return {"error": f"unexpected response type {type(data).__name__}"}
    return data


def list_models():
    """Print every locally installed model with its size in GB.

    Returns:
        The list of model names, or [] when Ollama is not reachable.
        Entries without a name are printed as "?" but left out of the result.
    """
    data = check_ollama()
    if "error" in data:
        print(f" Ollama not reachable at {OLLAMA_HOST}: {data['error']}")
        return []
    names = []
    for m in data.get("models") or []:
        name = m.get("name")
        size = (m.get("size") or 0) / (1024**3)
        print(f" {name or '?':<25s} {size:.1f} GB")
        if name:
            names.append(name)
    return names


def test_model(model, prompt="Say 'beacon lit' and nothing else."):
    """Send one non-streaming generate request to *model*.

    Args:
        model: Ollama model name.
        prompt: Prompt text sent to the model.

    Returns:
        (True, stripped response text) on success, or
        (False, first 100 characters of the error message) on any failure.
    """
    try:
        body = json.dumps(
            {"model": model, "prompt": prompt, "stream": False}
        ).encode()
        req = urllib.request.Request(
            ollama_url("/api/generate"),
            data=body,
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=60) as resp:
            result = json.loads(resp.read())
        return True, result.get("response", "").strip()
    except Exception as e:
        # Best-effort probe: a failing model must not abort the chain test.
        return False, str(e)[:100]


def test_chain():
    """Probe every model in the fallback chain, in order.

    Prints the installed models, then one line per chain entry (OK, FAIL or
    MISS), and finally the first model that answered. Each probe result is
    logged, as is the case where no model responds.
    """
    chain = load_chain()
    available = list_models()
    print("\n=== Fallback Chain Test ===")
    first_good = None
    for entry in chain:
        model = entry["model"]
        role = entry.get("role", "unknown")
        if not model_installed(model, available):
            print(f" [MISS] {model:<25s} ({role}) — not installed")
            continue
        ok, result = test_model(model)
        status_label = "OK" if ok else "FAIL"
        print(f" [{status_label}] {model:<25s} ({role}) — {result[:70]}")
        log(f"Fallback test {model}: {status_label} — {result[:100]}")
        if ok and first_good is None:
            first_good = model
    if first_good:
        print(f"\n Primary serving: {first_good}")
    else:
        print("\n WARNING: No chain model responding. Fallback broken.")
        log("FALLBACK CHAIN BROKEN — no models responding")


# The two probes above are CLI actions, not unit tests. Their names match
# pytest's default test_* pattern, so opt them out of collection in case this
# module is star-imported into a test file.
test_model.__test__ = False
test_chain.__test__ = False


def status():
    """Print Ollama reachability, the local models and the configured chain."""
    data = check_ollama()
    if "error" in data:
        print(f" Ollama: DOWN — {data['error']}")
    else:
        models = data.get("models") or []
        print(f" Ollama: UP — {len(models)} models loaded")
    print("\n=== Local Models ===")
    list_models()
    print("\n=== Chain Configuration ===")
    for e in load_chain():
        print(f" {e['model']:<25s} {e.get('role','?')}")


def main(argv=None):
    """Dispatch the CLI command.

    Args:
        argv: Argument list without the program name; defaults to
            sys.argv[1:].

    "status", "list" and "test" run the matching action. No argument, or any
    other argument, runs status followed by the full fallback test, as the
    usage text in the module docstring describes.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    cmd = args[0] if args else "all"
    if cmd == "status":
        status()
    elif cmd == "list":
        list_models()
    elif cmd == "test":
        test_chain()
    else:
        status()
        test_chain()


if __name__ == "__main__":
    main()