timmy-config/fleet/model_pipeline.py
Alexander Whitestone 0a13347e39 feat: FLEET-010/011/012 — Phase 3 and 4 fleet capabilities
FLEET-010: Cross-agent task delegation protocol
- Keyword-based heuristic assigns unassigned issues to agents (sketched below)
- Supports: claw-code, gemini, ezra, bezalel, timmy
- Delegation logging and status dashboard
- Auto-comments on assigned issues
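
A minimal sketch of the delegation heuristic, assuming issues arrive as
plain-text titles. The keyword table and default assignee below are
illustrative assumptions, not the shipped mapping:

    # Hypothetical keyword table; the real mapping lives in the delegation module.
    AGENT_KEYWORDS = {
        "claw-code": ["refactor", "bug", "test"],
        "gemini": ["research", "summarize"],
        "ezra": ["docs", "write"],
        "bezalel": ["design", "build"],
        "timmy": ["fleet", "deploy", "ops"],
    }

    def pick_agent(issue_title: str) -> str:
        """Return the first agent whose keywords appear in the issue title."""
        title = issue_title.lower()
        for agent, keywords in AGENT_KEYWORDS.items():
            if any(kw in title for kw in keywords):
                return agent
        return "timmy"  # default assignee (assumption)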

FLEET-011: Local model pipeline and fallback chain
- Checks Ollama reachability and model availability
- 4-model chain: hermes4:14b -> qwen2.5:7b -> phi3:3.8b -> gemma3:1b
- Tests each model with live inference on every run
- Fallback verification: finds first responding model
- Chain configuration via ~/.local/timmy/fleet-resources/model-chain.json (example below)
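
An example model-chain.json matching the default chain. The script reads only
the top-level "chain" key; each entry needs "model", and "role" is optional:

    {
      "chain": [
        {"model": "hermes4:14b", "role": "primary"},
        {"model": "qwen2.5:7b", "role": "fallback"},
        {"model": "phi3:3.8b", "role": "emergency"},
        {"model": "gemma3:1b", "role": "minimal"}
      ]
    }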

FLEET-012: Agent lifecycle manager
- Full lifecycle: provision -> deploy -> monitor -> retire
- Heartbeat detection with 24h idle threshold (see the sketch after this list)
- Task completion/failure tracking
- Agent Fleet Status dashboard
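
A minimal sketch of the 24h idle check, assuming heartbeats are stored as
offset-aware ISO-8601 timestamps per agent (the field and function names here
are hypothetical):

    from datetime import datetime, timedelta, timezone

    IDLE_THRESHOLD = timedelta(hours=24)

    def is_idle(last_heartbeat_iso: str) -> bool:
        """True if the last heartbeat (e.g. '2026-04-07T12:43:10+00:00') is stale."""
        last = datetime.fromisoformat(last_heartbeat_iso)
        return datetime.now(timezone.utc) - last > IDLE_THRESHOLD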

Fixes timmy-home#563 (delegation), #564 (model pipeline), #565 (lifecycle)
2026-04-07 12:43:10 -04:00

127 lines
4.2 KiB
Python

#!/usr/bin/env python3
"""
FLEET-011: Local Model Pipeline and Fallback Chain
Phase 4: Sovereignty — all inference runs locally, no cloud dependency.
Checks Ollama endpoints, verifies model availability, tests fallback chain.
Logs results. The chain runs: hermes4:14b -> qwen2.5:7b -> phi3:3.8b -> gemma3:1b

Usage:
    python3 model_pipeline.py         # Run full fallback test
    python3 model_pipeline.py status  # Show current model status
    python3 model_pipeline.py list    # List all local models
    python3 model_pipeline.py test    # Generate test output from each model
"""
import json
import os
import sys
import urllib.request
from datetime import datetime, timezone
from pathlib import Path

OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "localhost:11434")
LOG_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-health"))
CHAIN_FILE = Path(os.path.expanduser("~/.local/timmy/fleet-resources/model-chain.json"))

DEFAULT_CHAIN = [
    {"model": "hermes4:14b", "role": "primary"},
    {"model": "qwen2.5:7b", "role": "fallback"},
    {"model": "phi3:3.8b", "role": "emergency"},
    {"model": "gemma3:1b", "role": "minimal"},
]

def log(msg):
    """Append a UTC-timestamped entry to the pipeline log file."""
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    with open(LOG_DIR / "model-pipeline.log", "a") as f:
        f.write(f"[{datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}] {msg}\n")

def check_ollama():
    """GET /api/tags; return parsed JSON ({"models": [...]}) or {"error": str}."""
    try:
        resp = urllib.request.urlopen(f"http://{OLLAMA_HOST}/api/tags", timeout=5)
        return json.loads(resp.read())
    except Exception as e:
        return {"error": str(e)}

def list_models():
    """Print each local model with its size; return the list of model names."""
    data = check_ollama()
    if "error" in data:
        print(f" Ollama not reachable at {OLLAMA_HOST}: {data['error']}")
        return []
    models = data.get("models", [])
    for m in models:
        name = m.get("name", "?")
        size = m.get("size", 0) / (1024**3)
        print(f" {name:<25s} {size:.1f} GB")
    return [m["name"] for m in models]

def test_model(model, prompt="Say 'beacon lit' and nothing else."):
    """Run one non-streaming generation; return (ok, response or error text)."""
    try:
        body = json.dumps({"model": model, "prompt": prompt, "stream": False}).encode()
        req = urllib.request.Request(
            f"http://{OLLAMA_HOST}/api/generate", data=body,
            headers={"Content-Type": "application/json"},
        )
        resp = urllib.request.urlopen(req, timeout=60)
        result = json.loads(resp.read())
        return True, result.get("response", "").strip()
    except Exception as e:
        return False, str(e)[:100]

def test_chain():
    """Walk the configured chain in order and report the first responding model."""
    chain_data = {}
    if CHAIN_FILE.exists():
        chain_data = json.loads(CHAIN_FILE.read_text())
    chain = chain_data.get("chain", DEFAULT_CHAIN)
    available = list_models()
    print("\n=== Fallback Chain Test ===")
    first_good = None
    for entry in chain:
        model = entry["model"]
        role = entry.get("role", "unknown")
        if model in available:
            ok, result = test_model(model)
            status = "OK" if ok else "FAIL"
            print(f" [{status}] {model:<25s} ({role}) — {result[:70]}")
            log(f"Fallback test {model}: {status} - {result[:100]}")
            if ok and first_good is None:
                first_good = model
        else:
            print(f" [MISS] {model:<25s} ({role}) — not installed")
    if first_good:
        print(f"\n Primary serving: {first_good}")
    else:
        print("\n WARNING: No chain model responding. Fallback broken.")
        log("FALLBACK CHAIN BROKEN — no models responding")

def status():
    """Summarize Ollama reachability, local models, and the chain configuration."""
    data = check_ollama()
    if "error" in data:
        print(f" Ollama: DOWN — {data['error']}")
    else:
        models = data.get("models", [])
        print(f" Ollama: UP — {len(models)} models loaded")
    print("\n=== Local Models ===")
    list_models()
    print("\n=== Chain Configuration ===")
    if CHAIN_FILE.exists():
        chain = json.loads(CHAIN_FILE.read_text()).get("chain", DEFAULT_CHAIN)
    else:
        chain = DEFAULT_CHAIN
    for e in chain:
        print(f" {e['model']:<25s} {e.get('role', '?')}")

if __name__ == "__main__":
    cmd = sys.argv[1] if len(sys.argv) > 1 else "full"
    if cmd == "status":
        status()
    elif cmd == "list":
        list_models()
    elif cmd == "test":
        test_chain()
    else:
        # Default (no argument) or unknown command: full run, per the usage text.
        status()
        test_chain()