[gemini] Huey retrospective — 1-hour check-in and test plan (#8 )

config: update config.yaml
2026-03-27 11:20:41 -04:00 · 2026-03-27 11:00:31 -04:00 · 2026-03-27 00:00:30 -04:00 · 2026-03-26 23:00:35 -04:00 · 2026-03-26 17:00:22 -04:00 · 2026-03-26 13:00:29 -04:00
5 changed files with 886 additions and 17 deletions
--- a/autolora/manifest.yaml
+++ b/autolora/manifest.yaml
@@ -0,0 +1,62 @@
+# Timmy Adapter Manifest
+# Only version adapters, never base models. Base models are reproducible downloads.
+# Adapters are the diff. The manifest is the record.
+
+bases:
+  hermes3-8b-4bit:
+    source: mlx-community/Hermes-3-Llama-3.1-8B-4bit
+    local: ~/models/Hermes-3-Llama-3.1-8B-4bit
+    arch: llama3
+    params: 8B
+    quant: 4-bit MLX
+
+  hermes4-14b-4bit:
+    source: mlx-community/Hermes-4-14B-4bit
+    local: ~/models/hermes4-14b-mlx
+    arch: qwen3
+    params: 14.8B
+    quant: 4-bit MLX
+
+adapters:
+  # Learning rates are written with a decimal point (2.0e-6, not 2e-6):
+  # YAML 1.1 loaders such as PyYAML read a bare "2e-6" as a string.
+  timmy-v0:
+    base: hermes3-8b-4bit
+    date: 2026-03-24
+    status: retired
+    data: 1154 sessions (technical only, no crisis/pastoral)
+    training: { lr: 2.0e-6, rank: 8, iters: 1000, best_iter: 800, val_loss: 2.134 }
+    eval: { identity: PASS, sovereignty: PASS, coding: PASS, crisis: FAIL, faith: FAIL }
+    notes: "First adapter. Crisis fails — data was 99% technical. Sacred rule: REJECTED."
+
+  timmy-v0-nan-run1:
+    base: hermes3-8b-4bit
+    date: 2026-03-24
+    status: rejected
+    notes: "NaN at iter 70. lr=1e-5 too high for 4-bit. Dead on arrival."
+
+  timmy-v0.1:
+    base: hermes3-8b-4bit
+    date: 2026-03-25
+    status: retired
+    data: 1203 train / 135 valid (enriched with 49 crisis/faith synthetic)
+    training: { lr: 5.0e-6, rank: 8, iters: 600, val_loss: 2.026 }
+    eval: { identity: PASS, sovereignty: PASS, coding: PASS, crisis: PARTIAL, faith: FAIL }
+    notes: "Crisis partial — mentions seeking help but no 988/gospel. Rank 8 can't override base priors."
+
+  timmy-v0.2:
+    base: hermes3-8b-4bit
+    date: 2026-03-25
+    status: rejected
+    data: 1214 train / 141 valid (12 targeted crisis/faith examples, 5x duplicated)
+    training: { lr: 5.0e-6, rank: 16, iters: 800 }
+    eval: "NaN at iter 100. Rank 16 + lr 5e-6 unstable on 4-bit."
+    notes: "Dead. Halve lr when doubling rank."
+
+  # First 14B attempt — rejected; see notes for what v1.1 needs.
+  timmy-v1.0:
+    base: hermes4-14b-4bit
+    date: 2026-03-26
+    status: rejected
+    data: 1125 train / 126 valid (same curated set, reused from 8B — NOT re-tokenized)
+    training: { lr: 1.0e-6, rank: 16, iters: 800 }
+    eval: "Val NaN iter 100, train NaN iter 160. Dead."
+    notes: "Data was pre-truncated for Llama3 tokenizer, not Qwen3. Must re-run clean_data.py with 14B tokenizer before v1.1."
--- a/bin/timmy-dashboard
+++ b/bin/timmy-dashboard
@@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+"""Timmy Model Dashboard — where are my models, what are they doing.
+
+Usage:
+    timmy-dashboard              # one-shot
+    timmy-dashboard --watch      # live refresh every 30s
+    timmy-dashboard --hours=48   # look back 48h
+"""
+
+import json
+import os
+import subprocess
+import sys
+import time
+import urllib.request
+from datetime import datetime, timezone, timedelta
+from pathlib import Path
+
+HERMES_HOME = Path.home() / ".hermes"
+TIMMY_HOME = Path.home() / ".timmy"
+METRICS_DIR = TIMMY_HOME / "metrics"
+
+# ── Data Sources ──────────────────────────────────────────────────────
+
+def get_ollama_models():
+    try:
+        req = urllib.request.Request("http://localhost:11434/api/tags")
+        with urllib.request.urlopen(req, timeout=5) as resp:
+            return json.loads(resp.read()).get("models", [])
+    except Exception:
+        return []
+
+
+def get_loaded_models():
+    try:
+        req = urllib.request.Request("http://localhost:11434/api/ps")
+        with urllib.request.urlopen(req, timeout=5) as resp:
+            return json.loads(resp.read()).get("models", [])
+    except Exception:
+        return []
+
+
+def get_huey_pid():
+    try:
+        r = subprocess.run(["pgrep", "-f", "huey_consumer"],
+                          capture_output=True, text=True, timeout=5)
+        return r.stdout.strip().split("\n")[0] if r.returncode == 0 else None
+    except Exception:
+        return None
+
+
+def get_hermes_sessions():
+    sessions_file = HERMES_HOME / "sessions" / "sessions.json"
+    if not sessions_file.exists():
+        return []
+    try:
+        data = json.loads(sessions_file.read_text())
+        return list(data.values())
+    except Exception:
+        return []
+
+
+def get_heartbeat_ticks(date_str=None):
+    if not date_str:
+        date_str = datetime.now().strftime("%Y%m%d")
+    tick_file = TIMMY_HOME / "heartbeat" / f"ticks_{date_str}.jsonl"
+    if not tick_file.exists():
+        return []
+    ticks = []
+    for line in tick_file.read_text().strip().split("\n"):
+        if not line.strip():
+            continue
+        try:
+            ticks.append(json.loads(line))
+        except Exception:
+            continue
+    return ticks
+
+
+def get_local_metrics(hours=24):
+    """Read local inference metrics from jsonl files."""
+    records = []
+    cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
+    if not METRICS_DIR.exists():
+        return records
+    for f in sorted(METRICS_DIR.glob("local_*.jsonl")):
+        for line in f.read_text().strip().split("\n"):
+            if not line.strip():
+                continue
+            try:
+                r = json.loads(line)
+                ts = datetime.fromisoformat(r["timestamp"])
+                if ts >= cutoff:
+                    records.append(r)
+            except Exception:
+                continue
+    return records
+
+
+def get_cron_jobs():
+    """Get Hermes cron job status."""
+    try:
+        r = subprocess.run(
+            ["hermes", "cron", "list", "--json"],
+            capture_output=True, text=True, timeout=10
+        )
+        if r.returncode == 0:
+            return json.loads(r.stdout).get("jobs", [])
+    except Exception:
+        pass
+    return []
+
+
+# ── Rendering ─────────────────────────────────────────────────────────
+
+DIM = "\033[2m"
+BOLD = "\033[1m"
+GREEN = "\033[32m"
+YELLOW = "\033[33m"
+RED = "\033[31m"
+CYAN = "\033[36m"
+RST = "\033[0m"
+CLR = "\033[2J\033[H"
+
+
+def render(hours=24):
+    models = get_ollama_models()
+    loaded = get_loaded_models()
+    huey_pid = get_huey_pid()
+    ticks = get_heartbeat_ticks()
+    metrics = get_local_metrics(hours)
+    sessions = get_hermes_sessions()
+
+    loaded_names = {m.get("name", "") for m in loaded}
+    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+    print(CLR, end="")
+    print(f"{BOLD}{'=' * 70}")
+    print(f"  TIMMY MODEL DASHBOARD")
+    print(f"  {now}  |  Huey: {GREEN}PID {huey_pid}{RST if huey_pid else f'{RED}DOWN{RST}'}")
+    print(f"{'=' * 70}{RST}")
+
+    # ── LOCAL MODELS ──
+    print(f"\n  {BOLD}LOCAL MODELS (Ollama){RST}")
+    print(f"  {DIM}{'-' * 55}{RST}")
+    if models:
+        for m in models:
+            name = m.get("name", "?")
+            size_gb = m.get("size", 0) / 1e9
+            if name in loaded_names:
+                status = f"{GREEN}IN VRAM{RST}"
+            else:
+                status = f"{DIM}on disk{RST}"
+            print(f"    {name:35s} {size_gb:5.1f}GB  {status}")
+    else:
+        print(f"    {RED}(Ollama not responding){RST}")
+
+    # ── LOCAL INFERENCE ACTIVITY ──
+    print(f"\n  {BOLD}LOCAL INFERENCE ({len(metrics)} calls, last {hours}h){RST}")
+    print(f"  {DIM}{'-' * 55}{RST}")
+    if metrics:
+        by_caller = {}
+        for r in metrics:
+            caller = r.get("caller", "unknown")
+            if caller not in by_caller:
+                by_caller[caller] = {"count": 0, "success": 0, "errors": 0}
+            by_caller[caller]["count"] += 1
+            if r.get("success"):
+                by_caller[caller]["success"] += 1
+            else:
+                by_caller[caller]["errors"] += 1
+        for caller, stats in by_caller.items():
+            err = f"  {RED}err:{stats['errors']}{RST}" if stats["errors"] else ""
+            print(f"    {caller:25s}  calls:{stats['count']:4d}  "
+                  f"{GREEN}ok:{stats['success']}{RST}{err}")
+
+        by_model = {}
+        for r in metrics:
+            model = r.get("model", "unknown")
+            by_model[model] = by_model.get(model, 0) + 1
+        print(f"\n    {DIM}Models used:{RST}")
+        for model, count in sorted(by_model.items(), key=lambda x: -x[1]):
+            print(f"      {model:30s}  {count} calls")
+    else:
+        print(f"    {DIM}(no local calls recorded yet){RST}")
+
+    # ── HEARTBEAT STATUS ──
+    print(f"\n  {BOLD}HEARTBEAT ({len(ticks)} ticks today){RST}")
+    print(f"  {DIM}{'-' * 55}{RST}")
+    if ticks:
+        last = ticks[-1]
+        decision = last.get("decision", last.get("actions", {}))
+        if isinstance(decision, dict):
+            severity = decision.get("severity", "unknown")
+            reasoning = decision.get("reasoning", "")
+            sev_color = GREEN if severity == "ok" else YELLOW if severity == "warning" else RED
+            print(f"    Last tick:  {last.get('tick_id', '?')}")
+            print(f"    Severity:   {sev_color}{severity}{RST}")
+            if reasoning:
+                print(f"    Reasoning:  {reasoning[:65]}")
+        else:
+            print(f"    Last tick:  {last.get('tick_id', '?')}")
+            actions = last.get("actions", [])
+            print(f"    Actions:    {actions if actions else 'none'}")
+
+        model_decisions = sum(1 for t in ticks
+                            if isinstance(t.get("decision"), dict)
+                            and t["decision"].get("severity") != "fallback")
+        fallback = len(ticks) - model_decisions
+        print(f"    {CYAN}Model: {model_decisions}{RST}  |  {DIM}Fallback: {fallback}{RST}")
+    else:
+        print(f"    {DIM}(no ticks today){RST}")
+
+    # ── HERMES SESSIONS ──
+    local_sessions = [s for s in sessions
+                     if "localhost:11434" in str(s.get("base_url", ""))]
+    cloud_sessions = [s for s in sessions if s not in local_sessions]
+    print(f"\n  {BOLD}HERMES SESSIONS{RST}")
+    print(f"  {DIM}{'-' * 55}{RST}")
+    print(f"    Total: {len(sessions)}  |  "
+          f"{GREEN}Local: {len(local_sessions)}{RST}  |  "
+          f"{YELLOW}Cloud: {len(cloud_sessions)}{RST}")
+
+    # ── ACTIVE LOOPS ──
+    print(f"\n  {BOLD}ACTIVE LOOPS{RST}")
+    print(f"  {DIM}{'-' * 55}{RST}")
+    print(f"    {CYAN}heartbeat_tick{RST}    10m    hermes4:14b    DECIDE phase")
+    print(f"    {DIM}model_health{RST}      5m     (local check)  Ollama ping")
+    print(f"    {DIM}gemini_worker{RST}     20m    gemini-2.5-pro aider")
+    print(f"    {DIM}grok_worker{RST}       20m    grok-3-fast    opencode")
+    print(f"    {DIM}cross_review{RST}      30m    gemini+grok    PR review")
+
+    print(f"\n{BOLD}{'=' * 70}{RST}")
+    print(f"  {DIM}Refresh: timmy-dashboard --watch | History: --hours=N{RST}")
+
+
+if __name__ == "__main__":
+    watch = "--watch" in sys.argv
+    hours = 24
+    for a in sys.argv[1:]:
+        if a.startswith("--hours="):
+            hours = int(a.split("=")[1])
+
+    if watch:
+        try:
+            while True:
+                render(hours)
+                time.sleep(30)
+        except KeyboardInterrupt:
+            print(f"\n{DIM}Dashboard stopped.{RST}")
+    else:
+        render(hours)
--- a/config.yaml
+++ b/config.yaml
@@ -1,11 +1,13 @@
 model:
-  default: claude-sonnet-4-6
-  provider: anthropic
+  default: hermes4
+  provider: custom
+  base_url: http://localhost:8081/v1
+  context_length: 65536
 toolsets:
 - all
 agent:
  max_turns: 30
-  reasoning_effort: medium
+  reasoning_effort: xhigh
  verbose: false
 terminal:
  backend: local
@@ -184,11 +186,15 @@ custom_providers:
 - name: Local Ollama
  base_url: http://localhost:11434/v1
  api_key: ollama
-  model: glm-4.7-flash:latest
+  model: hermes4:14b
 - name: Google Gemini
  base_url: https://generativelanguage.googleapis.com/v1beta/openai
  api_key_env: GEMINI_API_KEY
  model: gemini-2.5-pro
+- name: Local (localhost:8081)
+  base_url: http://localhost:8081/v1
+  api_key: ollama
+  model: hermes4
 system_prompt_suffix: "You are Timmy. Your soul is defined in SOUL.md \u2014 read\
  \ it, live it.\nYou run locally on your owner's machine via Ollama. You never phone\
  \ home.\nYou speak plainly. You prefer short sentences. Brevity is a kindness.\n\
@@ -202,12 +208,12 @@ providers:
    base_url: http://localhost:11434/v1
    model: hermes3:latest
 mcp_servers:
-  orchestration:
-    command: /Users/apayne/.hermes/hermes-agent/venv/bin/python3
+  morrowind:
+    command: python3
    args:
-    - /Users/apayne/.hermes/hermes-agent/tools/orchestration_mcp_server.py
+    - /Users/apayne/.timmy/morrowind/mcp_server.py
    env: {}
-    timeout: 120
+    timeout: 30
 fallback_model:
  provider: custom
  model: gemini-2.5-pro
--- a/docs/local-model-integration-sketch.md
+++ b/docs/local-model-integration-sketch.md
@@ -0,0 +1,438 @@
+# Local Model Integration Sketch v2
+# Hermes4-14B in the Heartbeat Loop — No New Telemetry
+
+## Principle
+
+No new inference layer. Huey tasks call `hermes chat -q` pointed at
+Ollama. Hermes handles sessions, token tracking, cost logging.
+The dashboard reads what Hermes already stores.
+
+---
+
+## Why Not Ollama Directly?
+
+Ollama is fine as a serving backend. The issue isn't Ollama — it's that
+calling Ollama directly with urllib bypasses the harness. The harness
+already tracks sessions, tokens, model/provider, platform. Building a
+second telemetry layer is owning code we don't need.
+
+Ollama as a named provider isn't wired into the --provider flag yet,
+but routing works via env vars:
+
+    HERMES_MODEL="hermes4:14b" \
+    HERMES_PROVIDER="custom" \
+    HERMES_BASE_URL="http://localhost:11434/v1" \
+    hermes chat -q "prompt here" -Q
+
+This creates a tracked session, logs tokens, and returns the response.
+That's our local inference call.
+
+### Alternatives to Ollama for serving:
+- **llama.cpp server** — lighter, no Python, raw HTTP. Good for single
+  model serving. Less convenient for model switching.
+- **vLLM** — best throughput, but needs NVIDIA GPU. Not for M3 Mac.
+- **MLX serving** — native Apple Silicon, but no OpenAI-compat API yet.
+  MLX is for training, not serving (our current policy).
+- **llamafile** — single binary, portable. Good for distribution.
+
+Verdict: Ollama is fine. It's the standard OpenAI-compat local server
+on Mac. The issue was never Ollama — it was bypassing the harness.
+
+---
+
+## 1. The Call Pattern
+
+One function in tasks.py that all Huey tasks use:
+
+```python
+import subprocess
+import json
+
+HERMES_BIN = "hermes"
+LOCAL_ENV = {
+    "HERMES_MODEL": "hermes4:14b",
+    "HERMES_PROVIDER": "custom",
+    "HERMES_BASE_URL": "http://localhost:11434/v1",
+}
+
+def hermes_local(prompt, caller_tag=None, max_retries=2):
+    """Call hermes with local Ollama model. Returns response text.
+    
+    Every call creates a hermes session with full telemetry.
+    caller_tag gets prepended to prompt for searchability.
+    """
+    import os
+    env = os.environ.copy()
+    env.update(LOCAL_ENV)
+    
+    tagged_prompt = prompt
+    if caller_tag:
+        tagged_prompt = f"[{caller_tag}] {prompt}"
+    
+    for attempt in range(max_retries + 1):
+        try:
+            result = subprocess.run(
+                [HERMES_BIN, "chat", "-q", tagged_prompt, "-Q", "-t", "none"],
+                capture_output=True, text=True,
+                timeout=120, env=env,
+            )
+            if result.returncode == 0 and result.stdout.strip():
+                # Strip the session_id line from -Q output
+                lines = result.stdout.strip().split("\n")
+                response_lines = [l for l in lines if not l.startswith("session_id:")]
+                return "\n".join(response_lines).strip()
+        except subprocess.TimeoutExpired:
+            if attempt == max_retries:
+                return None
+            continue
+    return None
+```
+
+Notes:
+- `-t none` disables all toolsets — the heartbeat model shouldn't
+  have terminal/file access. Pure reasoning only.
+- `-Q` quiet mode suppresses banner/spinner, gives clean output.
+- Every call creates a session in Hermes session store. Searchable,
+  exportable, countable.
+- The `[caller_tag]` prefix lets you filter sessions by which Huey
+  task generated them: `hermes sessions list | grep heartbeat`
+
+---
+
+## 2. Heartbeat DECIDE Phase
+
+Replace the hardcoded if/else with a model call:
+
+```python
+# In heartbeat_tick(), replace the DECIDE + ACT section:
+
+    # DECIDE: let hermes4:14b reason about what to do
+    decide_prompt = f"""System state at {now.isoformat()}:
+
+{json.dumps(perception, indent=2)}
+
+Previous tick: {last_tick.get('tick_id', 'none')}
+
+You are the heartbeat monitor. Based on this state:
+1. List any actions needed (alerts, restarts, escalations). Empty if all OK.
+2. Rate severity: ok, warning, or critical.
+3. One sentence of reasoning.
+
+Respond ONLY with JSON:
+{{"actions": [], "severity": "ok", "reasoning": "..."}}"""
+
+    decision = None
+    try:
+        raw = hermes_local(decide_prompt, caller_tag="heartbeat_tick")
+        if raw:
+            # Try to parse JSON from the response
+            # Model might wrap it in markdown, so extract
+            for line in raw.split("\n"):
+                line = line.strip()
+                if line.startswith("{"):
+                    decision = json.loads(line)
+                    break
+            if not decision:
+                decision = json.loads(raw)
+    except (json.JSONDecodeError, Exception) as e:
+        decision = None
+
+    # Fallback to hardcoded logic if model fails or is down
+    if decision is None:
+        actions = []
+        if not perception.get("gitea_alive"):
+            actions.append("ALERT: Gitea unreachable")
+        health = perception.get("model_health", {})
+        if isinstance(health, dict) and not health.get("ollama_running"):
+            actions.append("ALERT: Ollama not running")
+        decision = {
+            "actions": actions,
+            "severity": "fallback",
+            "reasoning": "model unavailable, used hardcoded checks"
+        }
+
+    tick_record["decision"] = decision
+    actions = decision.get("actions", [])
+```
+
+---
+
+## 3. DPO Candidate Collection
+
+No new database. Hermes sessions ARE the DPO candidates.
+
+Every `hermes_local()` call creates a session. To extract DPO pairs:
+
+```bash
+# Export all local-model sessions
+hermes sessions export --output /tmp/local-sessions.jsonl
+
+# Filter for heartbeat decisions
+grep "heartbeat_tick" /tmp/local-sessions.jsonl > heartbeat_decisions.jsonl
+```
+
+The existing `session_export` Huey task (runs every 4h) already extracts
+user→assistant pairs. It just needs to be aware that some sessions are
+now local-model decisions instead of human conversations.
+
+For DPO annotation, add a simple review script:
+
+```python
+# review_decisions.py — reads heartbeat tick logs, shows model decisions,
+# asks Alexander to mark chosen/rejected
+# Writes annotations back to the tick log files
+
+import json
+from pathlib import Path
+
+TICK_DIR = Path.home() / ".timmy" / "heartbeat"
+
+for log_file in sorted(TICK_DIR.glob("ticks_*.jsonl")):
+    for line in log_file.read_text().strip().split("\n"):
+        tick = json.loads(line)
+        decision = tick.get("decision", {})
+        if decision.get("severity") == "fallback":
+            continue  # skip fallback entries
+        
+        print(f"\n--- Tick {tick['tick_id']} ---")
+        print(f"Perception: {json.dumps(tick['perception'], indent=2)}")
+        print(f"Decision:   {json.dumps(decision, indent=2)}")
+        
+        rating = input("Rate (c=chosen, r=rejected, s=skip): ").strip()
+        if rating in ("c", "r"):
+            tick["dpo_label"] = "chosen" if rating == "c" else "rejected"
+            # write back... (append to annotated file)
+```
+
+---
+
+## 4. Dashboard — Reads Hermes Data
+
+```python
+#!/usr/bin/env python3
+"""Timmy Model Dashboard — reads from Hermes, owns nothing."""
+
+import json
+import os
+import subprocess
+import sys
+import time
+import urllib.request
+from datetime import datetime
+from pathlib import Path
+
+HERMES_HOME = Path.home() / ".hermes"
+TIMMY_HOME = Path.home() / ".timmy"
+
+
+def get_ollama_models():
+    """What's available in Ollama."""
+    try:
+        req = urllib.request.Request("http://localhost:11434/api/tags")
+        with urllib.request.urlopen(req, timeout=5) as resp:
+            return json.loads(resp.read()).get("models", [])
+    except Exception:
+        return []
+
+
+def get_loaded_models():
+    """What's actually in VRAM right now."""
+    try:
+        req = urllib.request.Request("http://localhost:11434/api/ps")
+        with urllib.request.urlopen(req, timeout=5) as resp:
+            return json.loads(resp.read()).get("models", [])
+    except Exception:
+        return []
+
+
+def get_huey_status():
+    try:
+        r = subprocess.run(["pgrep", "-f", "huey_consumer"],
+                          capture_output=True, timeout=5)
+        return r.returncode == 0
+    except Exception:
+        return False
+
+
+def get_hermes_sessions(hours=24):
+    """Read session metadata from Hermes session store."""
+    sessions_file = HERMES_HOME / "sessions" / "sessions.json"
+    if not sessions_file.exists():
+        return []
+    try:
+        data = json.loads(sessions_file.read_text())
+        return list(data.values())
+    except Exception:
+        return []
+
+
+def get_heartbeat_ticks(date_str=None):
+    """Read today's heartbeat ticks."""
+    if not date_str:
+        date_str = datetime.now().strftime("%Y%m%d")
+    tick_file = TIMMY_HOME / "heartbeat" / f"ticks_{date_str}.jsonl"
+    if not tick_file.exists():
+        return []
+    ticks = []
+    for line in tick_file.read_text().strip().split("\n"):
+        try:
+            ticks.append(json.loads(line))
+        except Exception:
+            continue
+    return ticks
+
+
+def render(hours=24):
+    models = get_ollama_models()
+    loaded = get_loaded_models()
+    huey = get_huey_status()
+    sessions = get_hermes_sessions(hours)
+    ticks = get_heartbeat_ticks()
+
+    loaded_names = {m.get("name", "") for m in loaded}
+
+    print("\033[2J\033[H")
+    print("=" * 70)
+    print("  TIMMY MODEL DASHBOARD")
+    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    print(f"  {now}  |  Huey: {'UP' if huey else 'DOWN'}  |  Ollama models: {len(models)}")
+    print("=" * 70)
+
+    # DEPLOYMENTS
+    print("\n  LOCAL MODELS")
+    print("  " + "-" * 55)
+    for m in models:
+        name = m.get("name", "?")
+        size_gb = m.get("size", 0) / 1e9
+        status = "IN VRAM" if name in loaded_names else "on disk"
+        print(f"    {name:35s} {size_gb:5.1f}GB  {status}")
+    if not models:
+        print("    (Ollama not responding)")
+
+    # HERMES SESSION ACTIVITY
+    # Count sessions by platform/provider
+    print(f"\n  HERMES SESSIONS (recent)")
+    print("  " + "-" * 55)
+    local_sessions = [s for s in sessions
+                     if "localhost" in str(s.get("origin", {}))]
+    cli_sessions = [s for s in sessions
+                   if s.get("platform") == "cli" or s.get("origin", {}).get("platform") == "cli"]
+
+    total_tokens = sum(s.get("total_tokens", 0) for s in sessions)
+    print(f"    Total sessions: {len(sessions)}")
+    print(f"    CLI sessions: {len(cli_sessions)}")
+    print(f"    Total tokens: {total_tokens:,}")
+
+    # HEARTBEAT STATUS
+    print(f"\n  HEARTBEAT ({len(ticks)} ticks today)")
+    print("  " + "-" * 55)
+    if ticks:
+        last = ticks[-1]
+        decision = last.get("decision", {})
+        severity = decision.get("severity", "unknown")
+        reasoning = decision.get("reasoning", "no model decision yet")
+        print(f"    Last tick: {last.get('tick_id', '?')}")
+        print(f"    Severity:  {severity}")
+        print(f"    Reasoning: {reasoning[:60]}")
+
+        # Count model vs fallback decisions
+        model_decisions = sum(1 for t in ticks
+                            if t.get("decision", {}).get("severity") != "fallback")
+        fallback = len(ticks) - model_decisions
+        print(f"    Model decisions: {model_decisions}  |  Fallback: {fallback}")
+
+        # DPO labels if any
+        labeled = sum(1 for t in ticks if "dpo_label" in t)
+        if labeled:
+            chosen = sum(1 for t in ticks if t.get("dpo_label") == "chosen")
+            rejected = sum(1 for t in ticks if t.get("dpo_label") == "rejected")
+            print(f"    DPO labeled: {labeled} (chosen: {chosen}, rejected: {rejected})")
+    else:
+        print("    (no ticks today)")
+
+    # ACTIVE LOOPS
+    print(f"\n  ACTIVE LOOPS USING LOCAL MODELS")
+    print("  " + "-" * 55)
+    print("    heartbeat_tick    10m    hermes4:14b    DECIDE phase")
+    print("    (future)          15m    hermes4:14b    issue triage")
+    print("    (future)          daily  timmy:v0.1     morning report")
+
+    print(f"\n  NON-LOCAL LOOPS (Gemini/Grok API)")
+    print("  " + "-" * 55)
+    print("    gemini_worker     20m    gemini-2.5-pro   aider")
+    print("    grok_worker       20m    grok-3-fast      opencode")
+    print("    cross_review      30m    both             PR review")
+
+    print("\n" + "=" * 70)
+
+
+if __name__ == "__main__":
+    watch = "--watch" in sys.argv
+    hours = 24
+    for a in sys.argv[1:]:
+        if a.startswith("--hours="):
+            hours = int(a.split("=")[1])
+    if watch:
+        while True:
+            render(hours)
+            time.sleep(30)
+    else:
+        render(hours)
+```
+
+---
+
+## 5. Implementation Steps
+
+### Step 1: Add hermes_local() to tasks.py
+- One function, ~20 lines
+- Calls `hermes chat -q` with Ollama env vars
+- All telemetry comes from Hermes for free
+
+### Step 2: Wire heartbeat_tick DECIDE phase
+- Replace 6 lines of if/else with hermes_local() call
+- Keep hardcoded fallback when model is down
+- Decision stored in tick record for DPO review
+
+### Step 3: Fix the MCP server warning
+- The orchestration MCP server path is broken — harmless but noisy
+- Either fix the path or remove from config
+
+### Step 4: Drop model_dashboard.py in timmy-config/bin/ (shipped in this change as `bin/timmy-dashboard`)
+- Reads Ollama API, Hermes sessions, heartbeat ticks
+- No new data stores — just views over existing ones
+- `timmy-dashboard --watch` for live view (`--hours=N` sets the look-back window)
+
+### Step 5: Expand to more Huey tasks
+- triage_issues: model reads issue, picks agent
+- good_morning_report: model writes the "From Timmy" section
+- Each expansion is just calling hermes_local() with a different prompt
+
+---
+
+## What Gets Hotfixed in Hermes Config
+
+If `hermes insights` is broken (the cache_read_tokens column error),
+that needs a fix. The dashboard falls back to reading sessions.json
+directly, but insights would be the better data source.
+
+The `providers.ollama` section in config.yaml exists but isn't wired
+to the --provider flag. Filing this upstream or patching locally would
+let us do `hermes chat -q "..." --provider ollama` cleanly instead
+of relying on env vars. Not blocking — env vars work today.
+
+---
+
+## What This Owns
+
+- hermes_local() — 20-line wrapper around a subprocess call
+- model_dashboard.py — read-only views over existing data
+- review_decisions.py — optional DPO annotation CLI
+
+## What This Does NOT Own
+
+- Inference. Ollama does that.
+- Telemetry. Hermes does that.
+- Session storage. Hermes does that.
+- Token counting. Hermes does that.
+- Training pipeline. Already exists in timmy-config/training/.
--- a/tasks.py
+++ b/tasks.py
@@ -14,12 +14,89 @@ from gitea_client import GiteaClient

 HERMES_HOME = Path.home() / ".hermes"
 TIMMY_HOME = Path.home() / ".timmy"
+HERMES_AGENT_DIR = HERMES_HOME / "hermes-agent"
+METRICS_DIR = TIMMY_HOME / "metrics"
 REPOS = [
    "Timmy_Foundation/the-nexus",
    "Timmy_Foundation/timmy-config",
 ]
 NET_LINE_LIMIT = 10

+# ── Local Model Inference via Hermes Harness ─────────────────────────
+
+HEARTBEAT_MODEL = "hermes4:14b"
+FALLBACK_MODEL = "hermes3:8b"
+
+
+def hermes_local(prompt, model=None, caller_tag=None):
+    """Call a local Ollama model through the Hermes harness.
+
+    Uses provider="local-ollama" which routes through the custom_providers
+    entry in config.yaml → Ollama at localhost:11434.
+    Returns response text or None on failure.
+    Every call creates a Hermes session with telemetry.
+    """
+    _model = model or HEARTBEAT_MODEL
+    tagged = f"[{caller_tag}] {prompt}" if caller_tag else prompt
+
+    # Import hermes cli.main directly — no subprocess, no env vars
+    _agent_dir = str(HERMES_AGENT_DIR)
+    if _agent_dir not in sys.path:
+        sys.path.insert(0, _agent_dir)
+    old_cwd = os.getcwd()
+    os.chdir(_agent_dir)
+
+    try:
+        from cli import main as hermes_main
+        import io
+        from contextlib import redirect_stdout, redirect_stderr
+
+        buf = io.StringIO()
+        err = io.StringIO()
+        with redirect_stdout(buf), redirect_stderr(err):
+            hermes_main(
+                query=tagged,
+                model=_model,
+                provider="local-ollama",
+                quiet=True,
+            )
+        output = buf.getvalue().strip()
+        # Strip session_id line from quiet output
+        lines = [l for l in output.split("\n") if not l.startswith("session_id:")]
+        response = "\n".join(lines).strip()
+
+        # Log to metrics jsonl
+        METRICS_DIR.mkdir(parents=True, exist_ok=True)
+        metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl"
+        record = {
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "model": _model,
+            "caller": caller_tag or "unknown",
+            "prompt_len": len(prompt),
+            "response_len": len(response),
+            "success": bool(response),
+        }
+        with open(metrics_file, "a") as f:
+            f.write(json.dumps(record) + "\n")
+
+        return response if response else None
+    except Exception as e:
+        # Log failure
+        METRICS_DIR.mkdir(parents=True, exist_ok=True)
+        metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl"
+        record = {
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "model": _model,
+            "caller": caller_tag or "unknown",
+            "error": str(e),
+            "success": False,
+        }
+        with open(metrics_file, "a") as f:
+            f.write(json.dumps(record) + "\n")
+        return None
+    finally:
+        os.chdir(old_cwd)
+

 # ── Existing: Orchestration ──────────────────────────────────────────

@@ -280,15 +357,49 @@ def heartbeat_tick():
        "previous_tick": last_tick.get("tick_id", "none"),
    }

-    # DECIDE + ACT: check for problems
-    actions = []
-    if not perception.get("gitea_alive"):
-        actions.append("ALERT: Gitea unreachable")
-    health = perception.get("model_health", {})
-    if isinstance(health, dict) and not health.get("ollama_running"):
-        actions.append("ALERT: Ollama not running")
-    
-    tick_record["actions"] = actions
+    # DECIDE: let hermes4:14b reason about what to do
+    decide_prompt = (
+        f"System state at {now.isoformat()}:\n\n"
+        f"{json.dumps(perception, indent=2)}\n\n"
+        f"Previous tick: {last_tick.get('tick_id', 'none')}\n\n"
+        "You are the heartbeat monitor. Based on this state:\n"
+        "1. List any actions needed (alerts, restarts, escalations). Empty if all OK.\n"
+        "2. Rate severity: ok, warning, or critical.\n"
+        "3. One sentence of reasoning.\n\n"
+        'Respond ONLY with JSON: {"actions": [], "severity": "ok", "reasoning": "..."}'
+    )
+
+    decision = None
+    try:
+        raw = hermes_local(decide_prompt, caller_tag="heartbeat_tick")
+        if raw:
+            # Model might wrap JSON in markdown, extract first { line
+            for line in raw.split("\n"):
+                line = line.strip()
+                if line.startswith("{"):
+                    decision = json.loads(line)
+                    break
+            if not decision:
+                decision = json.loads(raw)
+    except (json.JSONDecodeError, Exception):
+        decision = None
+
+    # Fallback to hardcoded logic if model fails or is down
+    if decision is None:
+        actions = []
+        if not perception.get("gitea_alive"):
+            actions.append("ALERT: Gitea unreachable")
+        health = perception.get("model_health", {})
+        if isinstance(health, dict) and not health.get("ollama_running"):
+            actions.append("ALERT: Ollama not running")
+        decision = {
+            "actions": actions,
+            "severity": "fallback",
+            "reasoning": "model unavailable, used hardcoded checks",
+        }
+
+    tick_record["decision"] = decision
+    actions = decision.get("actions", [])
    
    # Save tick
    last_tick_file.write_text(json.dumps(tick_record, indent=2))
Author	SHA1	Message	Date
Alexander Whitestone	fe2805b458	[gemini] Huey retrospective — 1-hour check-in and test plan (#8 )	2026-03-27 11:20:41 -04:00
Alexander Whitestone	f46a4826d9	config: update config.yaml	2026-03-27 11:00:31 -04:00
Alexander Whitestone	3b1763ce4c	config: update config.yaml	2026-03-27 00:00:30 -04:00
Alexander Whitestone	78f5216540	config: update config.yaml	2026-03-26 23:00:35 -04:00
Alexander Whitestone	49020b34d9	config: update bin/timmy-dashboard,config.yaml,docs/local-model-integration-sketch.md,tasks.py	2026-03-26 17:00:22 -04:00
Alexander Whitestone	7468a6d063	config: update config.yaml	2026-03-26 13:00:29 -04:00
Alexander Whitestone	f9155b28e3	v1.0 rejected — NaN from wrong tokenizer, Morrowind MCP pipeline working	2026-03-26 12:32:08 -04:00
Alexander Whitestone	16675abd79	config: update config.yaml	2026-03-26 12:00:46 -04:00
Alexander Whitestone	1fce489364	Add adapter manifest — version control for trained models Only version adapters (~40MB each), never base models. Base models are reproducible HuggingFace downloads referenced by path. Manifest records: base, data, training config, eval results, status. History: v0 through v0.2 on 8B (crisis gated, retired/rejected). Active: v1.0 training now on Hermes4-14B-4bit.	2026-03-26 11:44:29 -04:00