[gemini] Huey retrospective — 1-hour check-in and test plan (#8 )

config: update config.yaml
2026-03-27 11:20:41 -04:00 · 2026-03-27 11:00:31 -04:00 · 2026-03-27 00:00:30 -04:00 · 2026-03-26 23:00:35 -04:00 · 2026-03-26 17:00:22 -04:00 · 2026-03-26 13:00:29 -04:00
5 changed files with 825 additions and 17 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@
 *.db-wal
 *.db-shm
 __pycache__/
 .aider*
--- a/bin/timmy-dashboard
+++ b/bin/timmy-dashboard
@@ -0,0 +1,252 @@
 #!/usr/bin/env python3
 """Timmy Model Dashboard — where are my models, what are they doing.
 Usage:
    timmy-dashboard              # one-shot
    timmy-dashboard --watch      # live refresh every 30s
    timmy-dashboard --hours=48   # look back 48h
 """
 import json
 import os
 import subprocess
 import sys
 import time
 import urllib.request
 from datetime import datetime, timezone, timedelta
 from pathlib import Path
 # Root of the Hermes state directory; the session store is read from here.
 HERMES_HOME = Path.home() / ".hermes"
 # Root of Timmy's own state directory (heartbeat tick logs and metrics).
 TIMMY_HOME = Path.home() / ".timmy"
 # Directory scanned for local_*.jsonl inference metric files.
 METRICS_DIR = TIMMY_HOME / "metrics"
 # ── Data Sources ──────────────────────────────────────────────────────
 def get_ollama_models():
    """Return the "models" list served by the local Ollama /api/tags endpoint.
    Best-effort: any failure (connection, timeout, bad JSON, unexpected
    payload shape) yields an empty list.
    """
    tags_request = urllib.request.Request("http://localhost:11434/api/tags")
    try:
        with urllib.request.urlopen(tags_request, timeout=5) as response:
            payload = json.loads(response.read())
        return payload.get("models", [])
    except Exception:
        return []
 def get_loaded_models():
    """Return the "models" list served by the local Ollama /api/ps endpoint.
    Best-effort: any failure (connection, timeout, bad JSON, unexpected
    payload shape) yields an empty list.
    """
    ps_request = urllib.request.Request("http://localhost:11434/api/ps")
    try:
        with urllib.request.urlopen(ps_request, timeout=5) as response:
            payload = json.loads(response.read())
        return payload.get("models", [])
    except Exception:
        return []
 def get_huey_pid():
    """Return the first PID printed by `pgrep -f huey_consumer`, or None.
    The PID is returned as a string. None is returned when pgrep exits
    non-zero or cannot be run at all.
    """
    try:
        proc = subprocess.run(
            ["pgrep", "-f", "huey_consumer"],
            capture_output=True,
            text=True,
            timeout=5,
        )
        if proc.returncode != 0:
            return None
        first_line = proc.stdout.strip().split("\n")[0]
        return first_line
    except Exception:
        return None
 def get_hermes_sessions():
    """Return the values of the Hermes sessions.json mapping as a list.
    Yields an empty list when the file is missing, unreadable, not valid
    JSON, or does not decode to a mapping.
    """
    store = HERMES_HOME / "sessions" / "sessions.json"
    if store.exists():
        try:
            return list(json.loads(store.read_text()).values())
        except Exception:
            pass
    return []
 def get_heartbeat_ticks(date_str=None):
    """Load the heartbeat tick records stored for one day.
    date_str selects the file ticks_<date_str>.jsonl under the heartbeat
    directory; when falsy, today's local date formatted as YYYYMMDD is used.
    Blank and unparseable lines are skipped. Returns a list of decoded
    records, empty when the file does not exist.
    """
    day = date_str or datetime.now().strftime("%Y%m%d")
    path = TIMMY_HOME / "heartbeat" / f"ticks_{day}.jsonl"
    if not path.exists():
        return []
    parsed = []
    for raw in path.read_text().strip().split("\n"):
        if raw.strip():
            try:
                parsed.append(json.loads(raw))
            except Exception:
                pass
    return parsed
 def get_local_metrics(hours=24):
    """Collect local inference metric records from the last `hours` hours.
    Scans every local_*.jsonl file in METRICS_DIR (in sorted name order) and
    keeps records whose ISO-format "timestamp" is at or after the UTC cutoff.
    Lines that are blank, malformed, missing a timestamp, or whose timestamp
    cannot be compared with the cutoff are skipped.
    """
    oldest = datetime.now(timezone.utc) - timedelta(hours=hours)
    kept = []
    if not METRICS_DIR.exists():
        return kept
    for path in sorted(METRICS_DIR.glob("local_*.jsonl")):
        for raw in path.read_text().strip().split("\n"):
            if not raw.strip():
                continue
            try:
                rec = json.loads(raw)
                stamp = datetime.fromisoformat(rec["timestamp"])
                is_recent = stamp >= oldest
            except Exception:
                continue
            if is_recent:
                kept.append(rec)
    return kept
 def get_cron_jobs():
    """Return the "jobs" list printed by `hermes cron list --json`.
    An empty list is returned if the command cannot be run, times out,
    exits non-zero, or prints something that is not a JSON object.
    """
    cmd = ["hermes", "cron", "list", "--json"]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        if proc.returncode != 0:
            return []
        return json.loads(proc.stdout).get("jobs", [])
    except Exception:
        return []
 # ── Rendering ─────────────────────────────────────────────────────────
 # ANSI escape sequences used by render() to style terminal output.
 DIM = "\033[2m"
 BOLD = "\033[1m"
 GREEN = "\033[32m"
 YELLOW = "\033[33m"
 RED = "\033[31m"
 CYAN = "\033[36m"
 # Reset every attribute set by the codes above.
 RST = "\033[0m"
 # Erase the whole screen, then move the cursor to the top-left corner.
 CLR = "\033[2J\033[H"
 def render(hours=24):
    """Clear the terminal and print one complete dashboard frame.
    Collects the Ollama model lists, the Huey consumer PID, today's heartbeat
    ticks, local inference metrics from the last `hours` hours and the Hermes
    session list, then prints each section in order. Returns None.
    """
    models = get_ollama_models()
    loaded = get_loaded_models()
    huey_pid = get_huey_pid()
    ticks = get_heartbeat_ticks()
    metrics = get_local_metrics(hours)
    sessions = get_hermes_sessions()
    loaded_names = {m.get("name", "") for m in loaded}
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Build the Huey status on its own so that a missing PID shows only
    # "DOWN" rather than "PID None" immediately followed by "DOWN".
    if huey_pid:
        huey_status = f"{GREEN}PID {huey_pid}{RST}"
    else:
        huey_status = f"{RED}DOWN{RST}"
    print(CLR, end="")
    print(f"{BOLD}{'=' * 70}")
    print("  TIMMY MODEL DASHBOARD")
    print(f"  {now}  |  Huey: {huey_status}")
    print(f"{'=' * 70}{RST}")
    # ── LOCAL MODELS ──
    print(f"\n  {BOLD}LOCAL MODELS (Ollama){RST}")
    print(f"  {DIM}{'-' * 55}{RST}")
    if models:
        for m in models:
            name = m.get("name", "?")
            size_gb = m.get("size", 0) / 1e9
            if name in loaded_names:
                status = f"{GREEN}IN VRAM{RST}"
            else:
                status = f"{DIM}on disk{RST}"
            print(f"    {name:35s} {size_gb:5.1f}GB  {status}")
    else:
        print(f"    {RED}(Ollama not responding){RST}")
    # ── LOCAL INFERENCE ACTIVITY ──
    print(f"\n  {BOLD}LOCAL INFERENCE ({len(metrics)} calls, last {hours}h){RST}")
    print(f"  {DIM}{'-' * 55}{RST}")
    if metrics:
        # Per-caller totals: every record counts as a call and as either a
        # success or an error depending on its "success" flag.
        by_caller = {}
        for r in metrics:
            caller = r.get("caller", "unknown")
            if caller not in by_caller:
                by_caller[caller] = {"count": 0, "success": 0, "errors": 0}
            by_caller[caller]["count"] += 1
            if r.get("success"):
                by_caller[caller]["success"] += 1
            else:
                by_caller[caller]["errors"] += 1
        for caller, stats in by_caller.items():
            err = f"  {RED}err:{stats['errors']}{RST}" if stats["errors"] else ""
            print(f"    {caller:25s}  calls:{stats['count']:4d}  "
                  f"{GREEN}ok:{stats['success']}{RST}{err}")
        by_model = {}
        for r in metrics:
            model = r.get("model", "unknown")
            by_model[model] = by_model.get(model, 0) + 1
        print(f"\n    {DIM}Models used:{RST}")
        # Most-used model first.
        for model, count in sorted(by_model.items(), key=lambda x: -x[1]):
            print(f"      {model:30s}  {count} calls")
    else:
        print(f"    {DIM}(no local calls recorded yet){RST}")
    # ── HEARTBEAT STATUS ──
    print(f"\n  {BOLD}HEARTBEAT ({len(ticks)} ticks today){RST}")
    print(f"  {DIM}{'-' * 55}{RST}")
    if ticks:
        last = ticks[-1]
        # Ticks without a "decision" fall back to their "actions" value, which
        # takes the non-dict branch below when it is a list.
        decision = last.get("decision", last.get("actions", {}))
        if isinstance(decision, dict):
            severity = decision.get("severity", "unknown")
            reasoning = decision.get("reasoning", "")
            sev_color = GREEN if severity == "ok" else YELLOW if severity == "warning" else RED
            print(f"    Last tick:  {last.get('tick_id', '?')}")
            print(f"    Severity:   {sev_color}{severity}{RST}")
            if reasoning:
                # The decision is decoded JSON, so "reasoning" is not
                # guaranteed to be a string; str() keeps the slice safe.
                print(f"    Reasoning:  {str(reasoning)[:65]}")
        else:
            print(f"    Last tick:  {last.get('tick_id', '?')}")
            actions = last.get("actions", [])
            print(f"    Actions:    {actions if actions else 'none'}")
        # A tick counts as a model decision only when it carries a dict
        # decision whose severity is not "fallback"; everything else is
        # reported as fallback.
        model_decisions = sum(1 for t in ticks
                            if isinstance(t.get("decision"), dict)
                            and t["decision"].get("severity") != "fallback")
        fallback = len(ticks) - model_decisions
        print(f"    {CYAN}Model: {model_decisions}{RST}  |  {DIM}Fallback: {fallback}{RST}")
    else:
        print(f"    {DIM}(no ticks today){RST}")
    # ── HERMES SESSIONS ──
    # Split sessions in a single pass: local means the session's base_url
    # mentions localhost:11434, everything else is counted as cloud.
    local_sessions = []
    cloud_sessions = []
    for s in sessions:
        if "localhost:11434" in str(s.get("base_url", "")):
            local_sessions.append(s)
        else:
            cloud_sessions.append(s)
    print(f"\n  {BOLD}HERMES SESSIONS{RST}")
    print(f"  {DIM}{'-' * 55}{RST}")
    print(f"    Total: {len(sessions)}  |  "
          f"{GREEN}Local: {len(local_sessions)}{RST}  |  "
          f"{YELLOW}Cloud: {len(cloud_sessions)}{RST}")
    # ── ACTIVE LOOPS ──
    # Static text: these rows are hard-coded, not read from any live source.
    print(f"\n  {BOLD}ACTIVE LOOPS{RST}")
    print(f"  {DIM}{'-' * 55}{RST}")
    print(f"    {CYAN}heartbeat_tick{RST}    10m    hermes4:14b    DECIDE phase")
    print(f"    {DIM}model_health{RST}      5m     (local check)  Ollama ping")
    print(f"    {DIM}gemini_worker{RST}     20m    gemini-2.5-pro aider")
    print(f"    {DIM}grok_worker{RST}       20m    grok-3-fast    opencode")
    print(f"    {DIM}cross_review{RST}      30m    gemini+grok    PR review")
    print(f"\n{BOLD}{'=' * 70}{RST}")
    print(f"  {DIM}Refresh: timmy-dashboard --watch | History: --hours=N{RST}")
 if __name__ == "__main__":
    # Minimal flag handling: --watch toggles the refresh loop and
    # --hours=N sets the metrics look-back window (default 24).
    watch = "--watch" in sys.argv
    hours = 24
    for a in sys.argv[1:]:
        if a.startswith("--hours="):
            raw_hours = a.split("=", 1)[1]
            try:
                hours = int(raw_hours)
            except ValueError:
                # Exit with a short usage error instead of a traceback.
                sys.exit(f"timmy-dashboard: --hours expects an integer, got {raw_hours!r}")
    if watch:
        try:
            # Redraw every 30 seconds until interrupted with Ctrl-C.
            while True:
                render(hours)
                time.sleep(30)
        except KeyboardInterrupt:
            print(f"\n{DIM}Dashboard stopped.{RST}")
    else:
        render(hours)
--- a/config.yaml
+++ b/config.yaml
@@ -1,11 +1,13 @@
 model:
-  default: claude-opus-4-6
+  default: hermes4
-  provider: anthropic
+  provider: custom
  base_url: http://localhost:8081/v1
  context_length: 65536
 toolsets:
 - all
 agent:
  max_turns: 30
-  reasoning_effort: medium
+  reasoning_effort: xhigh
  verbose: false
 terminal:
  backend: local
@@ -184,11 +186,15 @@ custom_providers:
 - name: Local Ollama
  base_url: http://localhost:11434/v1
  api_key: ollama
-  model: glm-4.7-flash:latest
+  model: hermes4:14b
 - name: Google Gemini
  base_url: https://generativelanguage.googleapis.com/v1beta/openai
  api_key_env: GEMINI_API_KEY
  model: gemini-2.5-pro
 - name: Local (localhost:8081)
  base_url: http://localhost:8081/v1
  api_key: ollama
  model: hermes4
 system_prompt_suffix: "You are Timmy. Your soul is defined in SOUL.md \u2014 read\
  \ it, live it.\nYou run locally on your owner's machine via Ollama. You never phone\
  \ home.\nYou speak plainly. You prefer short sentences. Brevity is a kindness.\n\
@@ -202,12 +208,12 @@ providers:
    base_url: http://localhost:11434/v1
    model: hermes3:latest
 mcp_servers:
-  orchestration:
+  morrowind:
-    command: /Users/apayne/.hermes/hermes-agent/venv/bin/python3
+    command: python3
    args:
-    - /Users/apayne/.hermes/hermes-agent/tools/orchestration_mcp_server.py
+    - /Users/apayne/.timmy/morrowind/mcp_server.py
    env: {}
-    timeout: 120
+    timeout: 30
 fallback_model:
  provider: custom
  model: gemini-2.5-pro
--- a/docs/local-model-integration-sketch.md
+++ b/docs/local-model-integration-sketch.md
@@ -0,0 +1,438 @@
 # Local Model Integration Sketch v2
 # Hermes4-14B in the Heartbeat Loop — No New Telemetry
 ## Principle
 No new inference layer. Huey tasks call `hermes chat -q` pointed at
 Ollama. Hermes handles sessions, token tracking, cost logging.
 The dashboard reads what Hermes already stores.
 ---
 ## Why Not Ollama Directly?
 Ollama is fine as a serving backend. The issue isn't Ollama — it's that
 calling Ollama directly with urllib bypasses the harness. The harness
 already tracks sessions, tokens, model/provider, platform. Building a
 second telemetry layer is owning code we don't need.
 Ollama as a named provider isn't wired into the --provider flag yet,
 but routing works via env vars:
    HERMES_MODEL="hermes4:14b" \
    HERMES_PROVIDER="custom" \
    HERMES_BASE_URL="http://localhost:11434/v1" \
    hermes chat -q "prompt here" -Q
 This creates a tracked session, logs tokens, and returns the response.
 That's our local inference call.
 ### Alternatives to Ollama for serving:
 - **llama.cpp server** — lighter, no Python, raw HTTP. Good for single
  model serving. Less convenient for model switching.
 - **vLLM** — best throughput, but needs NVIDIA GPU. Not for M3 Mac.
 - **MLX serving** — native Apple Silicon, but no OpenAI-compat API yet.
  MLX is for training, not serving (our current policy).
 - **llamafile** — single binary, portable. Good for distribution.
 Verdict: Ollama is fine. It's the standard OpenAI-compat local server
 on Mac. The issue was never Ollama — it was bypassing the harness.
 ---
 ## 1. The Call Pattern
 One function in tasks.py that all Huey tasks use:
 ```python
 import subprocess
 import json
 HERMES_BIN = "hermes"
 LOCAL_ENV = {
    "HERMES_MODEL": "hermes4:14b",
    "HERMES_PROVIDER": "custom",
    "HERMES_BASE_URL": "http://localhost:11434/v1",
 }
 def hermes_local(prompt, caller_tag=None, max_retries=2):
    """Call hermes with local Ollama model. Returns response text.
    Every call creates a hermes session with full telemetry.
    caller_tag gets prepended to prompt for searchability.
    """
    import os
    env = os.environ.copy()
    env.update(LOCAL_ENV)
    tagged_prompt = prompt
    if caller_tag:
        tagged_prompt = f"[{caller_tag}] {prompt}"
    for attempt in range(max_retries + 1):
        try:
            result = subprocess.run(
                [HERMES_BIN, "chat", "-q", tagged_prompt, "-Q", "-t", "none"],
                capture_output=True, text=True,
                timeout=120, env=env,
            )
            if result.returncode == 0 and result.stdout.strip():
                # Strip the session_id line from -Q output
                lines = result.stdout.strip().split("\n")
                response_lines = [l for l in lines if not l.startswith("session_id:")]
                return "\n".join(response_lines).strip()
        except subprocess.TimeoutExpired:
            if attempt == max_retries:
                return None
            continue
    return None
 ```
 Notes:
 - `-t none` disables all toolsets — the heartbeat model shouldn't
  have terminal/file access. Pure reasoning only.
 - `-Q` quiet mode suppresses banner/spinner, gives clean output.
 - Every call creates a session in Hermes session store. Searchable,
  exportable, countable.
 - The `[caller_tag]` prefix lets you filter sessions by which Huey
  task generated them: `hermes sessions list | grep heartbeat`
 ---
 ## 2. Heartbeat DECIDE Phase
 Replace the hardcoded if/else with a model call:
 ```python
 # In heartbeat_tick(), replace the DECIDE + ACT section:
    # DECIDE: let hermes4:14b reason about what to do
    decide_prompt = f"""System state at {now.isoformat()}:
 {json.dumps(perception, indent=2)}
 Previous tick: {last_tick.get('tick_id', 'none')}
 You are the heartbeat monitor. Based on this state:
 1. List any actions needed (alerts, restarts, escalations). Empty if all OK.
 2. Rate severity: ok, warning, or critical.
 3. One sentence of reasoning.
 Respond ONLY with JSON:
 {{"actions": [], "severity": "ok", "reasoning": "..."}}"""
    decision = None
    try:
        raw = hermes_local(decide_prompt, caller_tag="heartbeat_tick")
        if raw:
            # Try to parse JSON from the response
            # Model might wrap it in markdown, so extract
            for line in raw.split("\n"):
                line = line.strip()
                if line.startswith("{"):
                    decision = json.loads(line)
                    break
            if not decision:
                decision = json.loads(raw)
    except (json.JSONDecodeError, Exception) as e:
        decision = None
    # Fallback to hardcoded logic if model fails or is down
    if decision is None:
        actions = []
        if not perception.get("gitea_alive"):
            actions.append("ALERT: Gitea unreachable")
        health = perception.get("model_health", {})
        if isinstance(health, dict) and not health.get("ollama_running"):
            actions.append("ALERT: Ollama not running")
        decision = {
            "actions": actions,
            "severity": "fallback",
            "reasoning": "model unavailable, used hardcoded checks"
        }
    tick_record["decision"] = decision
    actions = decision.get("actions", [])
 ```
 ---
 ## 3. DPO Candidate Collection
 No new database. Hermes sessions ARE the DPO candidates.
 Every `hermes_local()` call creates a session. To extract DPO pairs:
 ```bash
 # Export all local-model sessions
 hermes sessions export --output /tmp/local-sessions.jsonl
 # Filter for heartbeat decisions
 grep "heartbeat_tick" /tmp/local-sessions.jsonl > heartbeat_decisions.jsonl
 ```
 The existing `session_export` Huey task (runs every 4h) already extracts
 user→assistant pairs. It just needs to be aware that some sessions are
 now local-model decisions instead of human conversations.
 For DPO annotation, add a simple review script:
 ```python
 # review_decisions.py — reads heartbeat tick logs, shows model decisions,
 # asks Alexander to mark chosen/rejected
 # Writes annotations back to the tick log files
 import json
 from pathlib import Path
 TICK_DIR = Path.home() / ".timmy" / "heartbeat"
 for log_file in sorted(TICK_DIR.glob("ticks_*.jsonl")):
    for line in log_file.read_text().strip().split("\n"):
        tick = json.loads(line)
        decision = tick.get("decision", {})
        if decision.get("severity") == "fallback":
            continue  # skip fallback entries
        print(f"\n--- Tick {tick['tick_id']} ---")
        print(f"Perception: {json.dumps(tick['perception'], indent=2)}")
        print(f"Decision:   {json.dumps(decision, indent=2)}")
        rating = input("Rate (c=chosen, r=rejected, s=skip): ").strip()
        if rating in ("c", "r"):
            tick["dpo_label"] = "chosen" if rating == "c" else "rejected"
            # write back... (append to annotated file)
 ```
 ---
 ## 4. Dashboard — Reads Hermes Data
 ```python
 #!/usr/bin/env python3
 """Timmy Model Dashboard — reads from Hermes, owns nothing."""
 import json
 import os
 import subprocess
 import sys
 import time
 import urllib.request
 from datetime import datetime
 from pathlib import Path
 HERMES_HOME = Path.home() / ".hermes"
 TIMMY_HOME = Path.home() / ".timmy"
 def get_ollama_models():
    """What's available in Ollama."""
    try:
        req = urllib.request.Request("http://localhost:11434/api/tags")
        with urllib.request.urlopen(req, timeout=5) as resp:
            return json.loads(resp.read()).get("models", [])
    except Exception:
        return []
 def get_loaded_models():
    """What's actually in VRAM right now."""
    try:
        req = urllib.request.Request("http://localhost:11434/api/ps")
        with urllib.request.urlopen(req, timeout=5) as resp:
            return json.loads(resp.read()).get("models", [])
    except Exception:
        return []
 def get_huey_status():
    try:
        r = subprocess.run(["pgrep", "-f", "huey_consumer"],
                          capture_output=True, timeout=5)
        return r.returncode == 0
    except Exception:
        return False
 def get_hermes_sessions(hours=24):
    """Read session metadata from Hermes session store."""
    sessions_file = HERMES_HOME / "sessions" / "sessions.json"
    if not sessions_file.exists():
        return []
    try:
        data = json.loads(sessions_file.read_text())
        return list(data.values())
    except Exception:
        return []
 def get_heartbeat_ticks(date_str=None):
    """Read today's heartbeat ticks."""
    if not date_str:
        date_str = datetime.now().strftime("%Y%m%d")
    tick_file = TIMMY_HOME / "heartbeat" / f"ticks_{date_str}.jsonl"
    if not tick_file.exists():
        return []
    ticks = []
    for line in tick_file.read_text().strip().split("\n"):
        try:
            ticks.append(json.loads(line))
        except Exception:
            continue
    return ticks
 def render(hours=24):
    models = get_ollama_models()
    loaded = get_loaded_models()
    huey = get_huey_status()
    sessions = get_hermes_sessions(hours)
    ticks = get_heartbeat_ticks()
    loaded_names = {m.get("name", "") for m in loaded}
    print("\033[2J\033[H")
    print("=" * 70)
    print("  TIMMY MODEL DASHBOARD")
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"  {now}  |  Huey: {'UP' if huey else 'DOWN'}  |  Ollama models: {len(models)}")
    print("=" * 70)
    # DEPLOYMENTS
    print("\n  LOCAL MODELS")
    print("  " + "-" * 55)
    for m in models:
        name = m.get("name", "?")
        size_gb = m.get("size", 0) / 1e9
        status = "IN VRAM" if name in loaded_names else "on disk"
        print(f"    {name:35s} {size_gb:5.1f}GB  {status}")
    if not models:
        print("    (Ollama not responding)")
    # HERMES SESSION ACTIVITY
    # Count sessions by platform/provider
    print(f"\n  HERMES SESSIONS (recent)")
    print("  " + "-" * 55)
    local_sessions = [s for s in sessions
                     if "localhost" in str(s.get("origin", {}))]
    cli_sessions = [s for s in sessions
                   if s.get("platform") == "cli" or s.get("origin", {}).get("platform") == "cli"]
    total_tokens = sum(s.get("total_tokens", 0) for s in sessions)
    print(f"    Total sessions: {len(sessions)}")
    print(f"    CLI sessions: {len(cli_sessions)}")
    print(f"    Total tokens: {total_tokens:,}")
    # HEARTBEAT STATUS
    print(f"\n  HEARTBEAT ({len(ticks)} ticks today)")
    print("  " + "-" * 55)
    if ticks:
        last = ticks[-1]
        decision = last.get("decision", {})
        severity = decision.get("severity", "unknown")
        reasoning = decision.get("reasoning", "no model decision yet")
        print(f"    Last tick: {last.get('tick_id', '?')}")
        print(f"    Severity:  {severity}")
        print(f"    Reasoning: {reasoning[:60]}")
        # Count model vs fallback decisions
        model_decisions = sum(1 for t in ticks
                            if t.get("decision", {}).get("severity") != "fallback")
        fallback = len(ticks) - model_decisions
        print(f"    Model decisions: {model_decisions}  |  Fallback: {fallback}")
        # DPO labels if any
        labeled = sum(1 for t in ticks if "dpo_label" in t)
        if labeled:
            chosen = sum(1 for t in ticks if t.get("dpo_label") == "chosen")
            rejected = sum(1 for t in ticks if t.get("dpo_label") == "rejected")
            print(f"    DPO labeled: {labeled} (chosen: {chosen}, rejected: {rejected})")
    else:
        print("    (no ticks today)")
    # ACTIVE LOOPS
    print(f"\n  ACTIVE LOOPS USING LOCAL MODELS")
    print("  " + "-" * 55)
    print("    heartbeat_tick    10m    hermes4:14b    DECIDE phase")
    print("    (future)          15m    hermes4:14b    issue triage")
    print("    (future)          daily  timmy:v0.1     morning report")
    print(f"\n  NON-LOCAL LOOPS (Gemini/Grok API)")
    print("  " + "-" * 55)
    print("    gemini_worker     20m    gemini-2.5-pro   aider")
    print("    grok_worker       20m    grok-3-fast      opencode")
    print("    cross_review      30m    both             PR review")
    print("\n" + "=" * 70)
 if __name__ == "__main__":
    watch = "--watch" in sys.argv
    hours = 24
    for a in sys.argv[1:]:
        if a.startswith("--hours="):
            hours = int(a.split("=")[1])
    if watch:
        while True:
            render(hours)
            time.sleep(30)
    else:
        render(hours)
 ```
 ---
 ## 5. Implementation Steps
 ### Step 1: Add hermes_local() to tasks.py
 - One function, ~20 lines
 - Calls `hermes chat -q` with Ollama env vars
 - All telemetry comes from Hermes for free
 ### Step 2: Wire heartbeat_tick DECIDE phase
 - Replace 6 lines of if/else with hermes_local() call
 - Keep hardcoded fallback when model is down
 - Decision stored in tick record for DPO review
 ### Step 3: Fix the MCP server warning
 - The orchestration MCP server path is broken — harmless but noisy
 - Either fix the path or remove from config
 ### Step 4: Drop model_dashboard.py in timmy-config/bin/ (shipped as `bin/timmy-dashboard`)
 - Reads Ollama API, Hermes sessions, heartbeat ticks
 - No new data stores — just views over existing ones
 - `timmy-dashboard --watch` for live view
 ### Step 5: Expand to more Huey tasks
 - triage_issues: model reads issue, picks agent
 - good_morning_report: model writes the "From Timmy" section
 - Each expansion is just calling hermes_local() with a different prompt
 ---
 ## What Gets Hotfixed in Hermes Config
 If `hermes insights` is broken (the cache_read_tokens column error),
 that needs a fix. The dashboard falls back to reading sessions.json
 directly, but insights would be the better data source.
 The `providers.ollama` section in config.yaml exists but isn't wired
 to the --provider flag. Filing this upstream or patching locally would
 let us do `hermes chat -q "..." --provider ollama` cleanly instead
 of relying on env vars. Not blocking — env vars work today.
 ---
 ## What This Owns
 - hermes_local() — 20-line wrapper around a subprocess call
 - model_dashboard.py — read-only views over existing data
 - review_decisions.py — optional DPO annotation CLI
 ## What This Does NOT Own
 - Inference. Ollama does that.
 - Telemetry. Hermes does that.
 - Session storage. Hermes does that.
 - Token counting. Hermes does that.
 - Training pipeline. Already exists in timmy-config/training/.
--- a/tasks.py
+++ b/tasks.py
@@ -14,12 +14,89 @@ from gitea_client import GiteaClient
 # Roots of the Hermes and Timmy state directories under the user's home.
 HERMES_HOME = Path.home() / ".hermes"
 TIMMY_HOME = Path.home() / ".timmy"
 # hermes_local() puts this directory on sys.path and chdirs into it so that
 # the Hermes `cli` module can be imported in-process.
 HERMES_AGENT_DIR = HERMES_HOME / "hermes-agent"
 # hermes_local() appends one record per call to local_<YYYYMMDD>.jsonl here.
 METRICS_DIR = TIMMY_HOME / "metrics"
 # NOTE(review): REPOS and NET_LINE_LIMIT are consumed outside the visible
 # hunk; their exact use cannot be confirmed from here.
 REPOS = [
    "Timmy_Foundation/the-nexus",
    "Timmy_Foundation/timmy-config",
 ]
 NET_LINE_LIMIT = 10
 # ── Local Model Inference via Hermes Harness ─────────────────────────
 # Model used by hermes_local() when the caller does not pass one.
 HEARTBEAT_MODEL = "hermes4:14b"
 # NOTE(review): not referenced in the visible part of this file — confirm
 # whether a fallback-model path is still planned.
 FALLBACK_MODEL = "hermes3:8b"
 def _log_local_metric(record):
    """Append one record to today's local metrics jsonl file, best-effort."""
    try:
        METRICS_DIR.mkdir(parents=True, exist_ok=True)
        metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl"
        with open(metrics_file, "a") as f:
            f.write(json.dumps(record) + "\n")
    except OSError:
        # Metrics are telemetry only: failing to write a line must neither
        # discard a model response nor raise out of hermes_local().
        pass
 def hermes_local(prompt, model=None, caller_tag=None):
    """Call a local Ollama model through the Hermes harness.
    Runs the Hermes CLI entry point in-process with provider="local-ollama"
    and captures what it prints. `model` defaults to HEARTBEAT_MODEL;
    `caller_tag`, when given, is prefixed to the prompt as "[tag] ".
    Returns the response text, or None if the call raised or produced no
    output. Each call appends one record to the metrics jsonl file for the
    current day; a failure to write that record is ignored.
    NOTE: while the CLI runs, the process working directory is changed to
    HERMES_AGENT_DIR and stdout/stderr are redirected; both are restored
    before returning.
    """
    _model = model or HEARTBEAT_MODEL
    tagged = f"[{caller_tag}] {prompt}" if caller_tag else prompt
    caller = caller_tag or "unknown"
    # Import hermes cli.main directly — no subprocess, no env vars
    _agent_dir = str(HERMES_AGENT_DIR)
    if _agent_dir not in sys.path:
        sys.path.insert(0, _agent_dir)
    old_cwd = os.getcwd()
    try:
        # chdir lives inside the try so that a missing agent directory is
        # recorded as a failed call instead of raising to the caller.
        os.chdir(_agent_dir)
        from cli import main as hermes_main
        import io
        from contextlib import redirect_stdout, redirect_stderr
        buf = io.StringIO()
        err = io.StringIO()
        # NOTE(review): SystemExit is not an Exception subclass, so a
        # sys.exit() inside the CLI would escape this handler — confirm that
        # hermes_main never exits the interpreter in quiet mode.
        with redirect_stdout(buf), redirect_stderr(err):
            hermes_main(
                query=tagged,
                model=_model,
                provider="local-ollama",
                quiet=True,
            )
        output = buf.getvalue().strip()
        # Strip session_id line from quiet output
        kept = [ln for ln in output.split("\n") if not ln.startswith("session_id:")]
        response = "\n".join(kept).strip()
    except Exception as e:
        # Log failure
        _log_local_metric({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "model": _model,
            "caller": caller,
            "error": str(e),
            "success": False,
        })
        return None
    finally:
        os.chdir(old_cwd)
    # Log to metrics jsonl
    _log_local_metric({
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "model": _model,
        "caller": caller,
        "prompt_len": len(prompt),
        "response_len": len(response),
        "success": bool(response),
    })
    return response if response else None
 # ── Existing: Orchestration ──────────────────────────────────────────
@@ -280,15 +357,49 @@ def heartbeat_tick():
        "previous_tick": last_tick.get("tick_id", "none"),
    }
-    # DECIDE + ACT: check for problems
+    # DECIDE: let hermes4:14b reason about what to do
-    actions = []
+    decide_prompt = (
-    if not perception.get("gitea_alive"):
+        f"System state at {now.isoformat()}:\n\n"
-        actions.append("ALERT: Gitea unreachable")
+        f"{json.dumps(perception, indent=2)}\n\n"
-    health = perception.get("model_health", {})
+        f"Previous tick: {last_tick.get('tick_id', 'none')}\n\n"
-    if isinstance(health, dict) and not health.get("ollama_running"):
+        "You are the heartbeat monitor. Based on this state:\n"
-        actions.append("ALERT: Ollama not running")
+        "1. List any actions needed (alerts, restarts, escalations). Empty if all OK.\n"
-    
+        "2. Rate severity: ok, warning, or critical.\n"
-    tick_record["actions"] = actions
+        "3. One sentence of reasoning.\n\n"
        'Respond ONLY with JSON: {"actions": [], "severity": "ok", "reasoning": "..."}'
    )
    decision = None
    try:
        raw = hermes_local(decide_prompt, caller_tag="heartbeat_tick")
        if raw:
            # Model might wrap JSON in markdown, extract first { line
            for line in raw.split("\n"):
                line = line.strip()
                if line.startswith("{"):
                    decision = json.loads(line)
                    break
            if not decision:
                decision = json.loads(raw)
    except (json.JSONDecodeError, Exception):
        decision = None
    # Fallback to hardcoded logic if model fails or is down
    if decision is None:
        actions = []
        if not perception.get("gitea_alive"):
            actions.append("ALERT: Gitea unreachable")
        health = perception.get("model_health", {})
        if isinstance(health, dict) and not health.get("ollama_running"):
            actions.append("ALERT: Ollama not running")
        decision = {
            "actions": actions,
            "severity": "fallback",
            "reasoning": "model unavailable, used hardcoded checks",
        }
    tick_record["decision"] = decision
    actions = decision.get("actions", [])
    # Save tick
    last_tick_file.write_text(json.dumps(tick_record, indent=2))
Author	SHA1	Message	Date
Alexander Whitestone	fe2805b458	[gemini] Huey retrospective — 1-hour check-in and test plan (#8 )	2026-03-27 11:20:41 -04:00
Alexander Whitestone	f46a4826d9	config: update config.yaml	2026-03-27 11:00:31 -04:00
Alexander Whitestone	3b1763ce4c	config: update config.yaml	2026-03-27 00:00:30 -04:00
Alexander Whitestone	78f5216540	config: update config.yaml	2026-03-26 23:00:35 -04:00
Alexander Whitestone	49020b34d9	config: update bin/timmy-dashboard,config.yaml,docs/local-model-integration-sketch.md,tasks.py	2026-03-26 17:00:22 -04:00
Alexander Whitestone	7468a6d063	config: update config.yaml	2026-03-26 13:00:29 -04:00