#!/usr/bin/env python3
"""Timmy Model Dashboard — where are my models, what are they doing.

Usage:
    timmy-dashboard              # one-shot
    timmy-dashboard --watch      # live refresh every 30s
    timmy-dashboard --hours=48   # look back 48h
"""

import json
import os
import subprocess
import sys
import time
import urllib.request
from collections import Counter
from datetime import datetime, timezone, timedelta
from pathlib import Path

HERMES_HOME = Path.home() / ".hermes"
TIMMY_HOME = Path.home() / ".timmy"
METRICS_DIR = TIMMY_HOME / "metrics"

OLLAMA_API = "http://localhost:11434/api"
DEFAULT_HOURS = 24
REFRESH_SECONDS = 30

# ── Data Sources ──────────────────────────────────────────────────────


def _fetch_ollama_models(endpoint):
    """Return the "models" list served by an Ollama API endpoint.

    Any failure (server down, timeout, malformed payload) yields [] so the
    dashboard can still render its other sections.
    """
    try:
        req = urllib.request.Request(f"{OLLAMA_API}/{endpoint}")
        with urllib.request.urlopen(req, timeout=5) as resp:
            # "or []" also covers a payload that carries "models": null.
            return json.loads(resp.read()).get("models") or []
    except Exception:
        # Deliberately broad: this is a best-effort probe of an optional service.
        return []


def get_ollama_models():
    """Return the models listed by Ollama's /api/tags, or [] if unreachable."""
    return _fetch_ollama_models("tags")


def get_loaded_models():
    """Return the models listed by Ollama's /api/ps, or [] if unreachable.

    render() labels these as "IN VRAM".
    """
    return _fetch_ollama_models("ps")


def get_huey_pid():
    """Return the first PID reported by ``pgrep -f huey_consumer``, else None."""
    try:
        result = subprocess.run(["pgrep", "-f", "huey_consumer"],
                                capture_output=True, text=True, timeout=5)
    except Exception:
        # pgrep missing or timed out: report "not running" rather than crash.
        return None
    if result.returncode != 0:
        return None
    pids = result.stdout.split()
    return pids[0] if pids else None


def _read_jsonl(path):
    """Return the JSON objects stored one per line in *path*.

    Blank lines, malformed lines and values that are not JSON objects are
    skipped; an unreadable or undecodable file yields [].
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, ValueError):
        return []
    records = []
    # split("\n") rather than splitlines(): the latter also breaks on
    # characters such as U+2028 that may legally appear inside a JSON string.
    for line in text.split("\n"):
        if not line.strip():
            continue
        try:
            record = json.loads(line)
        except ValueError:
            continue
        if isinstance(record, dict):
            records.append(record)
    return records


def get_hermes_sessions(sessions_file=None):
    """Return the session dicts stored in the Hermes sessions.json file.

    Args:
        sessions_file: Optional path override; defaults to
            ``~/.hermes/sessions/sessions.json``.

    Returns:
        The dict values of the top-level JSON object, or [] when the file is
        missing, unreadable, or not a JSON object.
    """
    path = (Path(sessions_file) if sessions_file
            else HERMES_HOME / "sessions" / "sessions.json")
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return []
    if not isinstance(data, dict):
        return []
    return [session for session in data.values() if isinstance(session, dict)]


def get_heartbeat_ticks(date_str=None, tick_dir=None):
    """Return the heartbeat tick records logged for one day.

    Args:
        date_str: Day to read as ``YYYYMMDD``; defaults to today (local time).
        tick_dir: Optional directory override; defaults to
            ``~/.timmy/heartbeat``.

    Returns:
        A list of tick dicts in file order, or [] when no log exists.
    """
    if not date_str:
        date_str = datetime.now().strftime("%Y%m%d")
    directory = Path(tick_dir) if tick_dir else TIMMY_HOME / "heartbeat"
    return _read_jsonl(directory / f"ticks_{date_str}.jsonl")


def _parse_timestamp(value):
    """Parse an ISO-8601 string into a timezone-aware datetime.

    Naive values are taken to be UTC so they can be compared with the aware
    cutoff instead of raising TypeError.
    """
    if value.endswith("Z"):
        # datetime.fromisoformat() only accepts a "Z" suffix from Python 3.11.
        value = value[:-1] + "+00:00"
    parsed = datetime.fromisoformat(value)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def get_local_metrics(hours=24, metrics_dir=None):
    """Read local inference metrics from jsonl files.

    Args:
        hours: Size of the look-back window, in hours.
        metrics_dir: Optional directory override; defaults to METRICS_DIR.

    Returns:
        Records from ``local_*.jsonl`` whose "timestamp" falls inside the
        window, in file-name then file order. Records with a missing or
        unparseable timestamp are skipped.
    """
    directory = Path(metrics_dir) if metrics_dir else METRICS_DIR
    if not directory.is_dir():
        return []
    cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
    records = []
    for path in sorted(directory.glob("local_*.jsonl")):
        for record in _read_jsonl(path):
            try:
                stamp = _parse_timestamp(record["timestamp"])
            except (KeyError, TypeError, ValueError, AttributeError):
                continue
            if stamp >= cutoff:
                records.append(record)
    return records


def get_cron_jobs():
    """Get Hermes cron job status ([] if the CLI is unavailable or fails)."""
    try:
        result = subprocess.run(
            ["hermes", "cron", "list", "--json"],
            capture_output=True, text=True, timeout=10
        )
        if result.returncode == 0:
            return json.loads(result.stdout).get("jobs", [])
    except Exception:
        # Best-effort: a missing or broken hermes CLI must not break the view.
        pass
    return []


# ── Rendering ─────────────────────────────────────────────────────────

DIM = "\033[2m"
BOLD = "\033[1m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
RED = "\033[31m"
CYAN = "\033[36m"
RST = "\033[0m"
CLR = "\033[2J\033[H"


def format_huey_status(pid):
    """Return the coloured Huey status: "PID <pid>" when running, else "DOWN"."""
    if pid:
        return f"{GREEN}PID {pid}{RST}"
    return f"{RED}DOWN{RST}"


def partition_sessions(sessions):
    """Split sessions into (local, cloud) lists in a single pass.

    A session is local when its "base_url" mentions ``localhost:11434``.
    """
    local, cloud = [], []
    for session in sessions:
        is_local = "localhost:11434" in str(session.get("base_url", ""))
        (local if is_local else cloud).append(session)
    return local, cloud


def _section(title):
    """Print a bold section title followed by a dim rule."""
    print(f"\n {BOLD}{title}{RST}")
    print(f" {DIM}{'-' * 55}{RST}")


def _print_header(huey_pid):
    """Print the banner with the current time and the Huey consumer status."""
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"{BOLD}{'=' * 70}")
    print(f" TIMMY MODEL DASHBOARD")
    print(f" {now} | Huey: {format_huey_status(huey_pid)}")
    print(f"{'=' * 70}{RST}")


def _print_models(models, loaded):
    """Print every Ollama model with its size and whether it is loaded."""
    _section("LOCAL MODELS (Ollama)")
    if not models:
        print(f" {RED}(Ollama not responding){RST}")
        return
    loaded_names = {m.get("name", "") for m in loaded}
    for m in models:
        name = str(m.get("name", "?"))
        size_gb = (m.get("size") or 0) / 1e9
        if name in loaded_names:
            status = f"{GREEN}IN VRAM{RST}"
        else:
            status = f"{DIM}on disk{RST}"
        print(f" {name:35s} {size_gb:5.1f}GB {status}")


def _print_inference(metrics, hours):
    """Print per-caller call/success/error counts and per-model call counts."""
    _section(f"LOCAL INFERENCE ({len(metrics)} calls, last {hours}h)")
    if not metrics:
        print(f" {DIM}(no local calls recorded yet){RST}")
        return

    by_caller = {}
    by_model = Counter()
    for record in metrics:
        # str() keeps the ":25s" / ":30s" format specs safe for null fields.
        caller = str(record.get("caller", "unknown"))
        stats = by_caller.setdefault(
            caller, {"count": 0, "success": 0, "errors": 0})
        stats["count"] += 1
        stats["success" if record.get("success") else "errors"] += 1
        by_model[str(record.get("model", "unknown"))] += 1

    for caller, stats in by_caller.items():
        err = f" {RED}err:{stats['errors']}{RST}" if stats["errors"] else ""
        print(f" {caller:25s} calls:{stats['count']:4d} "
              f"{GREEN}ok:{stats['success']}{RST}{err}")

    print(f"\n {DIM}Models used:{RST}")
    for model, count in by_model.most_common():
        print(f" {model:30s} {count} calls")


def _print_heartbeat(ticks):
    """Print the latest tick's decision and the model-vs-fallback tally."""
    _section(f"HEARTBEAT ({len(ticks)} ticks today)")
    if not ticks:
        print(f" {DIM}(no ticks today){RST}")
        return

    last = ticks[-1]
    # Ticks without a "decision" fall back to their "actions" value, which
    # takes the non-dict branch below when it is a list.
    decision = last.get("decision", last.get("actions", {}))
    print(f" Last tick: {last.get('tick_id', '?')}")
    if isinstance(decision, dict):
        severity = decision.get("severity", "unknown")
        reasoning = decision.get("reasoning", "")
        if severity == "ok":
            sev_color = GREEN
        elif severity == "warning":
            sev_color = YELLOW
        else:
            sev_color = RED
        print(f" Severity: {sev_color}{severity}{RST}")
        if reasoning:
            print(f" Reasoning: {str(reasoning)[:65]}")
    else:
        actions = last.get("actions", [])
        print(f" Actions: {actions if actions else 'none'}")

    model_decisions = sum(
        1 for tick in ticks
        if isinstance(tick.get("decision"), dict)
        and tick["decision"].get("severity") != "fallback"
    )
    fallback = len(ticks) - model_decisions
    print(f" {CYAN}Model: {model_decisions}{RST} | {DIM}Fallback: {fallback}{RST}")


def _print_sessions(sessions):
    """Print total, local and cloud Hermes session counts."""
    local_sessions, cloud_sessions = partition_sessions(sessions)
    _section("HERMES SESSIONS")
    print(f" Total: {len(sessions)} | "
          f"{GREEN}Local: {len(local_sessions)}{RST} | "
          f"{YELLOW}Cloud: {len(cloud_sessions)}{RST}")


def _print_loops():
    """Print the static table of scheduled loops."""
    _section("ACTIVE LOOPS")
    print(f" {CYAN}heartbeat_tick{RST} 10m hermes4:14b DECIDE phase")
    print(f" {DIM}model_health{RST} 5m (local check) Ollama ping")
    print(f" {DIM}gemini_worker{RST} 20m gemini-2.5-pro aider")
    print(f" {DIM}grok_worker{RST} 20m grok-3-fast opencode")
    print(f" {DIM}cross_review{RST} 30m gemini+grok PR review")


def render(hours=24):
    """Clear the terminal and print one complete dashboard frame.

    Args:
        hours: Look-back window, in hours, for the local inference section.
    """
    models = get_ollama_models()
    loaded = get_loaded_models()
    huey_pid = get_huey_pid()
    ticks = get_heartbeat_ticks()
    metrics = get_local_metrics(hours)
    sessions = get_hermes_sessions()

    print(CLR, end="")
    _print_header(huey_pid)
    _print_models(models, loaded)
    _print_inference(metrics, hours)
    _print_heartbeat(ticks)
    _print_sessions(sessions)
    _print_loops()

    print(f"\n{BOLD}{'=' * 70}{RST}")
    print(f" {DIM}Refresh: timmy-dashboard --watch | History: --hours=N{RST}")


def parse_hours(argv, default=DEFAULT_HOURS):
    """Return the value of the last ``--hours=N`` argument in *argv*.

    Args:
        argv: Command-line arguments without the program name.
        default: Value returned when no ``--hours=`` argument is present.

    Raises:
        SystemExit: If a ``--hours=`` value is not an integer.
    """
    prefix = "--hours="
    hours = default
    for arg in argv:
        if not arg.startswith(prefix):
            continue
        raw = arg[len(prefix):]
        try:
            hours = int(raw)
        except ValueError:
            sys.exit(f"timmy-dashboard: invalid --hours value {raw!r} "
                     "(expected an integer)")
    return hours


def main(argv=None):
    """Run the dashboard once, or refresh it until Ctrl-C with ``--watch``."""
    args = sys.argv[1:] if argv is None else list(argv)
    hours = parse_hours(args)

    if "--watch" not in args:
        render(hours)
        return

    try:
        while True:
            render(hours)
            time.sleep(REFRESH_SECONDS)
    except KeyboardInterrupt:
        print(f"\n{DIM}Dashboard stopped.{RST}")


if __name__ == "__main__":
    main()
a/config.yaml b/config.yaml index dc6799a4..645682c5 100644 --- a/config.yaml +++ b/config.yaml @@ -202,12 +202,7 @@ providers: base_url: http://localhost:11434/v1 model: hermes3:latest mcp_servers: - orchestration: - command: /Users/apayne/.hermes/hermes-agent/venv/bin/python3 - args: - - /Users/apayne/.hermes/hermes-agent/tools/orchestration_mcp_server.py - env: {} - timeout: 120 + morrowind: command: python3 args: diff --git a/docs/local-model-integration-sketch.md b/docs/local-model-integration-sketch.md new file mode 100644 index 00000000..f14463e5 --- /dev/null +++ b/docs/local-model-integration-sketch.md @@ -0,0 +1,438 @@ +# Local Model Integration Sketch v2 +# Hermes4-14B in the Heartbeat Loop — No New Telemetry + +## Principle + +No new inference layer. Huey tasks call `hermes chat -q` pointed at +Ollama. Hermes handles sessions, token tracking, cost logging. +The dashboard reads what Hermes already stores. + +--- + +## Why Not Ollama Directly? + +Ollama is fine as a serving backend. The issue isn't Ollama — it's that +calling Ollama directly with urllib bypasses the harness. The harness +already tracks sessions, tokens, model/provider, platform. Building a +second telemetry layer is owning code we don't need. + +Ollama as a named provider isn't wired into the --provider flag yet, +but routing works via env vars: + + HERMES_MODEL="hermes4:14b" \ + HERMES_PROVIDER="custom" \ + HERMES_BASE_URL="http://localhost:11434/v1" \ + hermes chat -q "prompt here" -Q + +This creates a tracked session, logs tokens, and returns the response. +That's our local inference call. + +### Alternatives to Ollama for serving: +- **llama.cpp server** — lighter, no Python, raw HTTP. Good for single + model serving. Less convenient for model switching. +- **vLLM** — best throughput, but needs NVIDIA GPU. Not for M3 Mac. +- **MLX serving** — native Apple Silicon, but no OpenAI-compat API yet. + MLX is for training, not serving (our current policy). 
+- **llamafile** — single binary, portable. Good for distribution. + +Verdict: Ollama is fine. It's the standard OpenAI-compat local server +on Mac. The issue was never Ollama — it was bypassing the harness. + +--- + +## 1. The Call Pattern + +One function in tasks.py that all Huey tasks use: + +```python +import subprocess +import json + +HERMES_BIN = "hermes" +LOCAL_ENV = { + "HERMES_MODEL": "hermes4:14b", + "HERMES_PROVIDER": "custom", + "HERMES_BASE_URL": "http://localhost:11434/v1", +} + +def hermes_local(prompt, caller_tag=None, max_retries=2): + """Call hermes with local Ollama model. Returns response text. + + Every call creates a hermes session with full telemetry. + caller_tag gets prepended to prompt for searchability. + """ + import os + env = os.environ.copy() + env.update(LOCAL_ENV) + + tagged_prompt = prompt + if caller_tag: + tagged_prompt = f"[{caller_tag}] {prompt}" + + for attempt in range(max_retries + 1): + try: + result = subprocess.run( + [HERMES_BIN, "chat", "-q", tagged_prompt, "-Q", "-t", "none"], + capture_output=True, text=True, + timeout=120, env=env, + ) + if result.returncode == 0 and result.stdout.strip(): + # Strip the session_id line from -Q output + lines = result.stdout.strip().split("\n") + response_lines = [l for l in lines if not l.startswith("session_id:")] + return "\n".join(response_lines).strip() + except subprocess.TimeoutExpired: + if attempt == max_retries: + return None + continue + return None +``` + +Notes: +- `-t none` disables all toolsets — the heartbeat model shouldn't + have terminal/file access. Pure reasoning only. +- `-Q` quiet mode suppresses banner/spinner, gives clean output. +- Every call creates a session in Hermes session store. Searchable, + exportable, countable. +- The `[caller_tag]` prefix lets you filter sessions by which Huey + task generated them: `hermes sessions list | grep heartbeat` + +--- + +## 2. 
Heartbeat DECIDE Phase + +Replace the hardcoded if/else with a model call: + +```python +# In heartbeat_tick(), replace the DECIDE + ACT section: + + # DECIDE: let hermes4:14b reason about what to do + decide_prompt = f"""System state at {now.isoformat()}: + +{json.dumps(perception, indent=2)} + +Previous tick: {last_tick.get('tick_id', 'none')} + +You are the heartbeat monitor. Based on this state: +1. List any actions needed (alerts, restarts, escalations). Empty if all OK. +2. Rate severity: ok, warning, or critical. +3. One sentence of reasoning. + +Respond ONLY with JSON: +{{"actions": [], "severity": "ok", "reasoning": "..."}}""" + + decision = None + try: + raw = hermes_local(decide_prompt, caller_tag="heartbeat_tick") + if raw: + # Try to parse JSON from the response + # Model might wrap it in markdown, so extract + for line in raw.split("\n"): + line = line.strip() + if line.startswith("{"): + decision = json.loads(line) + break + if not decision: + decision = json.loads(raw) + except (json.JSONDecodeError, Exception) as e: + decision = None + + # Fallback to hardcoded logic if model fails or is down + if decision is None: + actions = [] + if not perception.get("gitea_alive"): + actions.append("ALERT: Gitea unreachable") + health = perception.get("model_health", {}) + if isinstance(health, dict) and not health.get("ollama_running"): + actions.append("ALERT: Ollama not running") + decision = { + "actions": actions, + "severity": "fallback", + "reasoning": "model unavailable, used hardcoded checks" + } + + tick_record["decision"] = decision + actions = decision.get("actions", []) +``` + +--- + +## 3. DPO Candidate Collection + +No new database. Hermes sessions ARE the DPO candidates. + +Every `hermes_local()` call creates a session. 
To extract DPO pairs: + +```bash +# Export all local-model sessions +hermes sessions export --output /tmp/local-sessions.jsonl + +# Filter for heartbeat decisions +grep "heartbeat_tick" /tmp/local-sessions.jsonl > heartbeat_decisions.jsonl +``` + +The existing `session_export` Huey task (runs every 4h) already extracts +user→assistant pairs. It just needs to be aware that some sessions are +now local-model decisions instead of human conversations. + +For DPO annotation, add a simple review script: + +```python +# review_decisions.py — reads heartbeat tick logs, shows model decisions, +# asks Alexander to mark chosen/rejected +# Writes annotations back to the tick log files + +import json +from pathlib import Path + +TICK_DIR = Path.home() / ".timmy" / "heartbeat" + +for log_file in sorted(TICK_DIR.glob("ticks_*.jsonl")): + for line in log_file.read_text().strip().split("\n"): + tick = json.loads(line) + decision = tick.get("decision", {}) + if decision.get("severity") == "fallback": + continue # skip fallback entries + + print(f"\n--- Tick {tick['tick_id']} ---") + print(f"Perception: {json.dumps(tick['perception'], indent=2)}") + print(f"Decision: {json.dumps(decision, indent=2)}") + + rating = input("Rate (c=chosen, r=rejected, s=skip): ").strip() + if rating in ("c", "r"): + tick["dpo_label"] = "chosen" if rating == "c" else "rejected" + # write back... (append to annotated file) +``` + +--- + +## 4. 
Dashboard — Reads Hermes Data + +```python +#!/usr/bin/env python3 +"""Timmy Model Dashboard — reads from Hermes, owns nothing.""" + +import json +import os +import subprocess +import sys +import time +import urllib.request +from datetime import datetime +from pathlib import Path + +HERMES_HOME = Path.home() / ".hermes" +TIMMY_HOME = Path.home() / ".timmy" + + +def get_ollama_models(): + """What's available in Ollama.""" + try: + req = urllib.request.Request("http://localhost:11434/api/tags") + with urllib.request.urlopen(req, timeout=5) as resp: + return json.loads(resp.read()).get("models", []) + except Exception: + return [] + + +def get_loaded_models(): + """What's actually in VRAM right now.""" + try: + req = urllib.request.Request("http://localhost:11434/api/ps") + with urllib.request.urlopen(req, timeout=5) as resp: + return json.loads(resp.read()).get("models", []) + except Exception: + return [] + + +def get_huey_status(): + try: + r = subprocess.run(["pgrep", "-f", "huey_consumer"], + capture_output=True, timeout=5) + return r.returncode == 0 + except Exception: + return False + + +def get_hermes_sessions(hours=24): + """Read session metadata from Hermes session store.""" + sessions_file = HERMES_HOME / "sessions" / "sessions.json" + if not sessions_file.exists(): + return [] + try: + data = json.loads(sessions_file.read_text()) + return list(data.values()) + except Exception: + return [] + + +def get_heartbeat_ticks(date_str=None): + """Read today's heartbeat ticks.""" + if not date_str: + date_str = datetime.now().strftime("%Y%m%d") + tick_file = TIMMY_HOME / "heartbeat" / f"ticks_{date_str}.jsonl" + if not tick_file.exists(): + return [] + ticks = [] + for line in tick_file.read_text().strip().split("\n"): + try: + ticks.append(json.loads(line)) + except Exception: + continue + return ticks + + +def render(hours=24): + models = get_ollama_models() + loaded = get_loaded_models() + huey = get_huey_status() + sessions = get_hermes_sessions(hours) + ticks 
= get_heartbeat_ticks() + + loaded_names = {m.get("name", "") for m in loaded} + + print("\033[2J\033[H") + print("=" * 70) + print(" TIMMY MODEL DASHBOARD") + now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print(f" {now} | Huey: {'UP' if huey else 'DOWN'} | Ollama models: {len(models)}") + print("=" * 70) + + # DEPLOYMENTS + print("\n LOCAL MODELS") + print(" " + "-" * 55) + for m in models: + name = m.get("name", "?") + size_gb = m.get("size", 0) / 1e9 + status = "IN VRAM" if name in loaded_names else "on disk" + print(f" {name:35s} {size_gb:5.1f}GB {status}") + if not models: + print(" (Ollama not responding)") + + # HERMES SESSION ACTIVITY + # Count sessions by platform/provider + print(f"\n HERMES SESSIONS (recent)") + print(" " + "-" * 55) + local_sessions = [s for s in sessions + if "localhost" in str(s.get("origin", {}))] + cli_sessions = [s for s in sessions + if s.get("platform") == "cli" or s.get("origin", {}).get("platform") == "cli"] + + total_tokens = sum(s.get("total_tokens", 0) for s in sessions) + print(f" Total sessions: {len(sessions)}") + print(f" CLI sessions: {len(cli_sessions)}") + print(f" Total tokens: {total_tokens:,}") + + # HEARTBEAT STATUS + print(f"\n HEARTBEAT ({len(ticks)} ticks today)") + print(" " + "-" * 55) + if ticks: + last = ticks[-1] + decision = last.get("decision", {}) + severity = decision.get("severity", "unknown") + reasoning = decision.get("reasoning", "no model decision yet") + print(f" Last tick: {last.get('tick_id', '?')}") + print(f" Severity: {severity}") + print(f" Reasoning: {reasoning[:60]}") + + # Count model vs fallback decisions + model_decisions = sum(1 for t in ticks + if t.get("decision", {}).get("severity") != "fallback") + fallback = len(ticks) - model_decisions + print(f" Model decisions: {model_decisions} | Fallback: {fallback}") + + # DPO labels if any + labeled = sum(1 for t in ticks if "dpo_label" in t) + if labeled: + chosen = sum(1 for t in ticks if t.get("dpo_label") == "chosen") + rejected 
= sum(1 for t in ticks if t.get("dpo_label") == "rejected") + print(f" DPO labeled: {labeled} (chosen: {chosen}, rejected: {rejected})") + else: + print(" (no ticks today)") + + # ACTIVE LOOPS + print(f"\n ACTIVE LOOPS USING LOCAL MODELS") + print(" " + "-" * 55) + print(" heartbeat_tick 10m hermes4:14b DECIDE phase") + print(" (future) 15m hermes4:14b issue triage") + print(" (future) daily timmy:v0.1 morning report") + + print(f"\n NON-LOCAL LOOPS (Gemini/Grok API)") + print(" " + "-" * 55) + print(" gemini_worker 20m gemini-2.5-pro aider") + print(" grok_worker 20m grok-3-fast opencode") + print(" cross_review 30m both PR review") + + print("\n" + "=" * 70) + + +if __name__ == "__main__": + watch = "--watch" in sys.argv + hours = 24 + for a in sys.argv[1:]: + if a.startswith("--hours="): + hours = int(a.split("=")[1]) + if watch: + while True: + render(hours) + time.sleep(30) + else: + render(hours) +``` + +--- + +## 5. Implementation Steps + +### Step 1: Add hermes_local() to tasks.py +- One function, ~20 lines +- Calls `hermes chat -q` with Ollama env vars +- All telemetry comes from Hermes for free + +### Step 2: Wire heartbeat_tick DECIDE phase +- Replace 6 lines of if/else with hermes_local() call +- Keep hardcoded fallback when model is down +- Decision stored in tick record for DPO review + +### Step 3: Fix the MCP server warning +- The orchestration MCP server path is broken — harmless but noisy +- Either fix the path or remove from config + +### Step 4: Drop model_dashboard.py in timmy-config/bin/ +- Reads Ollama API, Hermes sessions, heartbeat ticks +- No new data stores — just views over existing ones +- `python3 model_dashboard.py --watch` for live view + +### Step 5: Expand to more Huey tasks +- triage_issues: model reads issue, picks agent +- good_morning_report: model writes the "From Timmy" section +- Each expansion is just calling hermes_local() with a different prompt + +--- + +## What Gets Hotfixed in Hermes Config + +If `hermes insights` is 
broken (the cache_read_tokens column error), +that needs a fix. The dashboard falls back to reading sessions.json +directly, but insights would be the better data source. + +The `providers.ollama` section in config.yaml exists but isn't wired +to the --provider flag. Filing this upstream or patching locally would +let us do `hermes chat -q "..." --provider ollama` cleanly instead +of relying on env vars. Not blocking — env vars work today. + +--- + +## What This Owns + +- hermes_local() — 20-line wrapper around a subprocess call +- model_dashboard.py — read-only views over existing data +- review_decisions.py — optional DPO annotation CLI + +## What This Does NOT Own + +- Inference. Ollama does that. +- Telemetry. Hermes does that. +- Session storage. Hermes does that. +- Token counting. Hermes does that. +- Training pipeline. Already exists in timmy-config/training/. diff --git a/tasks.py b/tasks.py index 7b5aebe8..11e065c1 100644 --- a/tasks.py +++ b/tasks.py @@ -14,12 +14,89 @@ from gitea_client import GiteaClient HERMES_HOME = Path.home() / ".hermes" TIMMY_HOME = Path.home() / ".timmy" +HERMES_AGENT_DIR = HERMES_HOME / "hermes-agent" +METRICS_DIR = TIMMY_HOME / "metrics" REPOS = [ "Timmy_Foundation/the-nexus", "Timmy_Foundation/timmy-config", ] NET_LINE_LIMIT = 10 +# ── Local Model Inference via Hermes Harness ───────────────────────── + +HEARTBEAT_MODEL = "hermes4:14b" +FALLBACK_MODEL = "hermes3:8b" + + +def hermes_local(prompt, model=None, caller_tag=None): + """Call a local Ollama model through the Hermes harness. + + Uses provider="local-ollama" which routes through the custom_providers + entry in config.yaml → Ollama at localhost:11434. + Returns response text or None on failure. + Every call creates a Hermes session with telemetry. 
+ """ + _model = model or HEARTBEAT_MODEL + tagged = f"[{caller_tag}] {prompt}" if caller_tag else prompt + + # Import hermes cli.main directly — no subprocess, no env vars + _agent_dir = str(HERMES_AGENT_DIR) + if _agent_dir not in sys.path: + sys.path.insert(0, _agent_dir) + old_cwd = os.getcwd() + os.chdir(_agent_dir) + + try: + from cli import main as hermes_main + import io + from contextlib import redirect_stdout, redirect_stderr + + buf = io.StringIO() + err = io.StringIO() + with redirect_stdout(buf), redirect_stderr(err): + hermes_main( + query=tagged, + model=_model, + provider="local-ollama", + quiet=True, + ) + output = buf.getvalue().strip() + # Strip session_id line from quiet output + lines = [l for l in output.split("\n") if not l.startswith("session_id:")] + response = "\n".join(lines).strip() + + # Log to metrics jsonl + METRICS_DIR.mkdir(parents=True, exist_ok=True) + metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl" + record = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "model": _model, + "caller": caller_tag or "unknown", + "prompt_len": len(prompt), + "response_len": len(response), + "success": bool(response), + } + with open(metrics_file, "a") as f: + f.write(json.dumps(record) + "\n") + + return response if response else None + except Exception as e: + # Log failure + METRICS_DIR.mkdir(parents=True, exist_ok=True) + metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl" + record = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "model": _model, + "caller": caller_tag or "unknown", + "error": str(e), + "success": False, + } + with open(metrics_file, "a") as f: + f.write(json.dumps(record) + "\n") + return None + finally: + os.chdir(old_cwd) + # ── Existing: Orchestration ────────────────────────────────────────── @@ -280,15 +357,49 @@ def heartbeat_tick(): "previous_tick": last_tick.get("tick_id", "none"), } - # DECIDE + ACT: check for problems - actions = [] - 
if not perception.get("gitea_alive"): - actions.append("ALERT: Gitea unreachable") - health = perception.get("model_health", {}) - if isinstance(health, dict) and not health.get("ollama_running"): - actions.append("ALERT: Ollama not running") - - tick_record["actions"] = actions + # DECIDE: let hermes4:14b reason about what to do + decide_prompt = ( + f"System state at {now.isoformat()}:\n\n" + f"{json.dumps(perception, indent=2)}\n\n" + f"Previous tick: {last_tick.get('tick_id', 'none')}\n\n" + "You are the heartbeat monitor. Based on this state:\n" + "1. List any actions needed (alerts, restarts, escalations). Empty if all OK.\n" + "2. Rate severity: ok, warning, or critical.\n" + "3. One sentence of reasoning.\n\n" + 'Respond ONLY with JSON: {"actions": [], "severity": "ok", "reasoning": "..."}' + ) + + decision = None + try: + raw = hermes_local(decide_prompt, caller_tag="heartbeat_tick") + if raw: + # Model might wrap JSON in markdown, extract first { line + for line in raw.split("\n"): + line = line.strip() + if line.startswith("{"): + decision = json.loads(line) + break + if not decision: + decision = json.loads(raw) + except (json.JSONDecodeError, Exception): + decision = None + + # Fallback to hardcoded logic if model fails or is down + if decision is None: + actions = [] + if not perception.get("gitea_alive"): + actions.append("ALERT: Gitea unreachable") + health = perception.get("model_health", {}) + if isinstance(health, dict) and not health.get("ollama_running"): + actions.append("ALERT: Ollama not running") + decision = { + "actions": actions, + "severity": "fallback", + "reasoning": "model unavailable, used hardcoded checks", + } + + tick_record["decision"] = decision + actions = decision.get("actions", []) # Save tick last_tick_file.write_text(json.dumps(tick_record, indent=2))