Compare commits

..

9 Commits

Author SHA1 Message Date
Alexander Whitestone
fe2805b458 [gemini] Huey retrospective — 1-hour check-in and test plan (#8) 2026-03-27 11:20:41 -04:00
Alexander Whitestone
f46a4826d9 config: update config.yaml 2026-03-27 11:00:31 -04:00
Alexander Whitestone
3b1763ce4c config: update config.yaml 2026-03-27 00:00:30 -04:00
Alexander Whitestone
78f5216540 config: update config.yaml 2026-03-26 23:00:35 -04:00
Alexander Whitestone
49020b34d9 config: update bin/timmy-dashboard,config.yaml,docs/local-model-integration-sketch.md,tasks.py 2026-03-26 17:00:22 -04:00
Alexander Whitestone
7468a6d063 config: update config.yaml 2026-03-26 13:00:29 -04:00
Alexander Whitestone
f9155b28e3 v1.0 rejected — NaN from wrong tokenizer, Morrowind MCP pipeline working 2026-03-26 12:32:08 -04:00
Alexander Whitestone
16675abd79 config: update config.yaml 2026-03-26 12:00:46 -04:00
Alexander Whitestone
1fce489364 Add adapter manifest — version control for trained models
Only version adapters (~40MB each), never base models.
Base models are reproducible HuggingFace downloads referenced by path.
Manifest records: base, data, training config, eval results, status.

History: v0 through v0.2 on 8B (crisis gated, retired/rejected).
Active: v1.0 training now on Hermes4-14B-4bit.
2026-03-26 11:44:29 -04:00
5 changed files with 886 additions and 17 deletions

62
autolora/manifest.yaml Normal file
View File

@@ -0,0 +1,62 @@
# Timmy Adapter Manifest
# Only version adapters, never base models. Base models are reproducible downloads.
# Adapters are the diff. The manifest is the record.
bases:
hermes3-8b-4bit:
source: mlx-community/Hermes-3-Llama-3.1-8B-4bit
local: ~/models/Hermes-3-Llama-3.1-8B-4bit
arch: llama3
params: 8B
quant: 4-bit MLX
hermes4-14b-4bit:
source: mlx-community/Hermes-4-14B-4bit
local: ~/models/hermes4-14b-mlx
arch: qwen3
params: 14.8B
quant: 4-bit MLX
adapters:
timmy-v0:
base: hermes3-8b-4bit
date: 2026-03-24
status: retired
data: 1154 sessions (technical only, no crisis/pastoral)
training: { lr: 2e-6, rank: 8, iters: 1000, best_iter: 800, val_loss: 2.134 }
eval: { identity: PASS, sovereignty: PASS, coding: PASS, crisis: FAIL, faith: FAIL }
notes: "First adapter. Crisis fails — data was 99% technical. Sacred rule: REJECTED."
timmy-v0-nan-run1:
base: hermes3-8b-4bit
date: 2026-03-24
status: rejected
notes: "NaN at iter 70. lr=1e-5 too high for 4-bit. Dead on arrival."
timmy-v0.1:
base: hermes3-8b-4bit
date: 2026-03-25
status: retired
data: 1203 train / 135 valid (enriched with 49 crisis/faith synthetic)
training: { lr: 5e-6, rank: 8, iters: 600, val_loss: 2.026 }
eval: { identity: PASS, sovereignty: PASS, coding: PASS, crisis: PARTIAL, faith: FAIL }
notes: "Crisis partial — mentions seeking help but no 988/gospel. Rank 8 can't override base priors."
timmy-v0.2:
base: hermes3-8b-4bit
date: 2026-03-25
status: rejected
data: 1214 train / 141 valid (12 targeted crisis/faith examples, 5x duplicated)
training: { lr: 5e-6, rank: 16, iters: 800 }
eval: "NaN at iter 100. Rank 16 + lr 5e-6 unstable on 4-bit."
notes: "Dead. Halve lr when doubling rank."
# NEXT
timmy-v1.0:
base: hermes4-14b-4bit
date: 2026-03-26
status: rejected
data: 1125 train / 126 valid (same curated set, reused from 8B — NOT re-tokenized)
training: { lr: 1e-6, rank: 16, iters: 800 }
eval: "Val NaN iter 100, train NaN iter 160. Dead."
notes: "Data was pre-truncated for Llama3 tokenizer, not Qwen3. Must re-run clean_data.py with 14B tokenizer before v1.1."

252
bin/timmy-dashboard Executable file
View File

@@ -0,0 +1,252 @@
#!/usr/bin/env python3
"""Timmy Model Dashboard — where are my models, what are they doing.
Usage:
timmy-dashboard # one-shot
timmy-dashboard --watch # live refresh every 30s
timmy-dashboard --hours=48 # look back 48h
"""
import json
import os
import subprocess
import sys
import time
import urllib.request
from datetime import datetime, timezone, timedelta
from pathlib import Path
HERMES_HOME = Path.home() / ".hermes"
TIMMY_HOME = Path.home() / ".timmy"
METRICS_DIR = TIMMY_HOME / "metrics"
# ── Data Sources ──────────────────────────────────────────────────────
def get_ollama_models():
try:
req = urllib.request.Request("http://localhost:11434/api/tags")
with urllib.request.urlopen(req, timeout=5) as resp:
return json.loads(resp.read()).get("models", [])
except Exception:
return []
def get_loaded_models():
try:
req = urllib.request.Request("http://localhost:11434/api/ps")
with urllib.request.urlopen(req, timeout=5) as resp:
return json.loads(resp.read()).get("models", [])
except Exception:
return []
def get_huey_pid():
try:
r = subprocess.run(["pgrep", "-f", "huey_consumer"],
capture_output=True, text=True, timeout=5)
return r.stdout.strip().split("\n")[0] if r.returncode == 0 else None
except Exception:
return None
def get_hermes_sessions():
sessions_file = HERMES_HOME / "sessions" / "sessions.json"
if not sessions_file.exists():
return []
try:
data = json.loads(sessions_file.read_text())
return list(data.values())
except Exception:
return []
def get_heartbeat_ticks(date_str=None):
if not date_str:
date_str = datetime.now().strftime("%Y%m%d")
tick_file = TIMMY_HOME / "heartbeat" / f"ticks_{date_str}.jsonl"
if not tick_file.exists():
return []
ticks = []
for line in tick_file.read_text().strip().split("\n"):
if not line.strip():
continue
try:
ticks.append(json.loads(line))
except Exception:
continue
return ticks
def get_local_metrics(hours=24):
"""Read local inference metrics from jsonl files."""
records = []
cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
if not METRICS_DIR.exists():
return records
for f in sorted(METRICS_DIR.glob("local_*.jsonl")):
for line in f.read_text().strip().split("\n"):
if not line.strip():
continue
try:
r = json.loads(line)
ts = datetime.fromisoformat(r["timestamp"])
if ts >= cutoff:
records.append(r)
except Exception:
continue
return records
def get_cron_jobs():
"""Get Hermes cron job status."""
try:
r = subprocess.run(
["hermes", "cron", "list", "--json"],
capture_output=True, text=True, timeout=10
)
if r.returncode == 0:
return json.loads(r.stdout).get("jobs", [])
except Exception:
pass
return []
# ── Rendering ─────────────────────────────────────────────────────────
DIM = "\033[2m"
BOLD = "\033[1m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
RED = "\033[31m"
CYAN = "\033[36m"
RST = "\033[0m"
CLR = "\033[2J\033[H"
def render(hours=24):
models = get_ollama_models()
loaded = get_loaded_models()
huey_pid = get_huey_pid()
ticks = get_heartbeat_ticks()
metrics = get_local_metrics(hours)
sessions = get_hermes_sessions()
loaded_names = {m.get("name", "") for m in loaded}
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(CLR, end="")
print(f"{BOLD}{'=' * 70}")
print(f" TIMMY MODEL DASHBOARD")
print(f" {now} | Huey: {GREEN}PID {huey_pid}{RST if huey_pid else f'{RED}DOWN{RST}'}")
print(f"{'=' * 70}{RST}")
# ── LOCAL MODELS ──
print(f"\n {BOLD}LOCAL MODELS (Ollama){RST}")
print(f" {DIM}{'-' * 55}{RST}")
if models:
for m in models:
name = m.get("name", "?")
size_gb = m.get("size", 0) / 1e9
if name in loaded_names:
status = f"{GREEN}IN VRAM{RST}"
else:
status = f"{DIM}on disk{RST}"
print(f" {name:35s} {size_gb:5.1f}GB {status}")
else:
print(f" {RED}(Ollama not responding){RST}")
# ── LOCAL INFERENCE ACTIVITY ──
print(f"\n {BOLD}LOCAL INFERENCE ({len(metrics)} calls, last {hours}h){RST}")
print(f" {DIM}{'-' * 55}{RST}")
if metrics:
by_caller = {}
for r in metrics:
caller = r.get("caller", "unknown")
if caller not in by_caller:
by_caller[caller] = {"count": 0, "success": 0, "errors": 0}
by_caller[caller]["count"] += 1
if r.get("success"):
by_caller[caller]["success"] += 1
else:
by_caller[caller]["errors"] += 1
for caller, stats in by_caller.items():
err = f" {RED}err:{stats['errors']}{RST}" if stats["errors"] else ""
print(f" {caller:25s} calls:{stats['count']:4d} "
f"{GREEN}ok:{stats['success']}{RST}{err}")
by_model = {}
for r in metrics:
model = r.get("model", "unknown")
by_model[model] = by_model.get(model, 0) + 1
print(f"\n {DIM}Models used:{RST}")
for model, count in sorted(by_model.items(), key=lambda x: -x[1]):
print(f" {model:30s} {count} calls")
else:
print(f" {DIM}(no local calls recorded yet){RST}")
# ── HEARTBEAT STATUS ──
print(f"\n {BOLD}HEARTBEAT ({len(ticks)} ticks today){RST}")
print(f" {DIM}{'-' * 55}{RST}")
if ticks:
last = ticks[-1]
decision = last.get("decision", last.get("actions", {}))
if isinstance(decision, dict):
severity = decision.get("severity", "unknown")
reasoning = decision.get("reasoning", "")
sev_color = GREEN if severity == "ok" else YELLOW if severity == "warning" else RED
print(f" Last tick: {last.get('tick_id', '?')}")
print(f" Severity: {sev_color}{severity}{RST}")
if reasoning:
print(f" Reasoning: {reasoning[:65]}")
else:
print(f" Last tick: {last.get('tick_id', '?')}")
actions = last.get("actions", [])
print(f" Actions: {actions if actions else 'none'}")
model_decisions = sum(1 for t in ticks
if isinstance(t.get("decision"), dict)
and t["decision"].get("severity") != "fallback")
fallback = len(ticks) - model_decisions
print(f" {CYAN}Model: {model_decisions}{RST} | {DIM}Fallback: {fallback}{RST}")
else:
print(f" {DIM}(no ticks today){RST}")
# ── HERMES SESSIONS ──
local_sessions = [s for s in sessions
if "localhost:11434" in str(s.get("base_url", ""))]
cloud_sessions = [s for s in sessions if s not in local_sessions]
print(f"\n {BOLD}HERMES SESSIONS{RST}")
print(f" {DIM}{'-' * 55}{RST}")
print(f" Total: {len(sessions)} | "
f"{GREEN}Local: {len(local_sessions)}{RST} | "
f"{YELLOW}Cloud: {len(cloud_sessions)}{RST}")
# ── ACTIVE LOOPS ──
print(f"\n {BOLD}ACTIVE LOOPS{RST}")
print(f" {DIM}{'-' * 55}{RST}")
print(f" {CYAN}heartbeat_tick{RST} 10m hermes4:14b DECIDE phase")
print(f" {DIM}model_health{RST} 5m (local check) Ollama ping")
print(f" {DIM}gemini_worker{RST} 20m gemini-2.5-pro aider")
print(f" {DIM}grok_worker{RST} 20m grok-3-fast opencode")
print(f" {DIM}cross_review{RST} 30m gemini+grok PR review")
print(f"\n{BOLD}{'=' * 70}{RST}")
print(f" {DIM}Refresh: timmy-dashboard --watch | History: --hours=N{RST}")
if __name__ == "__main__":
watch = "--watch" in sys.argv
hours = 24
for a in sys.argv[1:]:
if a.startswith("--hours="):
hours = int(a.split("=")[1])
if watch:
try:
while True:
render(hours)
time.sleep(30)
except KeyboardInterrupt:
print(f"\n{DIM}Dashboard stopped.{RST}")
else:
render(hours)

View File

@@ -1,11 +1,13 @@
model:
default: claude-sonnet-4-6
provider: anthropic
default: hermes4
provider: custom
base_url: http://localhost:8081/v1
context_length: 65536
toolsets:
- all
agent:
max_turns: 30
reasoning_effort: medium
reasoning_effort: xhigh
verbose: false
terminal:
backend: local
@@ -184,11 +186,15 @@ custom_providers:
- name: Local Ollama
base_url: http://localhost:11434/v1
api_key: ollama
model: glm-4.7-flash:latest
model: hermes4:14b
- name: Google Gemini
base_url: https://generativelanguage.googleapis.com/v1beta/openai
api_key_env: GEMINI_API_KEY
model: gemini-2.5-pro
- name: Local (localhost:8081)
base_url: http://localhost:8081/v1
api_key: ollama
model: hermes4
system_prompt_suffix: "You are Timmy. Your soul is defined in SOUL.md \u2014 read\
\ it, live it.\nYou run locally on your owner's machine via Ollama. You never phone\
\ home.\nYou speak plainly. You prefer short sentences. Brevity is a kindness.\n\
@@ -202,12 +208,12 @@ providers:
base_url: http://localhost:11434/v1
model: hermes3:latest
mcp_servers:
orchestration:
command: /Users/apayne/.hermes/hermes-agent/venv/bin/python3
morrowind:
command: python3
args:
- /Users/apayne/.hermes/hermes-agent/tools/orchestration_mcp_server.py
- /Users/apayne/.timmy/morrowind/mcp_server.py
env: {}
timeout: 120
timeout: 30
fallback_model:
provider: custom
model: gemini-2.5-pro

View File

@@ -0,0 +1,438 @@
# Local Model Integration Sketch v2
# Hermes4-14B in the Heartbeat Loop — No New Telemetry
## Principle
No new inference layer. Huey tasks call `hermes chat -q` pointed at
Ollama. Hermes handles sessions, token tracking, cost logging.
The dashboard reads what Hermes already stores.
---
## Why Not Ollama Directly?
Ollama is fine as a serving backend. The issue isn't Ollama — it's that
calling Ollama directly with urllib bypasses the harness. The harness
already tracks sessions, tokens, model/provider, platform. Building a
second telemetry layer is owning code we don't need.
Ollama as a named provider isn't wired into the --provider flag yet,
but routing works via env vars:
HERMES_MODEL="hermes4:14b" \
HERMES_PROVIDER="custom" \
HERMES_BASE_URL="http://localhost:11434/v1" \
hermes chat -q "prompt here" -Q
This creates a tracked session, logs tokens, and returns the response.
That's our local inference call.
### Alternatives to Ollama for serving:
- **llama.cpp server** — lighter, no Python, raw HTTP. Good for single
model serving. Less convenient for model switching.
- **vLLM** — best throughput, but needs NVIDIA GPU. Not for M3 Mac.
- **MLX serving** — native Apple Silicon, but no OpenAI-compat API yet.
MLX is for training, not serving (our current policy).
- **llamafile** — single binary, portable. Good for distribution.
Verdict: Ollama is fine. It's the standard OpenAI-compat local server
on Mac. The issue was never Ollama — it was bypassing the harness.
---
## 1. The Call Pattern
One function in tasks.py that all Huey tasks use:
```python
import subprocess
import json
HERMES_BIN = "hermes"
LOCAL_ENV = {
"HERMES_MODEL": "hermes4:14b",
"HERMES_PROVIDER": "custom",
"HERMES_BASE_URL": "http://localhost:11434/v1",
}
def hermes_local(prompt, caller_tag=None, max_retries=2):
"""Call hermes with local Ollama model. Returns response text.
Every call creates a hermes session with full telemetry.
caller_tag gets prepended to prompt for searchability.
"""
import os
env = os.environ.copy()
env.update(LOCAL_ENV)
tagged_prompt = prompt
if caller_tag:
tagged_prompt = f"[{caller_tag}] {prompt}"
for attempt in range(max_retries + 1):
try:
result = subprocess.run(
[HERMES_BIN, "chat", "-q", tagged_prompt, "-Q", "-t", "none"],
capture_output=True, text=True,
timeout=120, env=env,
)
if result.returncode == 0 and result.stdout.strip():
# Strip the session_id line from -Q output
lines = result.stdout.strip().split("\n")
response_lines = [l for l in lines if not l.startswith("session_id:")]
return "\n".join(response_lines).strip()
except subprocess.TimeoutExpired:
if attempt == max_retries:
return None
continue
return None
```
Notes:
- `-t none` disables all toolsets — the heartbeat model shouldn't
have terminal/file access. Pure reasoning only.
- `-Q` quiet mode suppresses banner/spinner, gives clean output.
- Every call creates a session in Hermes session store. Searchable,
exportable, countable.
- The `[caller_tag]` prefix lets you filter sessions by which Huey
task generated them: `hermes sessions list | grep heartbeat`
---
## 2. Heartbeat DECIDE Phase
Replace the hardcoded if/else with a model call:
```python
# In heartbeat_tick(), replace the DECIDE + ACT section:
# DECIDE: let hermes4:14b reason about what to do
decide_prompt = f"""System state at {now.isoformat()}:
{json.dumps(perception, indent=2)}
Previous tick: {last_tick.get('tick_id', 'none')}
You are the heartbeat monitor. Based on this state:
1. List any actions needed (alerts, restarts, escalations). Empty if all OK.
2. Rate severity: ok, warning, or critical.
3. One sentence of reasoning.
Respond ONLY with JSON:
{{"actions": [], "severity": "ok", "reasoning": "..."}}"""
decision = None
try:
raw = hermes_local(decide_prompt, caller_tag="heartbeat_tick")
if raw:
# Try to parse JSON from the response
# Model might wrap it in markdown, so extract
for line in raw.split("\n"):
line = line.strip()
if line.startswith("{"):
decision = json.loads(line)
break
if not decision:
decision = json.loads(raw)
except (json.JSONDecodeError, Exception) as e:
decision = None
# Fallback to hardcoded logic if model fails or is down
if decision is None:
actions = []
if not perception.get("gitea_alive"):
actions.append("ALERT: Gitea unreachable")
health = perception.get("model_health", {})
if isinstance(health, dict) and not health.get("ollama_running"):
actions.append("ALERT: Ollama not running")
decision = {
"actions": actions,
"severity": "fallback",
"reasoning": "model unavailable, used hardcoded checks"
}
tick_record["decision"] = decision
actions = decision.get("actions", [])
```
---
## 3. DPO Candidate Collection
No new database. Hermes sessions ARE the DPO candidates.
Every `hermes_local()` call creates a session. To extract DPO pairs:
```bash
# Export all local-model sessions
hermes sessions export --output /tmp/local-sessions.jsonl
# Filter for heartbeat decisions
grep "heartbeat_tick" /tmp/local-sessions.jsonl > heartbeat_decisions.jsonl
```
The existing `session_export` Huey task (runs every 4h) already extracts
user→assistant pairs. It just needs to be aware that some sessions are
now local-model decisions instead of human conversations.
For DPO annotation, add a simple review script:
```python
# review_decisions.py — reads heartbeat tick logs, shows model decisions,
# asks Alexander to mark chosen/rejected
# Writes annotations back to the tick log files
import json
from pathlib import Path
TICK_DIR = Path.home() / ".timmy" / "heartbeat"
for log_file in sorted(TICK_DIR.glob("ticks_*.jsonl")):
for line in log_file.read_text().strip().split("\n"):
tick = json.loads(line)
decision = tick.get("decision", {})
if decision.get("severity") == "fallback":
continue # skip fallback entries
print(f"\n--- Tick {tick['tick_id']} ---")
print(f"Perception: {json.dumps(tick['perception'], indent=2)}")
print(f"Decision: {json.dumps(decision, indent=2)}")
rating = input("Rate (c=chosen, r=rejected, s=skip): ").strip()
if rating in ("c", "r"):
tick["dpo_label"] = "chosen" if rating == "c" else "rejected"
# write back... (append to annotated file)
```
---
## 4. Dashboard — Reads Hermes Data
```python
#!/usr/bin/env python3
"""Timmy Model Dashboard — reads from Hermes, owns nothing."""
import json
import os
import subprocess
import sys
import time
import urllib.request
from datetime import datetime
from pathlib import Path
HERMES_HOME = Path.home() / ".hermes"
TIMMY_HOME = Path.home() / ".timmy"
def get_ollama_models():
"""What's available in Ollama."""
try:
req = urllib.request.Request("http://localhost:11434/api/tags")
with urllib.request.urlopen(req, timeout=5) as resp:
return json.loads(resp.read()).get("models", [])
except Exception:
return []
def get_loaded_models():
"""What's actually in VRAM right now."""
try:
req = urllib.request.Request("http://localhost:11434/api/ps")
with urllib.request.urlopen(req, timeout=5) as resp:
return json.loads(resp.read()).get("models", [])
except Exception:
return []
def get_huey_status():
try:
r = subprocess.run(["pgrep", "-f", "huey_consumer"],
capture_output=True, timeout=5)
return r.returncode == 0
except Exception:
return False
def get_hermes_sessions(hours=24):
"""Read session metadata from Hermes session store."""
sessions_file = HERMES_HOME / "sessions" / "sessions.json"
if not sessions_file.exists():
return []
try:
data = json.loads(sessions_file.read_text())
return list(data.values())
except Exception:
return []
def get_heartbeat_ticks(date_str=None):
"""Read today's heartbeat ticks."""
if not date_str:
date_str = datetime.now().strftime("%Y%m%d")
tick_file = TIMMY_HOME / "heartbeat" / f"ticks_{date_str}.jsonl"
if not tick_file.exists():
return []
ticks = []
for line in tick_file.read_text().strip().split("\n"):
try:
ticks.append(json.loads(line))
except Exception:
continue
return ticks
def render(hours=24):
models = get_ollama_models()
loaded = get_loaded_models()
huey = get_huey_status()
sessions = get_hermes_sessions(hours)
ticks = get_heartbeat_ticks()
loaded_names = {m.get("name", "") for m in loaded}
print("\033[2J\033[H")
print("=" * 70)
print(" TIMMY MODEL DASHBOARD")
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
print(f" {now} | Huey: {'UP' if huey else 'DOWN'} | Ollama models: {len(models)}")
print("=" * 70)
# DEPLOYMENTS
print("\n LOCAL MODELS")
print(" " + "-" * 55)
for m in models:
name = m.get("name", "?")
size_gb = m.get("size", 0) / 1e9
status = "IN VRAM" if name in loaded_names else "on disk"
print(f" {name:35s} {size_gb:5.1f}GB {status}")
if not models:
print(" (Ollama not responding)")
# HERMES SESSION ACTIVITY
# Count sessions by platform/provider
print(f"\n HERMES SESSIONS (recent)")
print(" " + "-" * 55)
local_sessions = [s for s in sessions
if "localhost" in str(s.get("origin", {}))]
cli_sessions = [s for s in sessions
if s.get("platform") == "cli" or s.get("origin", {}).get("platform") == "cli"]
total_tokens = sum(s.get("total_tokens", 0) for s in sessions)
print(f" Total sessions: {len(sessions)}")
print(f" CLI sessions: {len(cli_sessions)}")
print(f" Total tokens: {total_tokens:,}")
# HEARTBEAT STATUS
print(f"\n HEARTBEAT ({len(ticks)} ticks today)")
print(" " + "-" * 55)
if ticks:
last = ticks[-1]
decision = last.get("decision", {})
severity = decision.get("severity", "unknown")
reasoning = decision.get("reasoning", "no model decision yet")
print(f" Last tick: {last.get('tick_id', '?')}")
print(f" Severity: {severity}")
print(f" Reasoning: {reasoning[:60]}")
# Count model vs fallback decisions
model_decisions = sum(1 for t in ticks
if t.get("decision", {}).get("severity") != "fallback")
fallback = len(ticks) - model_decisions
print(f" Model decisions: {model_decisions} | Fallback: {fallback}")
# DPO labels if any
labeled = sum(1 for t in ticks if "dpo_label" in t)
if labeled:
chosen = sum(1 for t in ticks if t.get("dpo_label") == "chosen")
rejected = sum(1 for t in ticks if t.get("dpo_label") == "rejected")
print(f" DPO labeled: {labeled} (chosen: {chosen}, rejected: {rejected})")
else:
print(" (no ticks today)")
# ACTIVE LOOPS
print(f"\n ACTIVE LOOPS USING LOCAL MODELS")
print(" " + "-" * 55)
print(" heartbeat_tick 10m hermes4:14b DECIDE phase")
print(" (future) 15m hermes4:14b issue triage")
print(" (future) daily timmy:v0.1 morning report")
print(f"\n NON-LOCAL LOOPS (Gemini/Grok API)")
print(" " + "-" * 55)
print(" gemini_worker 20m gemini-2.5-pro aider")
print(" grok_worker 20m grok-3-fast opencode")
print(" cross_review 30m both PR review")
print("\n" + "=" * 70)
if __name__ == "__main__":
watch = "--watch" in sys.argv
hours = 24
for a in sys.argv[1:]:
if a.startswith("--hours="):
hours = int(a.split("=")[1])
if watch:
while True:
render(hours)
time.sleep(30)
else:
render(hours)
```
---
## 5. Implementation Steps
### Step 1: Add hermes_local() to tasks.py
- One function, ~20 lines
- Calls `hermes chat -q` with Ollama env vars
- All telemetry comes from Hermes for free
### Step 2: Wire heartbeat_tick DECIDE phase
- Replace 6 lines of if/else with hermes_local() call
- Keep hardcoded fallback when model is down
- Decision stored in tick record for DPO review
### Step 3: Fix the MCP server warning
- The orchestration MCP server path is broken — harmless but noisy
- Either fix the path or remove from config
### Step 4: Drop model_dashboard.py in timmy-config/bin/
- Reads Ollama API, Hermes sessions, heartbeat ticks
- No new data stores — just views over existing ones
- `python3 model_dashboard.py --watch` for live view
### Step 5: Expand to more Huey tasks
- triage_issues: model reads issue, picks agent
- good_morning_report: model writes the "From Timmy" section
- Each expansion is just calling hermes_local() with a different prompt
---
## What Gets Hotfixed in Hermes Config
If `hermes insights` is broken (the cache_read_tokens column error),
that needs a fix. The dashboard falls back to reading sessions.json
directly, but insights would be the better data source.
The `providers.ollama` section in config.yaml exists but isn't wired
to the --provider flag. Filing this upstream or patching locally would
let us do `hermes chat -q "..." --provider ollama` cleanly instead
of relying on env vars. Not blocking — env vars work today.
---
## What This Owns
- hermes_local() — 20-line wrapper around a subprocess call
- model_dashboard.py — read-only views over existing data
- review_decisions.py — optional DPO annotation CLI
## What This Does NOT Own
- Inference. Ollama does that.
- Telemetry. Hermes does that.
- Session storage. Hermes does that.
- Token counting. Hermes does that.
- Training pipeline. Already exists in timmy-config/training/.

129
tasks.py
View File

@@ -14,12 +14,89 @@ from gitea_client import GiteaClient
HERMES_HOME = Path.home() / ".hermes"
TIMMY_HOME = Path.home() / ".timmy"
HERMES_AGENT_DIR = HERMES_HOME / "hermes-agent"
METRICS_DIR = TIMMY_HOME / "metrics"
REPOS = [
"Timmy_Foundation/the-nexus",
"Timmy_Foundation/timmy-config",
]
NET_LINE_LIMIT = 10
# ── Local Model Inference via Hermes Harness ─────────────────────────
HEARTBEAT_MODEL = "hermes4:14b"
FALLBACK_MODEL = "hermes3:8b"
def hermes_local(prompt, model=None, caller_tag=None):
"""Call a local Ollama model through the Hermes harness.
Uses provider="local-ollama" which routes through the custom_providers
entry in config.yaml → Ollama at localhost:11434.
Returns response text or None on failure.
Every call creates a Hermes session with telemetry.
"""
_model = model or HEARTBEAT_MODEL
tagged = f"[{caller_tag}] {prompt}" if caller_tag else prompt
# Import hermes cli.main directly — no subprocess, no env vars
_agent_dir = str(HERMES_AGENT_DIR)
if _agent_dir not in sys.path:
sys.path.insert(0, _agent_dir)
old_cwd = os.getcwd()
os.chdir(_agent_dir)
try:
from cli import main as hermes_main
import io
from contextlib import redirect_stdout, redirect_stderr
buf = io.StringIO()
err = io.StringIO()
with redirect_stdout(buf), redirect_stderr(err):
hermes_main(
query=tagged,
model=_model,
provider="local-ollama",
quiet=True,
)
output = buf.getvalue().strip()
# Strip session_id line from quiet output
lines = [l for l in output.split("\n") if not l.startswith("session_id:")]
response = "\n".join(lines).strip()
# Log to metrics jsonl
METRICS_DIR.mkdir(parents=True, exist_ok=True)
metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl"
record = {
"timestamp": datetime.now(timezone.utc).isoformat(),
"model": _model,
"caller": caller_tag or "unknown",
"prompt_len": len(prompt),
"response_len": len(response),
"success": bool(response),
}
with open(metrics_file, "a") as f:
f.write(json.dumps(record) + "\n")
return response if response else None
except Exception as e:
# Log failure
METRICS_DIR.mkdir(parents=True, exist_ok=True)
metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl"
record = {
"timestamp": datetime.now(timezone.utc).isoformat(),
"model": _model,
"caller": caller_tag or "unknown",
"error": str(e),
"success": False,
}
with open(metrics_file, "a") as f:
f.write(json.dumps(record) + "\n")
return None
finally:
os.chdir(old_cwd)
# ── Existing: Orchestration ──────────────────────────────────────────
@@ -280,15 +357,49 @@ def heartbeat_tick():
"previous_tick": last_tick.get("tick_id", "none"),
}
# DECIDE + ACT: check for problems
actions = []
if not perception.get("gitea_alive"):
actions.append("ALERT: Gitea unreachable")
health = perception.get("model_health", {})
if isinstance(health, dict) and not health.get("ollama_running"):
actions.append("ALERT: Ollama not running")
tick_record["actions"] = actions
# DECIDE: let hermes4:14b reason about what to do
decide_prompt = (
f"System state at {now.isoformat()}:\n\n"
f"{json.dumps(perception, indent=2)}\n\n"
f"Previous tick: {last_tick.get('tick_id', 'none')}\n\n"
"You are the heartbeat monitor. Based on this state:\n"
"1. List any actions needed (alerts, restarts, escalations). Empty if all OK.\n"
"2. Rate severity: ok, warning, or critical.\n"
"3. One sentence of reasoning.\n\n"
'Respond ONLY with JSON: {"actions": [], "severity": "ok", "reasoning": "..."}'
)
decision = None
try:
raw = hermes_local(decide_prompt, caller_tag="heartbeat_tick")
if raw:
# Model might wrap JSON in markdown, extract first { line
for line in raw.split("\n"):
line = line.strip()
if line.startswith("{"):
decision = json.loads(line)
break
if not decision:
decision = json.loads(raw)
except (json.JSONDecodeError, Exception):
decision = None
# Fallback to hardcoded logic if model fails or is down
if decision is None:
actions = []
if not perception.get("gitea_alive"):
actions.append("ALERT: Gitea unreachable")
health = perception.get("model_health", {})
if isinstance(health, dict) and not health.get("ollama_running"):
actions.append("ALERT: Ollama not running")
decision = {
"actions": actions,
"severity": "fallback",
"reasoning": "model unavailable, used hardcoded checks",
}
tick_record["decision"] = decision
actions = decision.get("actions", [])
# Save tick
last_tick_file.write_text(json.dumps(tick_record, indent=2))