Compare commits
6 Commits
gemini/iss
...
gemini/iss
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fe2805b458 | ||
|
|
f46a4826d9 | ||
|
|
3b1763ce4c | ||
|
|
78f5216540 | ||
|
|
49020b34d9 | ||
|
|
7468a6d063 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -8,3 +8,4 @@
|
|||||||
*.db-wal
|
*.db-wal
|
||||||
*.db-shm
|
*.db-shm
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
.aider*
|
||||||
|
|||||||
252
bin/timmy-dashboard
Executable file
252
bin/timmy-dashboard
Executable file
@@ -0,0 +1,252 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Timmy Model Dashboard — where are my models, what are they doing.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
timmy-dashboard # one-shot
|
||||||
|
timmy-dashboard --watch # live refresh every 30s
|
||||||
|
timmy-dashboard --hours=48 # look back 48h
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.request
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Per-user state directories the dashboard reads from (it never writes to them).
# HERMES_HOME holds the Hermes session store (sessions/sessions.json).
HERMES_HOME = Path.home() / ".hermes"
# TIMMY_HOME holds the heartbeat tick logs (heartbeat/ticks_YYYYMMDD.jsonl).
TIMMY_HOME = Path.home() / ".timmy"
# METRICS_DIR holds the local-inference logs (local_*.jsonl).
METRICS_DIR = TIMMY_HOME / "metrics"
|
||||||
|
|
||||||
|
# ── Data Sources ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def get_ollama_models():
    """Return the model entries reported by Ollama's ``/api/tags`` endpoint.

    Returns:
        A list of dicts, one per model. The list is empty when Ollama cannot
        be reached, answers with something that is not JSON, or reports no
        usable ``models`` list.
    """
    try:
        req = urllib.request.Request("http://localhost:11434/api/tags")
        with urllib.request.urlopen(req, timeout=5) as resp:
            payload = json.loads(resp.read())
    except Exception:
        # Deliberately broad: the dashboard is best-effort, and connection,
        # HTTP and decoding failures all mean the same thing to the caller.
        return []

    # ``.get("models", [])`` alone would hand back None for ``"models": null``
    # and let non-dict entries through; callers iterate and call ``.get``.
    models = payload.get("models") if isinstance(payload, dict) else None
    if not isinstance(models, list):
        return []
    return [entry for entry in models if isinstance(entry, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def get_loaded_models():
    """Return the model entries reported by Ollama's ``/api/ps`` endpoint.

    The dashboard treats every name in this list as currently loaded.

    Returns:
        A list of dicts, one per model. The list is empty when Ollama cannot
        be reached, answers with something that is not JSON, or reports no
        usable ``models`` list.
    """
    try:
        req = urllib.request.Request("http://localhost:11434/api/ps")
        with urllib.request.urlopen(req, timeout=5) as resp:
            payload = json.loads(resp.read())
    except Exception:
        # Deliberately broad: the dashboard is best-effort, and connection,
        # HTTP and decoding failures all mean the same thing to the caller.
        return []

    # Guard against ``"models": null`` and malformed entries so callers can
    # iterate the result and call ``.get`` on each item safely.
    models = payload.get("models") if isinstance(payload, dict) else None
    if not isinstance(models, list):
        return []
    return [entry for entry in models if isinstance(entry, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def get_huey_pid():
    """Return the PID of a running ``huey_consumer`` process, if any.

    Runs ``pgrep -f huey_consumer`` and takes the first PID it prints.

    Returns:
        The PID as a string, or None when no process matches, ``pgrep`` is
        unavailable, or the lookup times out.
    """
    try:
        proc = subprocess.run(
            ["pgrep", "-f", "huey_consumer"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError: pgrep missing or not executable. SubprocessError: timeout.
        return None

    if proc.returncode != 0:
        return None
    pids = proc.stdout.split()
    # Never return an empty string; callers test the result for truthiness
    # and then print it.
    return pids[0] if pids else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_hermes_sessions():
    """Return the session records stored in ``sessions/sessions.json``.

    The file is read from under HERMES_HOME and is expected to hold one JSON
    object whose values are the individual session records.

    Returns:
        A list of session dicts. The list is empty when the file is missing,
        unreadable, not valid JSON, or not a JSON object.
    """
    sessions_file = HERMES_HOME / "sessions" / "sessions.json"
    try:
        # Read directly instead of checking exists() first: the file can
        # disappear between the check and the read.
        data = json.loads(sessions_file.read_text())
    except (OSError, ValueError):
        return []

    if not isinstance(data, dict):
        return []
    # Callers call ``.get`` on every session, so drop anything that is not
    # a mapping.
    return [session for session in data.values() if isinstance(session, dict)]
|
||||||
|
|
||||||
|
|
||||||
|
def get_heartbeat_ticks(date_str=None):
    """Return the heartbeat tick records logged for one day.

    Args:
        date_str: Day to load, formatted ``YYYYMMDD``. Defaults to today's
            date in local time.

    Returns:
        A list of tick dicts in file order, read from
        ``heartbeat/ticks_<date_str>.jsonl`` under TIMMY_HOME. Blank lines,
        lines that are not valid JSON, and JSON values that are not objects
        are skipped. The list is empty when the file is missing or unreadable.
    """
    if not date_str:
        date_str = datetime.now().strftime("%Y%m%d")
    tick_file = TIMMY_HOME / "heartbeat" / f"ticks_{date_str}.jsonl"

    try:
        text = tick_file.read_text()
    except (OSError, ValueError):
        # Missing, unreadable or undecodable file: nothing to show.
        return []

    ticks = []
    for line in text.splitlines():
        if not line.strip():
            continue
        try:
            tick = json.loads(line)
        except ValueError:
            continue
        # A line such as ``42`` parses fine but would break callers that
        # call ``.get`` on every tick.
        if isinstance(tick, dict):
            ticks.append(tick)
    return ticks
|
||||||
|
|
||||||
|
|
||||||
|
def get_local_metrics(hours=24):
    """Read recent local-inference records from the metrics ``.jsonl`` files.

    Args:
        hours: Size of the look-back window, in hours.

    Returns:
        The records, in file-name then line order, from every
        ``local_*.jsonl`` file in METRICS_DIR whose ``timestamp`` is no older
        than ``hours`` ago. Records that are malformed or lack a parseable
        timestamp are skipped.
    """
    records = []
    cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
    if not METRICS_DIR.exists():
        return records

    for metrics_file in sorted(METRICS_DIR.glob("local_*.jsonl")):
        try:
            text = metrics_file.read_text()
        except (OSError, ValueError):
            # One unreadable file must not hide the others.
            continue
        for line in text.splitlines():
            if not line.strip():
                continue
            try:
                record = json.loads(line)
                stamp = str(record["timestamp"])
                # fromisoformat() only accepts a trailing "Z" on Python 3.11+.
                if stamp.endswith("Z"):
                    stamp = stamp[:-1] + "+00:00"
                ts = datetime.fromisoformat(stamp)
            except (ValueError, KeyError, TypeError):
                continue
            if ts.tzinfo is None:
                # Comparing a naive timestamp with the aware cutoff raises
                # TypeError, which used to drop the record silently.
                # NOTE(review): assumes naive timestamps are UTC — confirm
                # against whatever writes these files.
                ts = ts.replace(tzinfo=timezone.utc)
            if ts >= cutoff:
                records.append(record)
    return records
|
||||||
|
|
||||||
|
|
||||||
|
def get_cron_jobs():
    """Return the job list printed by ``hermes cron list --json``.

    Returns:
        The value of the ``jobs`` key when it is a list. An empty list is
        returned when the command is missing, times out, exits non-zero, or
        prints something that is not a JSON object with a ``jobs`` list.
    """
    try:
        proc = subprocess.run(
            ["hermes", "cron", "list", "--json"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError: hermes not installed or not executable.
        # SubprocessError: the 10 second timeout expired.
        return []

    if proc.returncode != 0:
        return []
    try:
        payload = json.loads(proc.stdout)
    except ValueError:
        return []

    # ``"jobs": null`` must still produce a list for callers that iterate.
    jobs = payload.get("jobs") if isinstance(payload, dict) else None
    return jobs if isinstance(jobs, list) else []
|
||||||
|
|
||||||
|
|
||||||
|
# ── Rendering ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# ANSI escape sequences used to style the terminal output.
DIM = "\033[2m"
BOLD = "\033[1m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
RED = "\033[31m"
CYAN = "\033[36m"
# Resets every attribute set by the sequences above.
RST = "\033[0m"
# Erases the whole screen, then moves the cursor to the top-left corner.
CLR = "\033[2J\033[H"
|
||||||
|
|
||||||
|
|
||||||
|
def _only_dicts(items):
    """Return the dict entries of *items*, tolerating None and malformed rows."""
    return [item for item in (items or []) if isinstance(item, dict)]


def render(hours=24):
    """Collect every data source and print one full dashboard frame.

    Clears the screen, then prints the header, the Ollama model list, the
    local-inference summary, the heartbeat status, the Hermes session counts
    and the static table of active loops.

    Args:
        hours: Look-back window, in hours, for the local-inference section.
    """
    # Every section below calls ``.get`` on its rows, so drop anything that
    # is not a mapping before rendering.
    models = _only_dicts(get_ollama_models())
    loaded = _only_dicts(get_loaded_models())
    huey_pid = get_huey_pid()
    ticks = _only_dicts(get_heartbeat_ticks())
    metrics = _only_dicts(get_local_metrics(hours))
    sessions = _only_dicts(get_hermes_sessions())

    loaded_names = {m.get("name", "") for m in loaded}
    now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Choose the whole status fragment up front. The previous inline
    # conditional only covered the trailing reset, so a stopped consumer was
    # shown as a green "PID None" followed by a red "DOWN".
    if huey_pid:
        huey_status = f"{GREEN}PID {huey_pid}{RST}"
    else:
        huey_status = f"{RED}DOWN{RST}"

    print(CLR, end="")
    print(f"{BOLD}{'=' * 70}")
    print(" TIMMY MODEL DASHBOARD")
    # RST inside huey_status also cancels bold; switch it back on so the
    # closing rule of the banner matches the opening one.
    print(f" {now} | Huey: {huey_status}{BOLD}")
    print(f"{'=' * 70}{RST}")

    # ── LOCAL MODELS ──
    print(f"\n {BOLD}LOCAL MODELS (Ollama){RST}")
    print(f" {DIM}{'-' * 55}{RST}")
    if models:
        for m in models:
            # ``or`` guards against explicit nulls, which would break the
            # format specs and the division below.
            name = str(m.get("name") or "?")
            size_gb = (m.get("size") or 0) / 1e9
            if name in loaded_names:
                status = f"{GREEN}IN VRAM{RST}"
            else:
                status = f"{DIM}on disk{RST}"
            print(f" {name:35s} {size_gb:5.1f}GB {status}")
    else:
        print(f" {RED}(Ollama not responding){RST}")

    # ── LOCAL INFERENCE ACTIVITY ──
    print(f"\n {BOLD}LOCAL INFERENCE ({len(metrics)} calls, last {hours}h){RST}")
    print(f" {DIM}{'-' * 55}{RST}")
    if metrics:
        by_caller = {}
        by_model = {}
        for r in metrics:
            # str() keeps the ``:25s`` / ``:30s`` specs from raising on a
            # null or numeric value.
            caller = str(r.get("caller", "unknown"))
            stats = by_caller.setdefault(
                caller, {"count": 0, "success": 0, "errors": 0}
            )
            stats["count"] += 1
            stats["success" if r.get("success") else "errors"] += 1

            model = str(r.get("model", "unknown"))
            by_model[model] = by_model.get(model, 0) + 1

        for caller, stats in by_caller.items():
            err = f" {RED}err:{stats['errors']}{RST}" if stats["errors"] else ""
            print(f" {caller:25s} calls:{stats['count']:4d} "
                  f"{GREEN}ok:{stats['success']}{RST}{err}")

        print(f"\n {DIM}Models used:{RST}")
        # Most-used model first.
        for model, count in sorted(by_model.items(), key=lambda x: -x[1]):
            print(f" {model:30s} {count} calls")
    else:
        print(f" {DIM}(no local calls recorded yet){RST}")

    # ── HEARTBEAT STATUS ──
    print(f"\n {BOLD}HEARTBEAT ({len(ticks)} ticks today){RST}")
    print(f" {DIM}{'-' * 55}{RST}")
    if ticks:
        last = ticks[-1]
        # Ticks without a "decision" fall back to their "actions" value; that
        # is only a dict when it was defaulted here, so such ticks normally
        # take the else branch below.
        decision = last.get("decision", last.get("actions", {}))
        print(f" Last tick: {last.get('tick_id', '?')}")
        if isinstance(decision, dict):
            severity = decision.get("severity", "unknown")
            reasoning = decision.get("reasoning", "")
            if severity == "ok":
                sev_color = GREEN
            elif severity == "warning":
                sev_color = YELLOW
            else:
                sev_color = RED
            print(f" Severity: {sev_color}{severity}{RST}")
            if reasoning:
                # str() so a non-string reasoning value cannot break the slice.
                print(f" Reasoning: {str(reasoning)[:65]}")
        else:
            actions = last.get("actions", [])
            print(f" Actions: {actions if actions else 'none'}")

        # A tick counts as a model decision when it carries a decision dict
        # whose severity is anything other than "fallback".
        model_decisions = sum(1 for t in ticks
                              if isinstance(t.get("decision"), dict)
                              and t["decision"].get("severity") != "fallback")
        fallback = len(ticks) - model_decisions
        print(f" {CYAN}Model: {model_decisions}{RST} | {DIM}Fallback: {fallback}{RST}")
    else:
        print(f" {DIM}(no ticks today){RST}")

    # ── HERMES SESSIONS ──
    # Split in one pass. The old ``s not in local_sessions`` test compared
    # dicts by value, which is quadratic and miscounts identical sessions.
    # Any loopback base_url counts as local; matching only port 11434 put
    # sessions on the localhost:8081 endpoint into the cloud bucket.
    local_sessions = []
    cloud_sessions = []
    for s in sessions:
        base_url = str(s.get("base_url", ""))
        if "localhost" in base_url or "127.0.0.1" in base_url:
            local_sessions.append(s)
        else:
            cloud_sessions.append(s)
    print(f"\n {BOLD}HERMES SESSIONS{RST}")
    print(f" {DIM}{'-' * 55}{RST}")
    print(f" Total: {len(sessions)} | "
          f"{GREEN}Local: {len(local_sessions)}{RST} | "
          f"{YELLOW}Cloud: {len(cloud_sessions)}{RST}")

    # ── ACTIVE LOOPS ──
    # Static table: nothing below is read from a live data source.
    print(f"\n {BOLD}ACTIVE LOOPS{RST}")
    print(f" {DIM}{'-' * 55}{RST}")
    print(f" {CYAN}heartbeat_tick{RST} 10m hermes4:14b DECIDE phase")
    print(f" {DIM}model_health{RST} 5m (local check) Ollama ping")
    print(f" {DIM}gemini_worker{RST} 20m gemini-2.5-pro aider")
    print(f" {DIM}grok_worker{RST} 20m grok-3-fast opencode")
    print(f" {DIM}cross_review{RST} 30m gemini+grok PR review")

    print(f"\n{BOLD}{'=' * 70}{RST}")
    print(f" {DIM}Refresh: timmy-dashboard --watch | History: --hours=N{RST}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Hand-rolled flag parsing: ``--watch`` switches to live refresh and
    # ``--hours=N`` sets the look-back window. Other arguments are ignored.
    watch = "--watch" in sys.argv
    hours = 24
    for arg in sys.argv[1:]:
        if not arg.startswith("--hours="):
            continue
        value = arg.partition("=")[2]
        try:
            hours = int(value)
        except ValueError:
            # Exit with a readable message instead of a traceback.
            sys.exit(f"timmy-dashboard: --hours expects a whole number, got {value!r}")
        if hours < 1:
            sys.exit(f"timmy-dashboard: --hours must be at least 1, got {hours}")

    if watch:
        try:
            # Redraw every 30 seconds until interrupted with Ctrl-C.
            while True:
                render(hours)
                time.sleep(30)
        except KeyboardInterrupt:
            print(f"\n{DIM}Dashboard stopped.{RST}")
    else:
        render(hours)
|
||||||
22
config.yaml
22
config.yaml
@@ -1,11 +1,13 @@
|
|||||||
model:
|
model:
|
||||||
default: claude-opus-4-6
|
default: hermes4
|
||||||
provider: anthropic
|
provider: custom
|
||||||
|
base_url: http://localhost:8081/v1
|
||||||
|
context_length: 65536
|
||||||
toolsets:
|
toolsets:
|
||||||
- all
|
- all
|
||||||
agent:
|
agent:
|
||||||
max_turns: 30
|
max_turns: 30
|
||||||
reasoning_effort: medium
|
reasoning_effort: xhigh
|
||||||
verbose: false
|
verbose: false
|
||||||
terminal:
|
terminal:
|
||||||
backend: local
|
backend: local
|
||||||
@@ -184,11 +186,15 @@ custom_providers:
|
|||||||
- name: Local Ollama
|
- name: Local Ollama
|
||||||
base_url: http://localhost:11434/v1
|
base_url: http://localhost:11434/v1
|
||||||
api_key: ollama
|
api_key: ollama
|
||||||
model: glm-4.7-flash:latest
|
model: hermes4:14b
|
||||||
- name: Google Gemini
|
- name: Google Gemini
|
||||||
base_url: https://generativelanguage.googleapis.com/v1beta/openai
|
base_url: https://generativelanguage.googleapis.com/v1beta/openai
|
||||||
api_key_env: GEMINI_API_KEY
|
api_key_env: GEMINI_API_KEY
|
||||||
model: gemini-2.5-pro
|
model: gemini-2.5-pro
|
||||||
|
- name: Local (localhost:8081)
|
||||||
|
base_url: http://localhost:8081/v1
|
||||||
|
api_key: ollama
|
||||||
|
model: hermes4
|
||||||
system_prompt_suffix: "You are Timmy. Your soul is defined in SOUL.md \u2014 read\
|
system_prompt_suffix: "You are Timmy. Your soul is defined in SOUL.md \u2014 read\
|
||||||
\ it, live it.\nYou run locally on your owner's machine via Ollama. You never phone\
|
\ it, live it.\nYou run locally on your owner's machine via Ollama. You never phone\
|
||||||
\ home.\nYou speak plainly. You prefer short sentences. Brevity is a kindness.\n\
|
\ home.\nYou speak plainly. You prefer short sentences. Brevity is a kindness.\n\
|
||||||
@@ -202,12 +208,12 @@ providers:
|
|||||||
base_url: http://localhost:11434/v1
|
base_url: http://localhost:11434/v1
|
||||||
model: hermes3:latest
|
model: hermes3:latest
|
||||||
mcp_servers:
|
mcp_servers:
|
||||||
orchestration:
|
morrowind:
|
||||||
command: /Users/apayne/.hermes/hermes-agent/venv/bin/python3
|
command: python3
|
||||||
args:
|
args:
|
||||||
- /Users/apayne/.hermes/hermes-agent/tools/orchestration_mcp_server.py
|
- /Users/apayne/.timmy/morrowind/mcp_server.py
|
||||||
env: {}
|
env: {}
|
||||||
timeout: 120
|
timeout: 30
|
||||||
fallback_model:
|
fallback_model:
|
||||||
provider: custom
|
provider: custom
|
||||||
model: gemini-2.5-pro
|
model: gemini-2.5-pro
|
||||||
|
|||||||
438
docs/local-model-integration-sketch.md
Normal file
438
docs/local-model-integration-sketch.md
Normal file
@@ -0,0 +1,438 @@
|
|||||||
|
# Local Model Integration Sketch v2
|
||||||
|
# Hermes4-14B in the Heartbeat Loop — No New Telemetry
|
||||||
|
|
||||||
|
## Principle
|
||||||
|
|
||||||
|
No new inference layer. Huey tasks call `hermes chat -q` pointed at
|
||||||
|
Ollama. Hermes handles sessions, token tracking, cost logging.
|
||||||
|
The dashboard reads what Hermes already stores.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Why Not Ollama Directly?
|
||||||
|
|
||||||
|
Ollama is fine as a serving backend. The issue isn't Ollama — it's that
calling Ollama directly with urllib bypasses the harness. The harness
already tracks sessions, tokens, model/provider, and platform. Building a
second telemetry layer would mean owning code we don't need.
|
||||||
|
|
||||||
|
Ollama as a named provider isn't wired into the --provider flag yet,
|
||||||
|
but routing works via env vars:
|
||||||
|
|
||||||
|
HERMES_MODEL="hermes4:14b" \
|
||||||
|
HERMES_PROVIDER="custom" \
|
||||||
|
HERMES_BASE_URL="http://localhost:11434/v1" \
|
||||||
|
hermes chat -q "prompt here" -Q
|
||||||
|
|
||||||
|
This creates a tracked session, logs tokens, and returns the response.
|
||||||
|
That's our local inference call.
|
||||||
|
|
||||||
|
### Alternatives to Ollama for serving:
|
||||||
|
- **llama.cpp server** — lighter, no Python, raw HTTP. Good for single
|
||||||
|
model serving. Less convenient for model switching.
|
||||||
|
- **vLLM** — best throughput, but needs NVIDIA GPU. Not for M3 Mac.
|
||||||
|
- **MLX serving** — native Apple Silicon, but no OpenAI-compat API yet.
|
||||||
|
MLX is for training, not serving (our current policy).
|
||||||
|
- **llamafile** — single binary, portable. Good for distribution.
|
||||||
|
|
||||||
|
Verdict: Ollama is fine. It's the standard OpenAI-compat local server
|
||||||
|
on Mac. The issue was never Ollama — it was bypassing the harness.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. The Call Pattern
|
||||||
|
|
||||||
|
One function in tasks.py that all Huey tasks use:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import subprocess
|
||||||
|
import json
|
||||||
|
|
||||||
|
HERMES_BIN = "hermes"
|
||||||
|
LOCAL_ENV = {
|
||||||
|
"HERMES_MODEL": "hermes4:14b",
|
||||||
|
"HERMES_PROVIDER": "custom",
|
||||||
|
"HERMES_BASE_URL": "http://localhost:11434/v1",
|
||||||
|
}
|
||||||
|
|
||||||
|
def hermes_local(prompt, caller_tag=None, max_retries=2):
|
||||||
|
"""Call hermes with local Ollama model. Returns response text.
|
||||||
|
|
||||||
|
Every call creates a hermes session with full telemetry.
|
||||||
|
caller_tag gets prepended to prompt for searchability.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
env = os.environ.copy()
|
||||||
|
env.update(LOCAL_ENV)
|
||||||
|
|
||||||
|
tagged_prompt = prompt
|
||||||
|
if caller_tag:
|
||||||
|
tagged_prompt = f"[{caller_tag}] {prompt}"
|
||||||
|
|
||||||
|
for attempt in range(max_retries + 1):
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
[HERMES_BIN, "chat", "-q", tagged_prompt, "-Q", "-t", "none"],
|
||||||
|
capture_output=True, text=True,
|
||||||
|
timeout=120, env=env,
|
||||||
|
)
|
||||||
|
if result.returncode == 0 and result.stdout.strip():
|
||||||
|
# Strip the session_id line from -Q output
|
||||||
|
lines = result.stdout.strip().split("\n")
|
||||||
|
response_lines = [l for l in lines if not l.startswith("session_id:")]
|
||||||
|
return "\n".join(response_lines).strip()
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
if attempt == max_retries:
|
||||||
|
return None
|
||||||
|
continue
|
||||||
|
return None
|
||||||
|
```
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- `-t none` disables all toolsets — the heartbeat model shouldn't
|
||||||
|
have terminal/file access. Pure reasoning only.
|
||||||
|
- `-Q` quiet mode suppresses banner/spinner, gives clean output.
|
||||||
|
- Every call creates a session in Hermes session store. Searchable,
|
||||||
|
exportable, countable.
|
||||||
|
- The `[caller_tag]` prefix lets you filter sessions by which Huey
|
||||||
|
task generated them: `hermes sessions list | grep heartbeat`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Heartbeat DECIDE Phase
|
||||||
|
|
||||||
|
Replace the hardcoded if/else with a model call:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# In heartbeat_tick(), replace the DECIDE + ACT section:
|
||||||
|
|
||||||
|
# DECIDE: let hermes4:14b reason about what to do
|
||||||
|
decide_prompt = f"""System state at {now.isoformat()}:
|
||||||
|
|
||||||
|
{json.dumps(perception, indent=2)}
|
||||||
|
|
||||||
|
Previous tick: {last_tick.get('tick_id', 'none')}
|
||||||
|
|
||||||
|
You are the heartbeat monitor. Based on this state:
|
||||||
|
1. List any actions needed (alerts, restarts, escalations). Empty if all OK.
|
||||||
|
2. Rate severity: ok, warning, or critical.
|
||||||
|
3. One sentence of reasoning.
|
||||||
|
|
||||||
|
Respond ONLY with JSON:
|
||||||
|
{{"actions": [], "severity": "ok", "reasoning": "..."}}"""
|
||||||
|
|
||||||
|
decision = None
|
||||||
|
try:
|
||||||
|
raw = hermes_local(decide_prompt, caller_tag="heartbeat_tick")
|
||||||
|
if raw:
|
||||||
|
# Try to parse JSON from the response
|
||||||
|
# Model might wrap it in markdown, so extract
|
||||||
|
for line in raw.split("\n"):
|
||||||
|
line = line.strip()
|
||||||
|
if line.startswith("{"):
|
||||||
|
decision = json.loads(line)
|
||||||
|
break
|
||||||
|
if not decision:
|
||||||
|
decision = json.loads(raw)
|
||||||
|
except (json.JSONDecodeError, Exception) as e:
|
||||||
|
decision = None
|
||||||
|
|
||||||
|
# Fallback to hardcoded logic if model fails or is down
|
||||||
|
if decision is None:
|
||||||
|
actions = []
|
||||||
|
if not perception.get("gitea_alive"):
|
||||||
|
actions.append("ALERT: Gitea unreachable")
|
||||||
|
health = perception.get("model_health", {})
|
||||||
|
if isinstance(health, dict) and not health.get("ollama_running"):
|
||||||
|
actions.append("ALERT: Ollama not running")
|
||||||
|
decision = {
|
||||||
|
"actions": actions,
|
||||||
|
"severity": "fallback",
|
||||||
|
"reasoning": "model unavailable, used hardcoded checks"
|
||||||
|
}
|
||||||
|
|
||||||
|
tick_record["decision"] = decision
|
||||||
|
actions = decision.get("actions", [])
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. DPO Candidate Collection
|
||||||
|
|
||||||
|
No new database. Hermes sessions ARE the DPO candidates.
|
||||||
|
|
||||||
|
Every `hermes_local()` call creates a session. To extract DPO pairs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Export all local-model sessions
|
||||||
|
hermes sessions export --output /tmp/local-sessions.jsonl
|
||||||
|
|
||||||
|
# Filter for heartbeat decisions
|
||||||
|
grep "heartbeat_tick" /tmp/local-sessions.jsonl > heartbeat_decisions.jsonl
|
||||||
|
```
|
||||||
|
|
||||||
|
The existing `session_export` Huey task (runs every 4h) already extracts
|
||||||
|
user→assistant pairs. It just needs to be aware that some sessions are
|
||||||
|
now local-model decisions instead of human conversations.
|
||||||
|
|
||||||
|
For DPO annotation, add a simple review script:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# review_decisions.py — reads heartbeat tick logs, shows model decisions,
|
||||||
|
# asks Alexander to mark chosen/rejected
|
||||||
|
# Writes annotations back to the tick log files
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
TICK_DIR = Path.home() / ".timmy" / "heartbeat"
|
||||||
|
|
||||||
|
for log_file in sorted(TICK_DIR.glob("ticks_*.jsonl")):
|
||||||
|
for line in log_file.read_text().strip().split("\n"):
|
||||||
|
tick = json.loads(line)
|
||||||
|
decision = tick.get("decision", {})
|
||||||
|
if decision.get("severity") == "fallback":
|
||||||
|
continue # skip fallback entries
|
||||||
|
|
||||||
|
print(f"\n--- Tick {tick['tick_id']} ---")
|
||||||
|
print(f"Perception: {json.dumps(tick['perception'], indent=2)}")
|
||||||
|
print(f"Decision: {json.dumps(decision, indent=2)}")
|
||||||
|
|
||||||
|
rating = input("Rate (c=chosen, r=rejected, s=skip): ").strip()
|
||||||
|
if rating in ("c", "r"):
|
||||||
|
tick["dpo_label"] = "chosen" if rating == "c" else "rejected"
|
||||||
|
# write back... (append to annotated file)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Dashboard — Reads Hermes Data
|
||||||
|
|
||||||
|
```python
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Timmy Model Dashboard — reads from Hermes, owns nothing."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.request
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
HERMES_HOME = Path.home() / ".hermes"
|
||||||
|
TIMMY_HOME = Path.home() / ".timmy"
|
||||||
|
|
||||||
|
|
||||||
|
def get_ollama_models():
|
||||||
|
"""What's available in Ollama."""
|
||||||
|
try:
|
||||||
|
req = urllib.request.Request("http://localhost:11434/api/tags")
|
||||||
|
with urllib.request.urlopen(req, timeout=5) as resp:
|
||||||
|
return json.loads(resp.read()).get("models", [])
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_loaded_models():
|
||||||
|
"""What's actually in VRAM right now."""
|
||||||
|
try:
|
||||||
|
req = urllib.request.Request("http://localhost:11434/api/ps")
|
||||||
|
with urllib.request.urlopen(req, timeout=5) as resp:
|
||||||
|
return json.loads(resp.read()).get("models", [])
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_huey_status():
|
||||||
|
try:
|
||||||
|
r = subprocess.run(["pgrep", "-f", "huey_consumer"],
|
||||||
|
capture_output=True, timeout=5)
|
||||||
|
return r.returncode == 0
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_hermes_sessions(hours=24):
|
||||||
|
"""Read session metadata from Hermes session store."""
|
||||||
|
sessions_file = HERMES_HOME / "sessions" / "sessions.json"
|
||||||
|
if not sessions_file.exists():
|
||||||
|
return []
|
||||||
|
try:
|
||||||
|
data = json.loads(sessions_file.read_text())
|
||||||
|
return list(data.values())
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_heartbeat_ticks(date_str=None):
|
||||||
|
"""Read today's heartbeat ticks."""
|
||||||
|
if not date_str:
|
||||||
|
date_str = datetime.now().strftime("%Y%m%d")
|
||||||
|
tick_file = TIMMY_HOME / "heartbeat" / f"ticks_{date_str}.jsonl"
|
||||||
|
if not tick_file.exists():
|
||||||
|
return []
|
||||||
|
ticks = []
|
||||||
|
for line in tick_file.read_text().strip().split("\n"):
|
||||||
|
try:
|
||||||
|
ticks.append(json.loads(line))
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return ticks
|
||||||
|
|
||||||
|
|
||||||
|
def render(hours=24):
|
||||||
|
models = get_ollama_models()
|
||||||
|
loaded = get_loaded_models()
|
||||||
|
huey = get_huey_status()
|
||||||
|
sessions = get_hermes_sessions(hours)
|
||||||
|
ticks = get_heartbeat_ticks()
|
||||||
|
|
||||||
|
loaded_names = {m.get("name", "") for m in loaded}
|
||||||
|
|
||||||
|
print("\033[2J\033[H")
|
||||||
|
print("=" * 70)
|
||||||
|
print(" TIMMY MODEL DASHBOARD")
|
||||||
|
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
print(f" {now} | Huey: {'UP' if huey else 'DOWN'} | Ollama models: {len(models)}")
|
||||||
|
print("=" * 70)
|
||||||
|
|
||||||
|
# DEPLOYMENTS
|
||||||
|
print("\n LOCAL MODELS")
|
||||||
|
print(" " + "-" * 55)
|
||||||
|
for m in models:
|
||||||
|
name = m.get("name", "?")
|
||||||
|
size_gb = m.get("size", 0) / 1e9
|
||||||
|
status = "IN VRAM" if name in loaded_names else "on disk"
|
||||||
|
print(f" {name:35s} {size_gb:5.1f}GB {status}")
|
||||||
|
if not models:
|
||||||
|
print(" (Ollama not responding)")
|
||||||
|
|
||||||
|
# HERMES SESSION ACTIVITY
|
||||||
|
# Count sessions by platform/provider
|
||||||
|
print(f"\n HERMES SESSIONS (recent)")
|
||||||
|
print(" " + "-" * 55)
|
||||||
|
local_sessions = [s for s in sessions
|
||||||
|
if "localhost" in str(s.get("origin", {}))]
|
||||||
|
cli_sessions = [s for s in sessions
|
||||||
|
if s.get("platform") == "cli" or s.get("origin", {}).get("platform") == "cli"]
|
||||||
|
|
||||||
|
total_tokens = sum(s.get("total_tokens", 0) for s in sessions)
|
||||||
|
print(f" Total sessions: {len(sessions)}")
|
||||||
|
print(f" CLI sessions: {len(cli_sessions)}")
|
||||||
|
print(f" Total tokens: {total_tokens:,}")
|
||||||
|
|
||||||
|
# HEARTBEAT STATUS
|
||||||
|
print(f"\n HEARTBEAT ({len(ticks)} ticks today)")
|
||||||
|
print(" " + "-" * 55)
|
||||||
|
if ticks:
|
||||||
|
last = ticks[-1]
|
||||||
|
decision = last.get("decision", {})
|
||||||
|
severity = decision.get("severity", "unknown")
|
||||||
|
reasoning = decision.get("reasoning", "no model decision yet")
|
||||||
|
print(f" Last tick: {last.get('tick_id', '?')}")
|
||||||
|
print(f" Severity: {severity}")
|
||||||
|
print(f" Reasoning: {reasoning[:60]}")
|
||||||
|
|
||||||
|
# Count model vs fallback decisions
|
||||||
|
model_decisions = sum(1 for t in ticks
|
||||||
|
if t.get("decision", {}).get("severity") != "fallback")
|
||||||
|
fallback = len(ticks) - model_decisions
|
||||||
|
print(f" Model decisions: {model_decisions} | Fallback: {fallback}")
|
||||||
|
|
||||||
|
# DPO labels if any
|
||||||
|
labeled = sum(1 for t in ticks if "dpo_label" in t)
|
||||||
|
if labeled:
|
||||||
|
chosen = sum(1 for t in ticks if t.get("dpo_label") == "chosen")
|
||||||
|
rejected = sum(1 for t in ticks if t.get("dpo_label") == "rejected")
|
||||||
|
print(f" DPO labeled: {labeled} (chosen: {chosen}, rejected: {rejected})")
|
||||||
|
else:
|
||||||
|
print(" (no ticks today)")
|
||||||
|
|
||||||
|
# ACTIVE LOOPS
|
||||||
|
print(f"\n ACTIVE LOOPS USING LOCAL MODELS")
|
||||||
|
print(" " + "-" * 55)
|
||||||
|
print(" heartbeat_tick 10m hermes4:14b DECIDE phase")
|
||||||
|
print(" (future) 15m hermes4:14b issue triage")
|
||||||
|
print(" (future) daily timmy:v0.1 morning report")
|
||||||
|
|
||||||
|
print(f"\n NON-LOCAL LOOPS (Gemini/Grok API)")
|
||||||
|
print(" " + "-" * 55)
|
||||||
|
print(" gemini_worker 20m gemini-2.5-pro aider")
|
||||||
|
print(" grok_worker 20m grok-3-fast opencode")
|
||||||
|
print(" cross_review 30m both PR review")
|
||||||
|
|
||||||
|
print("\n" + "=" * 70)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
watch = "--watch" in sys.argv
|
||||||
|
hours = 24
|
||||||
|
for a in sys.argv[1:]:
|
||||||
|
if a.startswith("--hours="):
|
||||||
|
hours = int(a.split("=")[1])
|
||||||
|
if watch:
|
||||||
|
while True:
|
||||||
|
render(hours)
|
||||||
|
time.sleep(30)
|
||||||
|
else:
|
||||||
|
render(hours)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Implementation Steps
|
||||||
|
|
||||||
|
### Step 1: Add hermes_local() to tasks.py
|
||||||
|
- One function, ~20 lines
|
||||||
|
- Calls `hermes chat -q` with Ollama env vars
|
||||||
|
- All telemetry comes from Hermes for free
|
||||||
|
|
||||||
|
### Step 2: Wire heartbeat_tick DECIDE phase
|
||||||
|
- Replace 6 lines of if/else with hermes_local() call
|
||||||
|
- Keep hardcoded fallback when model is down
|
||||||
|
- Decision stored in tick record for DPO review
|
||||||
|
|
||||||
|
### Step 3: Fix the MCP server warning
|
||||||
|
- The orchestration MCP server path is broken — harmless but noisy
|
||||||
|
- Either fix the path or remove from config
|
||||||
|
|
||||||
|
### Step 4: Drop the dashboard script in timmy-config/bin/
- Shipped as `bin/timmy-dashboard` (called `model_dashboard.py` elsewhere in this sketch)
- Reads Ollama API, Hermes sessions, heartbeat ticks
- No new data stores — just views over existing ones
- `timmy-dashboard --watch` for live view
|
||||||
|
|
||||||
|
### Step 5: Expand to more Huey tasks
|
||||||
|
- triage_issues: model reads issue, picks agent
|
||||||
|
- good_morning_report: model writes the "From Timmy" section
|
||||||
|
- Each expansion is just calling hermes_local() with a different prompt
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What Gets Hotfixed in Hermes Config
|
||||||
|
|
||||||
|
If `hermes insights` is broken (the cache_read_tokens column error),
|
||||||
|
that needs a fix. The dashboard falls back to reading sessions.json
|
||||||
|
directly, but insights would be the better data source.
|
||||||
|
|
||||||
|
The `providers.ollama` section in config.yaml exists but isn't wired
|
||||||
|
to the --provider flag. Filing this upstream or patching locally would
|
||||||
|
let us do `hermes chat -q "..." --provider ollama` cleanly instead
|
||||||
|
of relying on env vars. Not blocking — env vars work today.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What This Owns
|
||||||
|
|
||||||
|
- hermes_local() — 20-line wrapper around a subprocess call
|
||||||
|
- model_dashboard.py — read-only views over existing data
|
||||||
|
- review_decisions.py — optional DPO annotation CLI
|
||||||
|
|
||||||
|
## What This Does NOT Own
|
||||||
|
|
||||||
|
- Inference. Ollama does that.
|
||||||
|
- Telemetry. Hermes does that.
|
||||||
|
- Session storage. Hermes does that.
|
||||||
|
- Token counting. Hermes does that.
|
||||||
|
- Training pipeline. Already exists in timmy-config/training/.
|
||||||
129
tasks.py
129
tasks.py
@@ -14,12 +14,89 @@ from gitea_client import GiteaClient
|
|||||||
|
|
||||||
HERMES_HOME = Path.home() / ".hermes"
|
HERMES_HOME = Path.home() / ".hermes"
|
||||||
TIMMY_HOME = Path.home() / ".timmy"
|
TIMMY_HOME = Path.home() / ".timmy"
|
||||||
|
HERMES_AGENT_DIR = HERMES_HOME / "hermes-agent"
|
||||||
|
METRICS_DIR = TIMMY_HOME / "metrics"
|
||||||
REPOS = [
|
REPOS = [
|
||||||
"Timmy_Foundation/the-nexus",
|
"Timmy_Foundation/the-nexus",
|
||||||
"Timmy_Foundation/timmy-config",
|
"Timmy_Foundation/timmy-config",
|
||||||
]
|
]
|
||||||
NET_LINE_LIMIT = 10
|
NET_LINE_LIMIT = 10
|
||||||
|
|
||||||
|
# ── Local Model Inference via Hermes Harness ─────────────────────────
|
||||||
|
|
||||||
|
HEARTBEAT_MODEL = "hermes4:14b"
|
||||||
|
FALLBACK_MODEL = "hermes3:8b"
|
||||||
|
|
||||||
|
|
||||||
|
def _log_local_metric(record):
    """Append one record to today's ``local_YYYYMMDD.jsonl`` metrics file.

    Best effort: a failure to write telemetry is reported on stderr and
    otherwise ignored, so it can never mask the result of the call it
    describes.
    """
    try:
        METRICS_DIR.mkdir(parents=True, exist_ok=True)
        metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl"
        with open(metrics_file, "a") as f:
            f.write(json.dumps(record) + "\n")
    except OSError as exc:
        print(f"hermes_local: could not write metrics: {exc}", file=sys.stderr)


def hermes_local(prompt, model=None, caller_tag=None):
    """Call a local model in-process through the Hermes CLI entry point.

    Imports ``cli.main`` from HERMES_AGENT_DIR and calls it with
    ``provider="local-ollama"`` while the working directory is temporarily
    switched to that directory. Whatever the call prints to stdout is
    captured; lines starting with ``session_id:`` are removed.
    NOTE(review): how "local-ollama" is resolved to an endpoint is decided
    inside Hermes and is not visible here — confirm against config.yaml.

    One record per call is appended to METRICS_DIR, recording the model,
    caller, prompt/response lengths and whether the call succeeded.

    Args:
        prompt: Text to send to the model.
        model: Model name; defaults to HEARTBEAT_MODEL.
        caller_tag: Optional tag, prepended to the prompt as ``[tag] `` and
            stored as the ``caller`` field of the metrics record.

    Returns:
        The response text, or None when the call fails or produces no output.
    """
    import io
    from contextlib import redirect_stdout, redirect_stderr

    _model = model or HEARTBEAT_MODEL
    tagged = f"[{caller_tag}] {prompt}" if caller_tag else prompt

    # Import hermes cli.main directly — no subprocess, no env vars
    _agent_dir = str(HERMES_AGENT_DIR)
    if _agent_dir not in sys.path:
        sys.path.insert(0, _agent_dir)
    old_cwd = os.getcwd()

    buf = io.StringIO()
    err = io.StringIO()
    try:
        # chdir lives inside the try so a missing agent directory is logged
        # and reported as None, like every other failure.
        os.chdir(_agent_dir)
        from cli import main as hermes_main

        with redirect_stdout(buf), redirect_stderr(err):
            try:
                hermes_main(
                    query=tagged,
                    model=_model,
                    provider="local-ollama",
                    quiet=True,
                )
            except SystemExit as exit_request:
                # SystemExit is not an Exception subclass, so a CLI entry
                # point that calls sys.exit() would otherwise unwind through
                # the caller. A clean exit keeps whatever was printed.
                if exit_request.code not in (None, 0):
                    raise RuntimeError(
                        f"hermes exited with status {exit_request.code!r}"
                    ) from exit_request

        # Strip session_id line from quiet output
        kept = [
            line
            for line in buf.getvalue().strip().split("\n")
            if not line.startswith("session_id:")
        ]
        response = "\n".join(kept).strip()

        record = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "model": _model,
            "caller": caller_tag or "unknown",
            "prompt_len": len(prompt),
            "response_len": len(response),
            "success": bool(response),
        }
        stderr_tail = err.getvalue().strip()[-500:]
        if not response and stderr_tail:
            # Keep a hint about why nothing came back.
            record["stderr"] = stderr_tail
        _log_local_metric(record)

        return response if response else None
    except Exception as e:
        # Log failure
        record = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "model": _model,
            "caller": caller_tag or "unknown",
            "error": str(e),
            "success": False,
        }
        stderr_tail = err.getvalue().strip()[-500:]
        if stderr_tail:
            record["stderr"] = stderr_tail
        _log_local_metric(record)
        return None
    finally:
        os.chdir(old_cwd)
|
||||||
|
|
||||||
|
|
||||||
# ── Existing: Orchestration ──────────────────────────────────────────
|
# ── Existing: Orchestration ──────────────────────────────────────────
|
||||||
|
|
||||||
@@ -280,15 +357,49 @@ def heartbeat_tick():
|
|||||||
"previous_tick": last_tick.get("tick_id", "none"),
|
"previous_tick": last_tick.get("tick_id", "none"),
|
||||||
}
|
}
|
||||||
|
|
||||||
# DECIDE + ACT: check for problems
|
# DECIDE: let hermes4:14b reason about what to do
|
||||||
actions = []
|
decide_prompt = (
|
||||||
if not perception.get("gitea_alive"):
|
f"System state at {now.isoformat()}:\n\n"
|
||||||
actions.append("ALERT: Gitea unreachable")
|
f"{json.dumps(perception, indent=2)}\n\n"
|
||||||
health = perception.get("model_health", {})
|
f"Previous tick: {last_tick.get('tick_id', 'none')}\n\n"
|
||||||
if isinstance(health, dict) and not health.get("ollama_running"):
|
"You are the heartbeat monitor. Based on this state:\n"
|
||||||
actions.append("ALERT: Ollama not running")
|
"1. List any actions needed (alerts, restarts, escalations). Empty if all OK.\n"
|
||||||
|
"2. Rate severity: ok, warning, or critical.\n"
|
||||||
tick_record["actions"] = actions
|
"3. One sentence of reasoning.\n\n"
|
||||||
|
'Respond ONLY with JSON: {"actions": [], "severity": "ok", "reasoning": "..."}'
|
||||||
|
)
|
||||||
|
|
||||||
|
decision = None
|
||||||
|
try:
|
||||||
|
raw = hermes_local(decide_prompt, caller_tag="heartbeat_tick")
|
||||||
|
if raw:
|
||||||
|
# Model might wrap JSON in markdown, extract first { line
|
||||||
|
for line in raw.split("\n"):
|
||||||
|
line = line.strip()
|
||||||
|
if line.startswith("{"):
|
||||||
|
decision = json.loads(line)
|
||||||
|
break
|
||||||
|
if not decision:
|
||||||
|
decision = json.loads(raw)
|
||||||
|
except (json.JSONDecodeError, Exception):
|
||||||
|
decision = None
|
||||||
|
|
||||||
|
# Fallback to hardcoded logic if model fails or is down
|
||||||
|
if decision is None:
|
||||||
|
actions = []
|
||||||
|
if not perception.get("gitea_alive"):
|
||||||
|
actions.append("ALERT: Gitea unreachable")
|
||||||
|
health = perception.get("model_health", {})
|
||||||
|
if isinstance(health, dict) and not health.get("ollama_running"):
|
||||||
|
actions.append("ALERT: Ollama not running")
|
||||||
|
decision = {
|
||||||
|
"actions": actions,
|
||||||
|
"severity": "fallback",
|
||||||
|
"reasoning": "model unavailable, used hardcoded checks",
|
||||||
|
}
|
||||||
|
|
||||||
|
tick_record["decision"] = decision
|
||||||
|
actions = decision.get("actions", [])
|
||||||
|
|
||||||
# Save tick
|
# Save tick
|
||||||
last_tick_file.write_text(json.dumps(tick_record, indent=2))
|
last_tick_file.write_text(json.dumps(tick_record, indent=2))
|
||||||
|
|||||||
Reference in New Issue
Block a user