diff --git a/bin/timmy-dashboard b/bin/timmy-dashboard index 4ac092e6..ef543490 100755 --- a/bin/timmy-dashboard +++ b/bin/timmy-dashboard @@ -9,6 +9,7 @@ Usage: import json import os +import sqlite3 import subprocess import sys import time @@ -16,6 +17,12 @@ import urllib.request from datetime import datetime, timezone, timedelta from pathlib import Path +REPO_ROOT = Path(__file__).resolve().parent.parent +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + +from metrics_helpers import summarize_local_metrics, summarize_session_rows + HERMES_HOME = Path.home() / ".hermes" TIMMY_HOME = Path.home() / ".timmy" METRICS_DIR = TIMMY_HOME / "metrics" @@ -60,6 +67,30 @@ def get_hermes_sessions(): return [] +def get_session_rows(hours=24): + state_db = HERMES_HOME / "state.db" + if not state_db.exists(): + return [] + cutoff = time.time() - (hours * 3600) + try: + conn = sqlite3.connect(str(state_db)) + rows = conn.execute( + """ + SELECT model, source, COUNT(*) as sessions, + SUM(message_count) as msgs, + SUM(tool_call_count) as tools + FROM sessions + WHERE started_at > ? 
AND model IS NOT NULL AND model != '' + GROUP BY model, source + """, + (cutoff,), + ).fetchall() + conn.close() + return rows + except Exception: + return [] + + def get_heartbeat_ticks(date_str=None): if not date_str: date_str = datetime.now().strftime("%Y%m%d") @@ -130,6 +161,9 @@ def render(hours=24): ticks = get_heartbeat_ticks() metrics = get_local_metrics(hours) sessions = get_hermes_sessions() + session_rows = get_session_rows(hours) + local_summary = summarize_local_metrics(metrics) + session_summary = summarize_session_rows(session_rows) loaded_names = {m.get("name", "") for m in loaded} now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -159,28 +193,18 @@ def render(hours=24): print(f"\n {BOLD}LOCAL INFERENCE ({len(metrics)} calls, last {hours}h){RST}") print(f" {DIM}{'-' * 55}{RST}") if metrics: - by_caller = {} - for r in metrics: - caller = r.get("caller", "unknown") - if caller not in by_caller: - by_caller[caller] = {"count": 0, "success": 0, "errors": 0} - by_caller[caller]["count"] += 1 - if r.get("success"): - by_caller[caller]["success"] += 1 - else: - by_caller[caller]["errors"] += 1 - for caller, stats in by_caller.items(): - err = f" {RED}err:{stats['errors']}{RST}" if stats["errors"] else "" - print(f" {caller:25s} calls:{stats['count']:4d} " - f"{GREEN}ok:{stats['success']}{RST}{err}") + print(f" Tokens: {local_summary['input_tokens']} in | {local_summary['output_tokens']} out | {local_summary['total_tokens']} total") + if local_summary.get('avg_latency_s') is not None: + print(f" Avg latency: {local_summary['avg_latency_s']:.2f}s") + if local_summary.get('avg_tokens_per_second') is not None: + print(f" Avg throughput: {GREEN}{local_summary['avg_tokens_per_second']:.2f} tok/s{RST}") + for caller, stats in sorted(local_summary['by_caller'].items()): + err = f" {RED}err:{stats['failed_calls']}{RST}" if stats['failed_calls'] else "" + print(f" {caller:25s} calls:{stats['calls']:4d} tokens:{stats['total_tokens']:5d} 
{GREEN}ok:{stats['successful_calls']}{RST}{err}") - by_model = {} - for r in metrics: - model = r.get("model", "unknown") - by_model[model] = by_model.get(model, 0) + 1 print(f"\n {DIM}Models used:{RST}") - for model, count in sorted(by_model.items(), key=lambda x: -x[1]): - print(f" {model:30s} {count} calls") + for model, stats in sorted(local_summary['by_model'].items(), key=lambda x: -x[1]['calls']): + print(f" {model:30s} {stats['calls']} calls {stats['total_tokens']} tok") else: print(f" {DIM}(no local calls recorded yet){RST}") @@ -211,15 +235,18 @@ def render(hours=24): else: print(f" {DIM}(no ticks today){RST}") - # ── HERMES SESSIONS ── - local_sessions = [s for s in sessions - if "localhost:11434" in str(s.get("base_url", ""))] + # ── HERMES SESSIONS / SOVEREIGNTY LOAD ── + local_sessions = [s for s in sessions if "localhost:11434" in str(s.get("base_url", ""))] cloud_sessions = [s for s in sessions if s not in local_sessions] - print(f"\n {BOLD}HERMES SESSIONS{RST}") + print(f"\n {BOLD}HERMES SESSIONS / SOVEREIGNTY LOAD{RST}") print(f" {DIM}{'-' * 55}{RST}") - print(f" Total: {len(sessions)} | " - f"{GREEN}Local: {len(local_sessions)}{RST} | " - f"{YELLOW}Cloud: {len(cloud_sessions)}{RST}") + print(f" Session cache: {len(sessions)} total | {GREEN}{len(local_sessions)} local{RST} | {YELLOW}{len(cloud_sessions)} cloud{RST}") + if session_rows: + print(f" Session DB: {session_summary['total_sessions']} total | {GREEN}{session_summary['local_sessions']} local{RST} | {YELLOW}{session_summary['cloud_sessions']} cloud{RST}") + print(f" Token est: {GREEN}{session_summary['local_est_tokens']} local{RST} | {YELLOW}{session_summary['cloud_est_tokens']} cloud{RST}") + print(f" Est cloud cost: ${session_summary['cloud_est_cost_usd']:.4f}") + else: + print(f" {DIM}(no session-db stats available){RST}") # ── ACTIVE LOOPS ── print(f"\n {BOLD}ACTIVE LOOPS{RST}") diff --git a/config.yaml b/config.yaml index 202c4da0..1ab24d56 100644 --- a/config.yaml +++ b/config.yaml @@ 
-1,8 +1,8 @@ model: - default: gpt-5.4 - provider: openai-codex + default: hermes4:14b + provider: custom context_length: 65536 - base_url: https://chatgpt.com/backend-api/codex + base_url: http://localhost:8081/v1 toolsets: - all agent: @@ -188,7 +188,7 @@ custom_providers: - name: Local llama.cpp base_url: http://localhost:8081/v1 api_key: none - model: auto + model: hermes4:14b - name: Google Gemini base_url: https://generativelanguage.googleapis.com/v1beta/openai api_key_env: GEMINI_API_KEY diff --git a/metrics_helpers.py b/metrics_helpers.py new file mode 100644 index 00000000..85d5bd8b --- /dev/null +++ b/metrics_helpers.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +import math +from datetime import datetime, timezone + +COST_TABLE = { + "claude-opus-4-6": {"input": 15.0, "output": 75.0}, + "claude-sonnet-4-6": {"input": 3.0, "output": 15.0}, + "claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0}, + "claude-haiku-4-20250414": {"input": 0.25, "output": 1.25}, + "hermes4:14b": {"input": 0.0, "output": 0.0}, + "hermes3:8b": {"input": 0.0, "output": 0.0}, + "hermes3:latest": {"input": 0.0, "output": 0.0}, + "qwen3:30b": {"input": 0.0, "output": 0.0}, +} + + +def estimate_tokens_from_chars(char_count: int) -> int: + if char_count <= 0: + return 0 + return math.ceil(char_count / 4) + + + +def build_local_metric_record( + *, + prompt: str, + response: str, + model: str, + caller: str, + session_id: str | None, + latency_s: float, + success: bool, + error: str | None = None, +) -> dict: + input_tokens = estimate_tokens_from_chars(len(prompt)) + output_tokens = estimate_tokens_from_chars(len(response)) + total_tokens = input_tokens + output_tokens + tokens_per_second = round(total_tokens / latency_s, 2) if latency_s > 0 else None + return { + "timestamp": datetime.now(timezone.utc).isoformat(), + "model": model, + "caller": caller, + "prompt_len": len(prompt), + "response_len": len(response), + "session_id": session_id, + "latency_s": 
round(latency_s, 3), + "est_input_tokens": input_tokens, + "est_output_tokens": output_tokens, + "tokens_per_second": tokens_per_second, + "success": success, + "error": error, + } + + + +def summarize_local_metrics(records: list[dict]) -> dict: + total_calls = len(records) + successful_calls = sum(1 for record in records if record.get("success")) + failed_calls = total_calls - successful_calls + input_tokens = sum(int(record.get("est_input_tokens", 0) or 0) for record in records) + output_tokens = sum(int(record.get("est_output_tokens", 0) or 0) for record in records) + total_tokens = input_tokens + output_tokens + latencies = [float(record.get("latency_s", 0) or 0) for record in records if record.get("latency_s") is not None] + throughputs = [ + float(record.get("tokens_per_second", 0) or 0) + for record in records + if record.get("tokens_per_second") + ] + + by_caller: dict[str, dict] = {} + by_model: dict[str, dict] = {} + for record in records: + caller = record.get("caller", "unknown") + model = record.get("model", "unknown") + bucket_tokens = int(record.get("est_input_tokens", 0) or 0) + int(record.get("est_output_tokens", 0) or 0) + for key, table in ((caller, by_caller), (model, by_model)): + if key not in table: + table[key] = {"calls": 0, "successful_calls": 0, "failed_calls": 0, "total_tokens": 0} + table[key]["calls"] += 1 + table[key]["total_tokens"] += bucket_tokens + if record.get("success"): + table[key]["successful_calls"] += 1 + else: + table[key]["failed_calls"] += 1 + + return { + "total_calls": total_calls, + "successful_calls": successful_calls, + "failed_calls": failed_calls, + "input_tokens": input_tokens, + "output_tokens": output_tokens, + "total_tokens": total_tokens, + "avg_latency_s": round(sum(latencies) / len(latencies), 2) if latencies else None, + "avg_tokens_per_second": round(sum(throughputs) / len(throughputs), 2) if throughputs else None, + "by_caller": by_caller, + "by_model": by_model, + } + + + +def is_local_model(model: str 
| None) -> bool: + if not model: + return False + costs = COST_TABLE.get(model, {}) + if costs.get("input", 1) == 0 and costs.get("output", 1) == 0: + return True + return ":" in model and "/" not in model and "claude" not in model + + + +def summarize_session_rows(rows: list[tuple]) -> dict: + total_sessions = 0 + local_sessions = 0 + cloud_sessions = 0 + local_est_tokens = 0 + cloud_est_tokens = 0 + cloud_est_cost_usd = 0.0 + for model, source, sessions, messages, tool_calls in rows: + sessions = int(sessions or 0) + messages = int(messages or 0) + est_tokens = messages * 500 + total_sessions += sessions + if is_local_model(model): + local_sessions += sessions + local_est_tokens += est_tokens + else: + cloud_sessions += sessions + cloud_est_tokens += est_tokens + pricing = COST_TABLE.get(model, {"input": 5.0, "output": 15.0}) + cloud_est_cost_usd += (est_tokens / 1_000_000) * ((pricing["input"] + pricing["output"]) / 2) + return { + "total_sessions": total_sessions, + "local_sessions": local_sessions, + "cloud_sessions": cloud_sessions, + "local_est_tokens": local_est_tokens, + "cloud_est_tokens": cloud_est_tokens, + "cloud_est_cost_usd": round(cloud_est_cost_usd, 4), + } diff --git a/tasks.py b/tasks.py index 36daf10c..f4a99283 100644 --- a/tasks.py +++ b/tasks.py @@ -5,12 +5,14 @@ import glob import os import subprocess import sys +import time from datetime import datetime, timezone from pathlib import Path from orchestration import huey from huey import crontab from gitea_client import GiteaClient +from metrics_helpers import build_local_metric_record HERMES_HOME = Path.home() / ".hermes" TIMMY_HOME = Path.home() / ".timmy" @@ -57,6 +59,7 @@ def run_hermes_local( _model = model or HEARTBEAT_MODEL tagged = f"[{caller_tag}] {prompt}" if caller_tag else prompt + started = time.time() try: runner = """ import io @@ -167,15 +170,15 @@ sys.exit(exit_code) # Log to metrics jsonl METRICS_DIR.mkdir(parents=True, exist_ok=True) metrics_file = METRICS_DIR / 
f"local_{datetime.now().strftime('%Y%m%d')}.jsonl" - record = { - "timestamp": datetime.now(timezone.utc).isoformat(), - "model": _model, - "caller": caller_tag or "unknown", - "prompt_len": len(prompt), - "response_len": len(response), - "session_id": session_id, - "success": bool(response), - } + record = build_local_metric_record( + prompt=prompt, + response=response, + model=_model, + caller=caller_tag or "unknown", + session_id=session_id, + latency_s=time.time() - started, + success=bool(response), + ) with open(metrics_file, "a") as f: f.write(json.dumps(record) + "\n") @@ -190,13 +193,16 @@ sys.exit(exit_code) # Log failure METRICS_DIR.mkdir(parents=True, exist_ok=True) metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl" - record = { - "timestamp": datetime.now(timezone.utc).isoformat(), - "model": _model, - "caller": caller_tag or "unknown", - "error": str(e), - "success": False, - } + record = build_local_metric_record( + prompt=prompt, + response="", + model=_model, + caller=caller_tag or "unknown", + session_id=None, + latency_s=time.time() - started, + success=False, + error=str(e), + ) with open(metrics_file, "a") as f: f.write(json.dumps(record) + "\n") return None diff --git a/tests/test_metrics_helpers.py b/tests/test_metrics_helpers.py new file mode 100644 index 00000000..c906fb26 --- /dev/null +++ b/tests/test_metrics_helpers.py @@ -0,0 +1,93 @@ +from metrics_helpers import ( + build_local_metric_record, + estimate_tokens_from_chars, + summarize_local_metrics, + summarize_session_rows, +) + + +def test_estimate_tokens_from_chars_uses_simple_local_heuristic() -> None: + assert estimate_tokens_from_chars(0) == 0 + assert estimate_tokens_from_chars(1) == 1 + assert estimate_tokens_from_chars(4) == 1 + assert estimate_tokens_from_chars(5) == 2 + assert estimate_tokens_from_chars(401) == 101 + + +def test_build_local_metric_record_adds_token_and_throughput_estimates() -> None: + record = build_local_metric_record( + 
prompt="abcd" * 10, + response="xyz" * 20, + model="hermes4:14b", + caller="heartbeat_tick", + session_id="session-123", + latency_s=2.0, + success=True, + ) + + assert record["model"] == "hermes4:14b" + assert record["caller"] == "heartbeat_tick" + assert record["session_id"] == "session-123" + assert record["est_input_tokens"] == 10 + assert record["est_output_tokens"] == 15 + assert record["tokens_per_second"] == 12.5 + + +def test_summarize_local_metrics_rolls_up_tokens_and_latency() -> None: + records = [ + { + "caller": "heartbeat_tick", + "model": "hermes4:14b", + "success": True, + "est_input_tokens": 100, + "est_output_tokens": 40, + "latency_s": 2.0, + "tokens_per_second": 20.0, + }, + { + "caller": "heartbeat_tick", + "model": "hermes4:14b", + "success": False, + "est_input_tokens": 30, + "est_output_tokens": 0, + "latency_s": 1.0, + }, + { + "caller": "session_export", + "model": "hermes3:8b", + "success": True, + "est_input_tokens": 50, + "est_output_tokens": 25, + "latency_s": 5.0, + "tokens_per_second": 5.0, + }, + ] + + summary = summarize_local_metrics(records) + + assert summary["total_calls"] == 3 + assert summary["successful_calls"] == 2 + assert summary["failed_calls"] == 1 + assert summary["input_tokens"] == 180 + assert summary["output_tokens"] == 65 + assert summary["total_tokens"] == 245 + assert summary["avg_latency_s"] == 2.67 + assert summary["avg_tokens_per_second"] == 12.5 + assert summary["by_caller"]["heartbeat_tick"]["total_tokens"] == 170 + assert summary["by_model"]["hermes4:14b"]["failed_calls"] == 1 + + +def test_summarize_session_rows_separates_local_and_cloud_estimates() -> None: + rows = [ + ("hermes4:14b", "local", 2, 10, 4), + ("claude-sonnet-4-6", "cli", 3, 9, 2), + ] + + summary = summarize_session_rows(rows) + + assert summary["total_sessions"] == 5 + assert summary["local_sessions"] == 2 + assert summary["cloud_sessions"] == 3 + assert summary["local_est_tokens"] == 5000 + assert summary["cloud_est_tokens"] == 4500 + 
assert summary["cloud_est_cost_usd"] == 0.0405