feat: add local-vs-cloud token and throughput metrics (#85)
This commit was merged in pull request #85.
This commit is contained in:
@@ -9,6 +9,7 @@ Usage:
|
||||
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
@@ -16,6 +17,12 @@ import urllib.request
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
if str(REPO_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
from metrics_helpers import summarize_local_metrics, summarize_session_rows
|
||||
|
||||
HERMES_HOME = Path.home() / ".hermes"
|
||||
TIMMY_HOME = Path.home() / ".timmy"
|
||||
METRICS_DIR = TIMMY_HOME / "metrics"
|
||||
@@ -60,6 +67,30 @@ def get_hermes_sessions():
|
||||
return []
|
||||
|
||||
|
||||
def get_session_rows(hours=24):
|
||||
state_db = HERMES_HOME / "state.db"
|
||||
if not state_db.exists():
|
||||
return []
|
||||
cutoff = time.time() - (hours * 3600)
|
||||
try:
|
||||
conn = sqlite3.connect(str(state_db))
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT model, source, COUNT(*) as sessions,
|
||||
SUM(message_count) as msgs,
|
||||
SUM(tool_call_count) as tools
|
||||
FROM sessions
|
||||
WHERE started_at > ? AND model IS NOT NULL AND model != ''
|
||||
GROUP BY model, source
|
||||
""",
|
||||
(cutoff,),
|
||||
).fetchall()
|
||||
conn.close()
|
||||
return rows
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
def get_heartbeat_ticks(date_str=None):
|
||||
if not date_str:
|
||||
date_str = datetime.now().strftime("%Y%m%d")
|
||||
@@ -130,6 +161,9 @@ def render(hours=24):
|
||||
ticks = get_heartbeat_ticks()
|
||||
metrics = get_local_metrics(hours)
|
||||
sessions = get_hermes_sessions()
|
||||
session_rows = get_session_rows(hours)
|
||||
local_summary = summarize_local_metrics(metrics)
|
||||
session_summary = summarize_session_rows(session_rows)
|
||||
|
||||
loaded_names = {m.get("name", "") for m in loaded}
|
||||
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
@@ -159,28 +193,18 @@ def render(hours=24):
|
||||
print(f"\n {BOLD}LOCAL INFERENCE ({len(metrics)} calls, last {hours}h){RST}")
|
||||
print(f" {DIM}{'-' * 55}{RST}")
|
||||
if metrics:
|
||||
by_caller = {}
|
||||
for r in metrics:
|
||||
caller = r.get("caller", "unknown")
|
||||
if caller not in by_caller:
|
||||
by_caller[caller] = {"count": 0, "success": 0, "errors": 0}
|
||||
by_caller[caller]["count"] += 1
|
||||
if r.get("success"):
|
||||
by_caller[caller]["success"] += 1
|
||||
else:
|
||||
by_caller[caller]["errors"] += 1
|
||||
for caller, stats in by_caller.items():
|
||||
err = f" {RED}err:{stats['errors']}{RST}" if stats["errors"] else ""
|
||||
print(f" {caller:25s} calls:{stats['count']:4d} "
|
||||
f"{GREEN}ok:{stats['success']}{RST}{err}")
|
||||
print(f" Tokens: {local_summary['input_tokens']} in | {local_summary['output_tokens']} out | {local_summary['total_tokens']} total")
|
||||
if local_summary.get('avg_latency_s') is not None:
|
||||
print(f" Avg latency: {local_summary['avg_latency_s']:.2f}s")
|
||||
if local_summary.get('avg_tokens_per_second') is not None:
|
||||
print(f" Avg throughput: {GREEN}{local_summary['avg_tokens_per_second']:.2f} tok/s{RST}")
|
||||
for caller, stats in sorted(local_summary['by_caller'].items()):
|
||||
err = f" {RED}err:{stats['failed_calls']}{RST}" if stats['failed_calls'] else ""
|
||||
print(f" {caller:25s} calls:{stats['calls']:4d} tokens:{stats['total_tokens']:5d} {GREEN}ok:{stats['successful_calls']}{RST}{err}")
|
||||
|
||||
by_model = {}
|
||||
for r in metrics:
|
||||
model = r.get("model", "unknown")
|
||||
by_model[model] = by_model.get(model, 0) + 1
|
||||
print(f"\n {DIM}Models used:{RST}")
|
||||
for model, count in sorted(by_model.items(), key=lambda x: -x[1]):
|
||||
print(f" {model:30s} {count} calls")
|
||||
for model, stats in sorted(local_summary['by_model'].items(), key=lambda x: -x[1]['calls']):
|
||||
print(f" {model:30s} {stats['calls']} calls {stats['total_tokens']} tok")
|
||||
else:
|
||||
print(f" {DIM}(no local calls recorded yet){RST}")
|
||||
|
||||
@@ -211,15 +235,18 @@ def render(hours=24):
|
||||
else:
|
||||
print(f" {DIM}(no ticks today){RST}")
|
||||
|
||||
# ── HERMES SESSIONS ──
|
||||
local_sessions = [s for s in sessions
|
||||
if "localhost:11434" in str(s.get("base_url", ""))]
|
||||
# ── HERMES SESSIONS / SOVEREIGNTY LOAD ──
|
||||
local_sessions = [s for s in sessions if "localhost:11434" in str(s.get("base_url", ""))]
|
||||
cloud_sessions = [s for s in sessions if s not in local_sessions]
|
||||
print(f"\n {BOLD}HERMES SESSIONS{RST}")
|
||||
print(f"\n {BOLD}HERMES SESSIONS / SOVEREIGNTY LOAD{RST}")
|
||||
print(f" {DIM}{'-' * 55}{RST}")
|
||||
print(f" Total: {len(sessions)} | "
|
||||
f"{GREEN}Local: {len(local_sessions)}{RST} | "
|
||||
f"{YELLOW}Cloud: {len(cloud_sessions)}{RST}")
|
||||
print(f" Session cache: {len(sessions)} total | {GREEN}{len(local_sessions)} local{RST} | {YELLOW}{len(cloud_sessions)} cloud{RST}")
|
||||
if session_rows:
|
||||
print(f" Session DB: {session_summary['total_sessions']} total | {GREEN}{session_summary['local_sessions']} local{RST} | {YELLOW}{session_summary['cloud_sessions']} cloud{RST}")
|
||||
print(f" Token est: {GREEN}{session_summary['local_est_tokens']} local{RST} | {YELLOW}{session_summary['cloud_est_tokens']} cloud{RST}")
|
||||
print(f" Est cloud cost: ${session_summary['cloud_est_cost_usd']:.4f}")
|
||||
else:
|
||||
print(f" {DIM}(no session-db stats available){RST}")
|
||||
|
||||
# ── ACTIVE LOOPS ──
|
||||
print(f"\n {BOLD}ACTIVE LOOPS{RST}")
|
||||
|
||||
Reference in New Issue
Block a user