feat: add local-vs-cloud token and throughput metrics (#85)
This commit was merged in pull request #85.
This commit is contained in:
@@ -9,6 +9,7 @@ Usage:
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import sqlite3
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
@@ -16,6 +17,12 @@ import urllib.request
|
|||||||
from datetime import datetime, timezone, timedelta
|
from datetime import datetime, timezone, timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
if str(REPO_ROOT) not in sys.path:
|
||||||
|
sys.path.insert(0, str(REPO_ROOT))
|
||||||
|
|
||||||
|
from metrics_helpers import summarize_local_metrics, summarize_session_rows
|
||||||
|
|
||||||
HERMES_HOME = Path.home() / ".hermes"
|
HERMES_HOME = Path.home() / ".hermes"
|
||||||
TIMMY_HOME = Path.home() / ".timmy"
|
TIMMY_HOME = Path.home() / ".timmy"
|
||||||
METRICS_DIR = TIMMY_HOME / "metrics"
|
METRICS_DIR = TIMMY_HOME / "metrics"
|
||||||
@@ -60,6 +67,30 @@ def get_hermes_sessions():
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_session_rows(hours=24):
    """Return per-model session aggregates from the Hermes state database.

    Reads the ``sessions`` table of ``HERMES_HOME / "state.db"`` and groups
    rows by ``(model, source)``, keeping only sessions whose ``started_at``
    is greater than "now minus ``hours`` hours" and whose model is set.

    Args:
        hours: Size of the look-back window, in hours.

    Returns:
        A list of ``(model, source, sessions, msgs, tools)`` tuples, or an
        empty list when the database file is missing or cannot be queried.
    """
    state_db = HERMES_HOME / "state.db"
    if not state_db.exists():
        return []

    # NOTE(review): the comparison assumes started_at holds epoch seconds,
    # like time.time() -- confirm against the sessions schema.
    cutoff = time.time() - (hours * 3600)
    try:
        conn = sqlite3.connect(str(state_db))
        try:
            return conn.execute(
                """
                SELECT model, source, COUNT(*) as sessions,
                       SUM(message_count) as msgs,
                       SUM(tool_call_count) as tools
                FROM sessions
                WHERE started_at > ? AND model IS NOT NULL AND model != ''
                GROUP BY model, source
                """,
                (cutoff,),
            ).fetchall()
        finally:
            # Close even when the query fails (e.g. missing table/column);
            # otherwise the connection leaks on every failed refresh.
            conn.close()
    except sqlite3.Error:
        # Best-effort: callers treat an unreadable database as "no stats".
        return []
|
||||||
|
|
||||||
|
|
||||||
def get_heartbeat_ticks(date_str=None):
|
def get_heartbeat_ticks(date_str=None):
|
||||||
if not date_str:
|
if not date_str:
|
||||||
date_str = datetime.now().strftime("%Y%m%d")
|
date_str = datetime.now().strftime("%Y%m%d")
|
||||||
@@ -130,6 +161,9 @@ def render(hours=24):
|
|||||||
ticks = get_heartbeat_ticks()
|
ticks = get_heartbeat_ticks()
|
||||||
metrics = get_local_metrics(hours)
|
metrics = get_local_metrics(hours)
|
||||||
sessions = get_hermes_sessions()
|
sessions = get_hermes_sessions()
|
||||||
|
session_rows = get_session_rows(hours)
|
||||||
|
local_summary = summarize_local_metrics(metrics)
|
||||||
|
session_summary = summarize_session_rows(session_rows)
|
||||||
|
|
||||||
loaded_names = {m.get("name", "") for m in loaded}
|
loaded_names = {m.get("name", "") for m in loaded}
|
||||||
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
@@ -159,28 +193,18 @@ def render(hours=24):
|
|||||||
print(f"\n {BOLD}LOCAL INFERENCE ({len(metrics)} calls, last {hours}h){RST}")
|
print(f"\n {BOLD}LOCAL INFERENCE ({len(metrics)} calls, last {hours}h){RST}")
|
||||||
print(f" {DIM}{'-' * 55}{RST}")
|
print(f" {DIM}{'-' * 55}{RST}")
|
||||||
if metrics:
|
if metrics:
|
||||||
by_caller = {}
|
print(f" Tokens: {local_summary['input_tokens']} in | {local_summary['output_tokens']} out | {local_summary['total_tokens']} total")
|
||||||
for r in metrics:
|
if local_summary.get('avg_latency_s') is not None:
|
||||||
caller = r.get("caller", "unknown")
|
print(f" Avg latency: {local_summary['avg_latency_s']:.2f}s")
|
||||||
if caller not in by_caller:
|
if local_summary.get('avg_tokens_per_second') is not None:
|
||||||
by_caller[caller] = {"count": 0, "success": 0, "errors": 0}
|
print(f" Avg throughput: {GREEN}{local_summary['avg_tokens_per_second']:.2f} tok/s{RST}")
|
||||||
by_caller[caller]["count"] += 1
|
for caller, stats in sorted(local_summary['by_caller'].items()):
|
||||||
if r.get("success"):
|
err = f" {RED}err:{stats['failed_calls']}{RST}" if stats['failed_calls'] else ""
|
||||||
by_caller[caller]["success"] += 1
|
print(f" {caller:25s} calls:{stats['calls']:4d} tokens:{stats['total_tokens']:5d} {GREEN}ok:{stats['successful_calls']}{RST}{err}")
|
||||||
else:
|
|
||||||
by_caller[caller]["errors"] += 1
|
|
||||||
for caller, stats in by_caller.items():
|
|
||||||
err = f" {RED}err:{stats['errors']}{RST}" if stats["errors"] else ""
|
|
||||||
print(f" {caller:25s} calls:{stats['count']:4d} "
|
|
||||||
f"{GREEN}ok:{stats['success']}{RST}{err}")
|
|
||||||
|
|
||||||
by_model = {}
|
|
||||||
for r in metrics:
|
|
||||||
model = r.get("model", "unknown")
|
|
||||||
by_model[model] = by_model.get(model, 0) + 1
|
|
||||||
print(f"\n {DIM}Models used:{RST}")
|
print(f"\n {DIM}Models used:{RST}")
|
||||||
for model, count in sorted(by_model.items(), key=lambda x: -x[1]):
|
for model, stats in sorted(local_summary['by_model'].items(), key=lambda x: -x[1]['calls']):
|
||||||
print(f" {model:30s} {count} calls")
|
print(f" {model:30s} {stats['calls']} calls {stats['total_tokens']} tok")
|
||||||
else:
|
else:
|
||||||
print(f" {DIM}(no local calls recorded yet){RST}")
|
print(f" {DIM}(no local calls recorded yet){RST}")
|
||||||
|
|
||||||
@@ -211,15 +235,18 @@ def render(hours=24):
|
|||||||
else:
|
else:
|
||||||
print(f" {DIM}(no ticks today){RST}")
|
print(f" {DIM}(no ticks today){RST}")
|
||||||
|
|
||||||
# ── HERMES SESSIONS ──
|
# ── HERMES SESSIONS / SOVEREIGNTY LOAD ──
|
||||||
local_sessions = [s for s in sessions
|
local_sessions = [s for s in sessions if "localhost:11434" in str(s.get("base_url", ""))]
|
||||||
if "localhost:11434" in str(s.get("base_url", ""))]
|
|
||||||
cloud_sessions = [s for s in sessions if s not in local_sessions]
|
cloud_sessions = [s for s in sessions if s not in local_sessions]
|
||||||
print(f"\n {BOLD}HERMES SESSIONS{RST}")
|
print(f"\n {BOLD}HERMES SESSIONS / SOVEREIGNTY LOAD{RST}")
|
||||||
print(f" {DIM}{'-' * 55}{RST}")
|
print(f" {DIM}{'-' * 55}{RST}")
|
||||||
print(f" Total: {len(sessions)} | "
|
print(f" Session cache: {len(sessions)} total | {GREEN}{len(local_sessions)} local{RST} | {YELLOW}{len(cloud_sessions)} cloud{RST}")
|
||||||
f"{GREEN}Local: {len(local_sessions)}{RST} | "
|
if session_rows:
|
||||||
f"{YELLOW}Cloud: {len(cloud_sessions)}{RST}")
|
print(f" Session DB: {session_summary['total_sessions']} total | {GREEN}{session_summary['local_sessions']} local{RST} | {YELLOW}{session_summary['cloud_sessions']} cloud{RST}")
|
||||||
|
print(f" Token est: {GREEN}{session_summary['local_est_tokens']} local{RST} | {YELLOW}{session_summary['cloud_est_tokens']} cloud{RST}")
|
||||||
|
print(f" Est cloud cost: ${session_summary['cloud_est_cost_usd']:.4f}")
|
||||||
|
else:
|
||||||
|
print(f" {DIM}(no session-db stats available){RST}")
|
||||||
|
|
||||||
# ── ACTIVE LOOPS ──
|
# ── ACTIVE LOOPS ──
|
||||||
print(f"\n {BOLD}ACTIVE LOOPS{RST}")
|
print(f"\n {BOLD}ACTIVE LOOPS{RST}")
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
model:
|
model:
|
||||||
default: gpt-5.4
|
default: hermes4:14b
|
||||||
provider: openai-codex
|
provider: custom
|
||||||
context_length: 65536
|
context_length: 65536
|
||||||
base_url: https://chatgpt.com/backend-api/codex
|
base_url: http://localhost:8081/v1
|
||||||
toolsets:
|
toolsets:
|
||||||
- all
|
- all
|
||||||
agent:
|
agent:
|
||||||
@@ -188,7 +188,7 @@ custom_providers:
|
|||||||
- name: Local llama.cpp
|
- name: Local llama.cpp
|
||||||
base_url: http://localhost:8081/v1
|
base_url: http://localhost:8081/v1
|
||||||
api_key: none
|
api_key: none
|
||||||
model: auto
|
model: hermes4:14b
|
||||||
- name: Google Gemini
|
- name: Google Gemini
|
||||||
base_url: https://generativelanguage.googleapis.com/v1beta/openai
|
base_url: https://generativelanguage.googleapis.com/v1beta/openai
|
||||||
api_key_env: GEMINI_API_KEY
|
api_key_env: GEMINI_API_KEY
|
||||||
|
|||||||
139
metrics_helpers.py
Normal file
139
metrics_helpers.py
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
# Per-model pricing used for cost estimates. The rates appear to be USD per
# million tokens: summarize_session_rows divides token counts by 1_000_000
# before applying them. Models priced at zero for both directions are treated
# as local by is_local_model.
COST_TABLE = {
    # Priced models
    "claude-opus-4-6": {
        "input": 15.0,
        "output": 75.0,
    },
    "claude-sonnet-4-6": {
        "input": 3.0,
        "output": 15.0,
    },
    "claude-sonnet-4-20250514": {
        "input": 3.0,
        "output": 15.0,
    },
    "claude-haiku-4-20250414": {
        "input": 0.25,
        "output": 1.25,
    },
    # Zero-cost models
    "hermes4:14b": {
        "input": 0.0,
        "output": 0.0,
    },
    "hermes3:8b": {
        "input": 0.0,
        "output": 0.0,
    },
    "hermes3:latest": {
        "input": 0.0,
        "output": 0.0,
    },
    "qwen3:30b": {
        "input": 0.0,
        "output": 0.0,
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
def estimate_tokens_from_chars(char_count: int) -> int:
    """Estimate a token count from a character count.

    Uses a fixed ratio of four characters per token, rounded up. Zero and
    negative counts yield ``0``.
    """
    return 0 if char_count <= 0 else math.ceil(char_count / 4)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def build_local_metric_record(
    *,
    prompt: str,
    response: str,
    model: str,
    caller: str,
    session_id: str | None,
    latency_s: float,
    success: bool,
    error: str | None = None,
) -> dict:
    """Build one metrics record for a local inference call.

    Token counts are estimates derived from the prompt and response lengths
    via ``estimate_tokens_from_chars``. Throughput is the combined
    input + output estimate divided by ``latency_s``; it is ``None`` when the
    latency is not positive.

    Returns:
        A dict carrying a UTC ISO-8601 timestamp, the call metadata, the raw
        lengths, the rounded latency, the token estimates, the throughput and
        the success/error fields.
    """
    est_in = estimate_tokens_from_chars(len(prompt))
    est_out = estimate_tokens_from_chars(len(response))

    if latency_s > 0:
        throughput = round((est_in + est_out) / latency_s, 2)
    else:
        throughput = None

    record: dict = {}
    record["timestamp"] = datetime.now(timezone.utc).isoformat()
    record["model"] = model
    record["caller"] = caller
    record["prompt_len"] = len(prompt)
    record["response_len"] = len(response)
    record["session_id"] = session_id
    record["latency_s"] = round(latency_s, 3)
    record["est_input_tokens"] = est_in
    record["est_output_tokens"] = est_out
    record["tokens_per_second"] = throughput
    record["success"] = success
    record["error"] = error
    return record
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_local_metrics(records: list[dict]) -> dict:
    """Roll up local inference metric records into totals and breakdowns.

    Args:
        records: Metric dicts. Every field is optional: missing or ``None``
            token counts count as zero, records without ``latency_s`` are
            left out of the latency average, and records without a truthy
            ``tokens_per_second`` are left out of the throughput average.

    Returns:
        A dict with ``total_calls``, ``successful_calls``, ``failed_calls``,
        ``input_tokens``, ``output_tokens``, ``total_tokens``,
        ``avg_latency_s`` and ``avg_tokens_per_second`` (each ``None`` when
        no record contributes), plus ``by_caller`` and ``by_model`` mappings
        whose values hold ``calls``, ``successful_calls``, ``failed_calls``
        and ``total_tokens``.
    """
    input_tokens = 0
    output_tokens = 0
    successful_calls = 0
    latencies: list[float] = []
    throughputs: list[float] = []
    by_caller: dict[str, dict] = {}
    by_model: dict[str, dict] = {}

    for record in records:
        tokens_in = int(record.get("est_input_tokens", 0) or 0)
        tokens_out = int(record.get("est_output_tokens", 0) or 0)
        succeeded = bool(record.get("success"))

        input_tokens += tokens_in
        output_tokens += tokens_out
        if succeeded:
            successful_calls += 1

        latency = record.get("latency_s")
        if latency is not None:
            latencies.append(float(latency or 0))

        throughput = record.get("tokens_per_second")
        if throughput:
            throughputs.append(float(throughput))

        # `or` rather than a .get() default: a record carrying an explicit
        # None/empty caller or model must not become a None bucket key,
        # which cannot be sorted or formatted next to string keys.
        caller = record.get("caller") or "unknown"
        model = record.get("model") or "unknown"

        for key, table in ((caller, by_caller), (model, by_model)):
            bucket = table.setdefault(
                key,
                {"calls": 0, "successful_calls": 0, "failed_calls": 0, "total_tokens": 0},
            )
            bucket["calls"] += 1
            bucket["total_tokens"] += tokens_in + tokens_out
            bucket["successful_calls" if succeeded else "failed_calls"] += 1

    total_calls = len(records)
    return {
        "total_calls": total_calls,
        "successful_calls": successful_calls,
        "failed_calls": total_calls - successful_calls,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "total_tokens": input_tokens + output_tokens,
        "avg_latency_s": round(sum(latencies) / len(latencies), 2) if latencies else None,
        "avg_tokens_per_second": round(sum(throughputs) / len(throughputs), 2) if throughputs else None,
        "by_caller": by_caller,
        "by_model": by_model,
    }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def is_local_model(model: str | None) -> bool:
    """Decide whether a model name refers to a locally served model.

    A model is local when ``COST_TABLE`` prices both its input and output at
    zero. Names missing from the table fall back to a naming heuristic: the
    name contains ``":"`` and contains neither ``"/"`` nor ``"claude"``.
    Empty or ``None`` names are never local.
    """
    if not model:
        return False

    pricing = COST_TABLE.get(model, {})
    # Default of 1 means an unlisted model never counts as zero-cost here.
    zero_cost = pricing.get("input", 1) == 0 and pricing.get("output", 1) == 0
    if zero_cost:
        return True

    if "/" in model or "claude" in model:
        return False
    return ":" in model
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_session_rows(rows: list[tuple]) -> dict:
    """Split aggregated session rows into local and cloud totals.

    Args:
        rows: ``(model, source, sessions, messages, tool_calls)`` tuples.
            Only the model, session count and message count are used.

    Returns:
        A dict with ``total_sessions``, ``local_sessions``,
        ``cloud_sessions``, ``local_est_tokens``, ``cloud_est_tokens`` and
        ``cloud_est_cost_usd`` (rounded to four decimals). Tokens are
        estimated at 500 per message; cloud cost applies the mean of the
        model's input and output rate from ``COST_TABLE`` per million tokens,
        falling back to 5.0 / 15.0 for models not in the table.
    """
    session_counts = {"local": 0, "cloud": 0}
    token_counts = {"local": 0, "cloud": 0}
    grand_total = 0
    cloud_cost = 0.0

    for model, _source, session_count, message_count, _tool_calls in rows:
        n_sessions = int(session_count or 0)
        n_messages = int(message_count or 0)
        estimated = n_messages * 500
        grand_total += n_sessions

        if is_local_model(model):
            session_counts["local"] += n_sessions
            token_counts["local"] += estimated
            continue

        session_counts["cloud"] += n_sessions
        token_counts["cloud"] += estimated
        rates = COST_TABLE.get(model, {"input": 5.0, "output": 15.0})
        cloud_cost += (estimated / 1_000_000) * ((rates["input"] + rates["output"]) / 2)

    return {
        "total_sessions": grand_total,
        "local_sessions": session_counts["local"],
        "cloud_sessions": session_counts["cloud"],
        "local_est_tokens": token_counts["local"],
        "cloud_est_tokens": token_counts["cloud"],
        "cloud_est_cost_usd": round(cloud_cost, 4),
    }
|
||||||
38
tasks.py
38
tasks.py
@@ -5,12 +5,14 @@ import glob
|
|||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from orchestration import huey
|
from orchestration import huey
|
||||||
from huey import crontab
|
from huey import crontab
|
||||||
from gitea_client import GiteaClient
|
from gitea_client import GiteaClient
|
||||||
|
from metrics_helpers import build_local_metric_record
|
||||||
|
|
||||||
HERMES_HOME = Path.home() / ".hermes"
|
HERMES_HOME = Path.home() / ".hermes"
|
||||||
TIMMY_HOME = Path.home() / ".timmy"
|
TIMMY_HOME = Path.home() / ".timmy"
|
||||||
@@ -57,6 +59,7 @@ def run_hermes_local(
|
|||||||
_model = model or HEARTBEAT_MODEL
|
_model = model or HEARTBEAT_MODEL
|
||||||
tagged = f"[{caller_tag}] {prompt}" if caller_tag else prompt
|
tagged = f"[{caller_tag}] {prompt}" if caller_tag else prompt
|
||||||
|
|
||||||
|
started = time.time()
|
||||||
try:
|
try:
|
||||||
runner = """
|
runner = """
|
||||||
import io
|
import io
|
||||||
@@ -167,15 +170,15 @@ sys.exit(exit_code)
|
|||||||
# Log to metrics jsonl
|
# Log to metrics jsonl
|
||||||
METRICS_DIR.mkdir(parents=True, exist_ok=True)
|
METRICS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl"
|
metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl"
|
||||||
record = {
|
record = build_local_metric_record(
|
||||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
prompt=prompt,
|
||||||
"model": _model,
|
response=response,
|
||||||
"caller": caller_tag or "unknown",
|
model=_model,
|
||||||
"prompt_len": len(prompt),
|
caller=caller_tag or "unknown",
|
||||||
"response_len": len(response),
|
session_id=session_id,
|
||||||
"session_id": session_id,
|
latency_s=time.time() - started,
|
||||||
"success": bool(response),
|
success=bool(response),
|
||||||
}
|
)
|
||||||
with open(metrics_file, "a") as f:
|
with open(metrics_file, "a") as f:
|
||||||
f.write(json.dumps(record) + "\n")
|
f.write(json.dumps(record) + "\n")
|
||||||
|
|
||||||
@@ -190,13 +193,16 @@ sys.exit(exit_code)
|
|||||||
# Log failure
|
# Log failure
|
||||||
METRICS_DIR.mkdir(parents=True, exist_ok=True)
|
METRICS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl"
|
metrics_file = METRICS_DIR / f"local_{datetime.now().strftime('%Y%m%d')}.jsonl"
|
||||||
record = {
|
record = build_local_metric_record(
|
||||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
prompt=prompt,
|
||||||
"model": _model,
|
response="",
|
||||||
"caller": caller_tag or "unknown",
|
model=_model,
|
||||||
"error": str(e),
|
caller=caller_tag or "unknown",
|
||||||
"success": False,
|
session_id=None,
|
||||||
}
|
latency_s=time.time() - started,
|
||||||
|
success=False,
|
||||||
|
error=str(e),
|
||||||
|
)
|
||||||
with open(metrics_file, "a") as f:
|
with open(metrics_file, "a") as f:
|
||||||
f.write(json.dumps(record) + "\n")
|
f.write(json.dumps(record) + "\n")
|
||||||
return None
|
return None
|
||||||
|
|||||||
93
tests/test_metrics_helpers.py
Normal file
93
tests/test_metrics_helpers.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
from metrics_helpers import (
|
||||||
|
build_local_metric_record,
|
||||||
|
estimate_tokens_from_chars,
|
||||||
|
summarize_local_metrics,
|
||||||
|
summarize_session_rows,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_estimate_tokens_from_chars_uses_simple_local_heuristic() -> None:
    """Character counts map to tokens at four characters each, rounded up."""
    expected_by_char_count = ((0, 0), (1, 1), (4, 1), (5, 2), (401, 101))
    for char_count, expected_tokens in expected_by_char_count:
        assert estimate_tokens_from_chars(char_count) == expected_tokens
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_local_metric_record_adds_token_and_throughput_estimates() -> None:
    """A 40-char prompt and 60-char response over 2s give 10 + 15 tokens at 12.5 tok/s."""
    call = {
        "prompt": "abcd" * 10,
        "response": "xyz" * 20,
        "model": "hermes4:14b",
        "caller": "heartbeat_tick",
        "session_id": "session-123",
        "latency_s": 2.0,
        "success": True,
    }

    record = build_local_metric_record(**call)

    expected_fields = {
        "model": "hermes4:14b",
        "caller": "heartbeat_tick",
        "session_id": "session-123",
        "est_input_tokens": 10,
        "est_output_tokens": 15,
        "tokens_per_second": 12.5,
    }
    for field, expected in expected_fields.items():
        assert record[field] == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_summarize_local_metrics_rolls_up_tokens_and_latency() -> None:
    """Totals, averages and per-caller/per-model buckets cover all three records."""
    heartbeat_ok = {
        "caller": "heartbeat_tick",
        "model": "hermes4:14b",
        "success": True,
        "est_input_tokens": 100,
        "est_output_tokens": 40,
        "latency_s": 2.0,
        "tokens_per_second": 20.0,
    }
    # The failed call has no tokens_per_second entry.
    heartbeat_failed = {
        "caller": "heartbeat_tick",
        "model": "hermes4:14b",
        "success": False,
        "est_input_tokens": 30,
        "est_output_tokens": 0,
        "latency_s": 1.0,
    }
    export_ok = {
        "caller": "session_export",
        "model": "hermes3:8b",
        "success": True,
        "est_input_tokens": 50,
        "est_output_tokens": 25,
        "latency_s": 5.0,
        "tokens_per_second": 5.0,
    }

    summary = summarize_local_metrics([heartbeat_ok, heartbeat_failed, export_ok])

    expected_totals = {
        "total_calls": 3,
        "successful_calls": 2,
        "failed_calls": 1,
        "input_tokens": 180,
        "output_tokens": 65,
        "total_tokens": 245,
        "avg_latency_s": 2.67,
        "avg_tokens_per_second": 12.5,
    }
    for field, expected in expected_totals.items():
        assert summary[field] == expected

    heartbeat_bucket = summary["by_caller"]["heartbeat_tick"]
    assert heartbeat_bucket["total_tokens"] == 170
    hermes4_bucket = summary["by_model"]["hermes4:14b"]
    assert hermes4_bucket["failed_calls"] == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_summarize_session_rows_separates_local_and_cloud_estimates() -> None:
    """Local and cloud rows are tallied separately and cloud usage gets a cost."""
    local_row = ("hermes4:14b", "local", 2, 10, 4)
    cloud_row = ("claude-sonnet-4-6", "cli", 3, 9, 2)

    summary = summarize_session_rows([local_row, cloud_row])

    expected_counts = {
        "total_sessions": 5,
        "local_sessions": 2,
        "cloud_sessions": 3,
        "local_est_tokens": 5000,
        "cloud_est_tokens": 4500,
    }
    for field, expected in expected_counts.items():
        assert summary[field] == expected
    assert summary["cloud_est_cost_usd"] > 0
|
||||||
Reference in New Issue
Block a user