94 lines
2.9 KiB
Python
94 lines
2.9 KiB
Python
from metrics_helpers import (
|
|
build_local_metric_record,
|
|
estimate_tokens_from_chars,
|
|
summarize_local_metrics,
|
|
summarize_session_rows,
|
|
)
|
|
|
|
|
|
def test_estimate_tokens_from_chars_uses_simple_local_heuristic() -> None:
    """Pin the token estimate returned for a handful of character counts.

    The expected values are consistent with rounding ``chars / 4`` up to the
    next whole token; the helper's implementation is not visible from this
    test module, so only these sample points are guaranteed.
    """
    # Maps character count -> expected token estimate, checked in order.
    expected_by_char_count = {
        0: 0,
        1: 1,
        4: 1,
        5: 2,
        401: 101,
    }
    for char_count, expected_tokens in expected_by_char_count.items():
        assert estimate_tokens_from_chars(char_count) == expected_tokens
|
|
|
|
|
|
def test_build_local_metric_record_adds_token_and_throughput_estimates() -> None:
    """Check the fields of a record built from a 40-char prompt and 60-char response.

    The identifying fields (model, caller, session id) must come back as
    passed in, and the record must carry estimated input/output token counts
    plus a tokens-per-second figure.
    """
    call_kwargs = {
        "prompt": "abcd" * 10,
        "response": "xyz" * 20,
        "model": "hermes4:14b",
        "caller": "heartbeat_tick",
        "session_id": "session-123",
        "latency_s": 2.0,
        "success": True,
    }
    metric = build_local_metric_record(**call_kwargs)

    expected_fields = {
        "model": "hermes4:14b",
        "caller": "heartbeat_tick",
        "session_id": "session-123",
        "est_input_tokens": 10,
        "est_output_tokens": 15,
        # 12.5 matches (10 + 15) tokens over the 2.0 s latency.
        "tokens_per_second": 12.5,
    }
    for field_name, expected_value in expected_fields.items():
        assert metric[field_name] == expected_value
|
|
|
|
|
|
def test_summarize_local_metrics_rolls_up_tokens_and_latency() -> None:
    """Check the rollup produced for two successful calls and one failed call.

    Covers the overall call/token/latency totals and one entry each of the
    per-caller and per-model breakdowns.
    """
    heartbeat_ok = {
        "caller": "heartbeat_tick",
        "model": "hermes4:14b",
        "success": True,
        "est_input_tokens": 100,
        "est_output_tokens": 40,
        "latency_s": 2.0,
        "tokens_per_second": 20.0,
    }
    # The failed call carries no "tokens_per_second" key.
    heartbeat_failed = {
        "caller": "heartbeat_tick",
        "model": "hermes4:14b",
        "success": False,
        "est_input_tokens": 30,
        "est_output_tokens": 0,
        "latency_s": 1.0,
    }
    export_ok = {
        "caller": "session_export",
        "model": "hermes3:8b",
        "success": True,
        "est_input_tokens": 50,
        "est_output_tokens": 25,
        "latency_s": 5.0,
        "tokens_per_second": 5.0,
    }

    rollup = summarize_local_metrics([heartbeat_ok, heartbeat_failed, export_ok])

    expected_totals = {
        "total_calls": 3,
        "successful_calls": 2,
        "failed_calls": 1,
        "input_tokens": 180,
        "output_tokens": 65,
        "total_tokens": 245,
        # (2.0 + 1.0 + 5.0) / 3, to two decimal places.
        "avg_latency_s": 2.67,
        # Matches the mean of the two records that report a throughput.
        "avg_tokens_per_second": 12.5,
    }
    for key, expected_value in expected_totals.items():
        assert rollup[key] == expected_value

    # Both heartbeat records count towards the caller total: 100 + 40 + 30 + 0.
    assert rollup["by_caller"]["heartbeat_tick"]["total_tokens"] == 170
    assert rollup["by_model"]["hermes4:14b"]["failed_calls"] == 1
|
|
|
|
|
|
def test_summarize_session_rows_separates_local_and_cloud_estimates() -> None:
    """Check that one "local" row and one "cli" row land in separate buckets.

    The meaning of each tuple column is defined by ``summarize_session_rows``
    and is not visible from this test module; the test only pins the session
    counts, the token estimates, and that a positive cloud cost is reported.
    """
    local_row = ("hermes4:14b", "local", 2, 10, 4)
    cloud_row = ("claude-sonnet-4-6", "cli", 3, 9, 2)

    overview = summarize_session_rows([local_row, cloud_row])

    expected_exact = {
        "total_sessions": 5,
        "local_sessions": 2,
        "cloud_sessions": 3,
        "local_est_tokens": 5000,
        "cloud_est_tokens": 4500,
    }
    for key, expected_value in expected_exact.items():
        assert overview[key] == expected_value

    # Only the sign of the cost is pinned, not its exact value.
    assert overview["cloud_est_cost_usd"] > 0
|