from __future__ import annotations

# Helpers for estimating token usage, throughput, and cost of model calls,
# and for rolling those estimates up into per-caller / per-model summaries.

import math
from datetime import datetime, timezone

# Price table keyed by model name. summarize_session_rows() applies these
# rates per 1_000_000 estimated tokens and reports the result as USD.
# A model whose input AND output rates are both 0 is treated as local
# by is_local_model().
COST_TABLE = {
    "claude-opus-4-6": {"input": 15.0, "output": 75.0},
    "claude-sonnet-4-6": {"input": 3.0, "output": 15.0},
    "claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0},
    "claude-haiku-4-20250414": {"input": 0.25, "output": 1.25},
    "hermes4:14b": {"input": 0.0, "output": 0.0},
    "hermes3:8b": {"input": 0.0, "output": 0.0},
    "hermes3:latest": {"input": 0.0, "output": 0.0},
    "qwen3:30b": {"input": 0.0, "output": 0.0},
}

# Heuristic used by estimate_tokens_from_chars(): one token per 4 characters.
_CHARS_PER_TOKEN = 4

# Default heuristic used by summarize_session_rows(): tokens per message.
_EST_TOKENS_PER_MESSAGE = 500

# Rates applied to non-local models that are missing from COST_TABLE.
_DEFAULT_CLOUD_PRICING = {"input": 5.0, "output": 15.0}


def estimate_tokens_from_chars(char_count: int) -> int:
    """Estimate a token count from a character count.

    Uses a fixed ratio of four characters per token, rounded up.

    Args:
        char_count: Number of characters in the text.

    Returns:
        The estimated number of tokens; 0 for a zero or negative count.
    """
    if char_count <= 0:
        return 0
    return math.ceil(char_count / _CHARS_PER_TOKEN)


def build_local_metric_record(
    *,
    prompt: str | None,
    response: str | None,
    model: str,
    caller: str,
    session_id: str | None,
    latency_s: float,
    success: bool,
    error: str | None = None,
) -> dict:
    """Build one metric record describing a single model call.

    Args:
        prompt: Text sent to the model. ``None`` is treated as empty text.
        response: Text returned by the model. ``None`` (for example when the
            call failed before producing output) is treated as empty text.
        model: Model name, stored as-is.
        caller: Caller label, stored as-is.
        session_id: Session identifier, stored as-is.
        latency_s: Call duration in seconds.
        success: Whether the call succeeded.
        error: Optional error description.

    Returns:
        A dict with the UTC ISO-8601 timestamp, the inputs above, the prompt
        and response lengths, the estimated input/output token counts, and
        ``tokens_per_second`` (estimated input + output tokens divided by the
        latency, rounded to 2 places), which is ``None`` when ``latency_s``
        is not positive.
    """
    # Tolerate a missing prompt/response instead of raising TypeError in len().
    prompt_len = len(prompt or "")
    response_len = len(response or "")

    input_tokens = estimate_tokens_from_chars(prompt_len)
    output_tokens = estimate_tokens_from_chars(response_len)
    total_tokens = input_tokens + output_tokens

    # Guard the division: a zero or negative latency has no meaningful rate.
    tokens_per_second = round(total_tokens / latency_s, 2) if latency_s > 0 else None

    return {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "model": model,
        "caller": caller,
        "prompt_len": prompt_len,
        "response_len": response_len,
        "session_id": session_id,
        "latency_s": round(latency_s, 3),
        "est_input_tokens": input_tokens,
        "est_output_tokens": output_tokens,
        "tokens_per_second": tokens_per_second,
        "success": success,
        "error": error,
    }


def _token_count(record: dict, key: str) -> int:
    """Read an integer token field, treating a missing or falsy value as 0."""
    return int(record.get(key, 0) or 0)


def _bucket_label(record: dict, key: str) -> str:
    """Return the grouping label for *key*; "unknown" when absent or None."""
    label = record.get(key)
    return "unknown" if label is None else label


def _new_bucket() -> dict:
    """Return a fresh, zeroed per-caller / per-model counter dict."""
    return {"calls": 0, "successful_calls": 0, "failed_calls": 0, "total_tokens": 0}


def summarize_local_metrics(records: list[dict]) -> dict:
    """Aggregate metric records into overall and per-caller/per-model totals.

    Args:
        records: Dicts shaped like the output of build_local_metric_record().
            Missing keys are tolerated.

    Returns:
        A dict with call counts, summed estimated tokens, ``avg_latency_s``
        and ``avg_tokens_per_second`` (each rounded to 2 places, or ``None``
        when no record contributes a value), plus ``by_caller`` and
        ``by_model`` breakdowns. Records whose caller/model is missing or
        ``None`` are grouped under ``"unknown"``.
    """
    successful_calls = 0
    input_tokens = 0
    output_tokens = 0
    latencies: list[float] = []
    throughputs: list[float] = []
    by_caller: dict[str, dict] = {}
    by_model: dict[str, dict] = {}

    for record in records:
        succeeded = bool(record.get("success"))
        record_input = _token_count(record, "est_input_tokens")
        record_output = _token_count(record, "est_output_tokens")

        successful_calls += succeeded
        input_tokens += record_input
        output_tokens += record_output

        # Any recorded latency counts toward the average, including 0.
        latency = record.get("latency_s")
        if latency is not None:
            latencies.append(float(latency or 0))

        # Missing, None, and zero throughput are all left out of the average.
        throughput = record.get("tokens_per_second")
        if throughput:
            throughputs.append(float(throughput))

        outcome_key = "successful_calls" if succeeded else "failed_calls"
        groupings = (
            (_bucket_label(record, "caller"), by_caller),
            (_bucket_label(record, "model"), by_model),
        )
        for label, table in groupings:
            if label not in table:
                table[label] = _new_bucket()
            bucket = table[label]
            bucket["calls"] += 1
            bucket["total_tokens"] += record_input + record_output
            bucket[outcome_key] += 1

    total_calls = len(records)
    return {
        "total_calls": total_calls,
        "successful_calls": successful_calls,
        "failed_calls": total_calls - successful_calls,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "total_tokens": input_tokens + output_tokens,
        "avg_latency_s": round(sum(latencies) / len(latencies), 2) if latencies else None,
        "avg_tokens_per_second": (
            round(sum(throughputs) / len(throughputs), 2) if throughputs else None
        ),
        "by_caller": by_caller,
        "by_model": by_model,
    }


def is_local_model(model: str | None) -> bool:
    """Decide whether *model* should be counted as a local model.

    A model is local when COST_TABLE lists it with both rates equal to 0.
    Otherwise it is local only if its name contains ``":"`` and contains
    neither ``"/"`` nor ``"claude"``.

    Args:
        model: Model name; ``None`` or an empty string is never local.

    Returns:
        True when the model is classified as local, False otherwise.
    """
    if not model:
        return False

    costs = COST_TABLE.get(model, {})
    # The default of 1 makes a missing rate count as "not free".
    if costs.get("input", 1) == 0 and costs.get("output", 1) == 0:
        return True

    # Name-based fallback for models that are not zero-priced in the table.
    return ":" in model and "/" not in model and "claude" not in model


def summarize_session_rows(
    rows: list[tuple],
    *,
    tokens_per_message: int = _EST_TOKENS_PER_MESSAGE,
) -> dict:
    """Summarize session rows into local vs. cloud session/token/cost totals.

    Args:
        rows: 5-tuples of ``(model, source, sessions, messages, tool_calls)``.
            Only ``model``, ``sessions`` and ``messages`` are used; a falsy
            ``sessions`` or ``messages`` value counts as 0.
        tokens_per_message: Estimated tokens attributed to each message.

    Returns:
        A dict with total/local/cloud session counts, local and cloud
        estimated token counts, and ``cloud_est_cost_usd`` rounded to 4
        places. Cloud cost uses the mean of the model's input and output
        rates per 1_000_000 tokens; models missing from COST_TABLE use the
        fallback rates of 5.0 (input) and 15.0 (output).
    """
    total_sessions = 0
    local_sessions = 0
    cloud_sessions = 0
    local_est_tokens = 0
    cloud_est_tokens = 0
    cloud_est_cost_usd = 0.0

    for model, _source, sessions, messages, _tool_calls in rows:
        session_count = int(sessions or 0)
        est_tokens = int(messages or 0) * tokens_per_message
        total_sessions += session_count

        if is_local_model(model):
            local_sessions += session_count
            local_est_tokens += est_tokens
            continue

        cloud_sessions += session_count
        cloud_est_tokens += est_tokens
        pricing = COST_TABLE.get(model, _DEFAULT_CLOUD_PRICING)
        # The input/output split is unknown here, so use the mean rate.
        blended_rate = (pricing["input"] + pricing["output"]) / 2
        cloud_est_cost_usd += (est_tokens / 1_000_000) * blended_rate

    return {
        "total_sessions": total_sessions,
        "local_sessions": local_sessions,
        "cloud_sessions": cloud_sessions,
        "local_est_tokens": local_est_tokens,
        "cloud_est_tokens": cloud_est_tokens,
        "cloud_est_cost_usd": round(cloud_est_cost_usd, 4),
    }