#!/usr/bin/env bash
# claude_quota_check.sh — Quick CLI check of Claude API quota and metabolic mode.
#
# Usage:
#   ./scripts/claude_quota_check.sh          # Human-readable report
#   ./scripts/claude_quota_check.sh --mode   # Print current mode only (BURST/ACTIVE/RESTING)
#   ./scripts/claude_quota_check.sh --json   # JSON output for scripting
#
# If both --mode and --json are passed, --mode takes precedence.
# Exit status: 0 on success or --help, 1 on an unknown flag; any failure of the
# embedded Python propagates because of `set -e`.
#
# Refs: #1074, #972

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
SRC="${REPO_ROOT}/src"

# Ensure we can import the project Python modules.
# `${PYTHONPATH:+:...}` appends the caller's PYTHONPATH only when it is set and
# non-empty, so we never export a dangling ':' (an empty search-path entry).
export PYTHONPATH="${SRC}${PYTHONPATH:+:${PYTHONPATH}}"

# Print the help text to stdout.
usage() {
    echo "Usage: $0 [--mode|--json]"
    echo "  (no flags)   Human-readable quota report"
    echo "  --mode       Print current metabolic mode only"
    echo "  --json       JSON output for scripting"
}

MODE_ONLY=0
JSON_OUTPUT=0

for arg in "$@"; do
    case "$arg" in
        --mode) MODE_ONLY=1 ;;
        --json) JSON_OUTPUT=1 ;;
        -h|--help)
            usage
            exit 0
            ;;
        *)
            echo "Unknown flag: $arg" >&2
            exit 1
            ;;
    esac
done

# The heredoc delimiters are quoted ('PYEOF') so the shell performs no
# expansion inside the embedded Python.
if [[ $MODE_ONLY -eq 1 ]]; then
    python3 - <<'PYEOF'
from infrastructure.claude_quota import current_mode
print(current_mode())
PYEOF

elif [[ $JSON_OUTPUT -eq 1 ]]; then
    python3 - <<'PYEOF'
import json
from infrastructure.claude_quota import get_quota_store
store = get_quota_store()
today = store.today_summary()
month = store.month_summary()
print(json.dumps({
    "today": today.as_dict(),
    "month": month.as_dict(),
    "current_mode": today.mode,
}))
PYEOF

else
    python3 - <<'PYEOF'
from infrastructure.claude_quota import quota_report
print(quota_report())
PYEOF
fi
"""Claude API quota tracker and metabolic mode advisor.

Tracks Claude API usage (tokens, cost, calls) in a local SQLite database.
Provides a metabolic mode recommendation (BURST / ACTIVE / RESTING) based on
daily spend thresholds so the orchestrator can decide when to use cloud inference
vs. local Ollama.

Metabolic protocol (from issue #1074):
    BURST    — daily spend < burst_threshold   → use Claude freely
    ACTIVE   — daily spend < active_threshold  → prefer Groq / cheap tier
    RESTING  — daily spend >= active_threshold → local only, no API calls

All time windows ("today", "this month") are computed in UTC, matching the
UTC timestamps stored with each call.

Refs: #1074, #972
"""

import json
import logging
import sqlite3
from contextlib import closing
from dataclasses import dataclass, field
from datetime import UTC, date, datetime
from pathlib import Path
from typing import Literal

from config import settings

logger = logging.getLogger(__name__)

# ── Cost table (USD per million tokens, approximate) ─────────────────────────
# NOTE(review): these figures are hand-maintained approximations — confirm
# against the provider's current published pricing before relying on them.
_MODEL_COSTS: dict[str, dict[str, float]] = {
    # haiku aliases
    "haiku": {"input": 0.25, "output": 1.25},
    "claude-haiku-4-5": {"input": 0.25, "output": 1.25},
    "claude-haiku-4-5-20251001": {"input": 0.25, "output": 1.25},
    # sonnet aliases
    "sonnet": {"input": 3.00, "output": 15.00},
    "claude-sonnet-4-6": {"input": 3.00, "output": 15.00},
    # opus aliases
    "opus": {"input": 15.00, "output": 75.00},
    "claude-opus-4-6": {"input": 15.00, "output": 75.00},
}
# Applied to any model name missing from _MODEL_COSTS (same as the sonnet rate).
_DEFAULT_COST = {"input": 3.00, "output": 15.00}

MetabolicMode = Literal["BURST", "ACTIVE", "RESTING"]

DB_PATH = Path(settings.repo_root) / "data" / "claude_quota.db"

# Daily spend thresholds (USD). These are plain module constants; nothing in
# this module reads them from the environment or from settings.
BURST_THRESHOLD: float = 1.00  # < $1/day → BURST mode, use Claude freely
ACTIVE_THRESHOLD: float = 5.00  # < $5/day → ACTIVE mode, prefer cheaper tier

_SCHEMA = """
CREATE TABLE IF NOT EXISTS claude_calls (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    ts          TEXT NOT NULL,
    model       TEXT NOT NULL,
    input_tok   INTEGER NOT NULL DEFAULT 0,
    output_tok  INTEGER NOT NULL DEFAULT 0,
    cost_usd    REAL NOT NULL DEFAULT 0.0,
    task_label  TEXT DEFAULT '',
    metadata    TEXT DEFAULT '{}'
);
CREATE INDEX IF NOT EXISTS idx_cc_ts ON claude_calls(ts);
CREATE INDEX IF NOT EXISTS idx_cc_model ON claude_calls(model);
"""


def _utc_today() -> date:
    """Return the current calendar date in UTC.

    Call timestamps are written with ``datetime.now(UTC)``, so query windows
    must be derived from the UTC date as well. ``date.today()`` uses the
    host's local timezone and would shift the window around midnight on any
    machine that is not running in UTC.
    """
    return datetime.now(UTC).date()


@dataclass
class ClaudeCall:
    """Record of a single Claude API call.

    ``ts`` defaults to the current UTC time as an ISO-8601 string.
    """

    model: str
    input_tokens: int
    output_tokens: int
    task_label: str = ""
    ts: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
    metadata: dict = field(default_factory=dict)

    @property
    def cost_usd(self) -> float:
        """Return the estimated cost in USD from the per-million-token table.

        Models not present in ``_MODEL_COSTS`` are priced at ``_DEFAULT_COST``.
        """
        costs = _MODEL_COSTS.get(self.model, _DEFAULT_COST)
        return (
            self.input_tokens * costs["input"]
            + self.output_tokens * costs["output"]
        ) / 1_000_000


@dataclass
class QuotaSummary:
    """Aggregated quota status for a time window."""

    period: str  # "today" | "month"
    calls: int
    input_tokens: int
    output_tokens: int
    cost_usd: float
    mode: MetabolicMode
    burst_threshold: float
    active_threshold: float

    def as_dict(self) -> dict:
        """Return a JSON-serialisable dict; ``cost_usd`` is rounded to 4 places."""
        return {
            "period": self.period,
            "calls": self.calls,
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "cost_usd": round(self.cost_usd, 4),
            "mode": self.mode,
            "burst_threshold": self.burst_threshold,
            "active_threshold": self.active_threshold,
        }


def _mode_for_cost(daily_cost: float) -> MetabolicMode:
    """Map a daily spend in USD to a metabolic mode.

    Both thresholds are exclusive upper bounds: a spend exactly equal to
    ``BURST_THRESHOLD`` is ACTIVE, and exactly ``ACTIVE_THRESHOLD`` is RESTING.
    """
    if daily_cost < BURST_THRESHOLD:
        return "BURST"
    if daily_cost < ACTIVE_THRESHOLD:
        return "ACTIVE"
    return "RESTING"


class ClaudeQuotaStore:
    """SQLite-backed store for Claude API usage tracking.

    Every operation opens (and closes) its own connection rather than sharing
    one across calls. All database errors are logged and swallowed so that
    quota tracking never breaks the caller.
    """

    def __init__(self, db_path: Path | None = None) -> None:
        """Create the store and initialise the schema.

        Args:
            db_path: Location of the SQLite file; defaults to ``DB_PATH``.
        """
        self._db_path = db_path or DB_PATH
        self._init_db()

    def _init_db(self) -> None:
        """Create the parent directory, enable WAL, and apply the schema.

        Failures are logged as warnings and otherwise ignored.
        """
        try:
            self._db_path.parent.mkdir(parents=True, exist_ok=True)
            with closing(sqlite3.connect(str(self._db_path))) as conn:
                conn.execute("PRAGMA journal_mode=WAL")
                conn.execute(f"PRAGMA busy_timeout={settings.db_busy_timeout_ms}")
                conn.executescript(_SCHEMA)
                conn.commit()
        except Exception as exc:
            logger.warning("Failed to initialize claude_quota DB: %s", exc)

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection with ``sqlite3.Row`` rows and the busy timeout set.

        The caller owns the returned connection and must close it.
        """
        conn = sqlite3.connect(str(self._db_path))
        try:
            conn.row_factory = sqlite3.Row
            conn.execute(f"PRAGMA busy_timeout={settings.db_busy_timeout_ms}")
        except Exception:
            # Do not leak the handle if configuring the connection fails.
            conn.close()
            raise
        return conn

    def record_call(self, call: ClaudeCall) -> None:
        """Persist a completed Claude API call.

        Errors (including non-JSON-serialisable metadata) are logged as
        warnings and not re-raised.
        """
        try:
            with closing(self._connect()) as conn:
                conn.execute(
                    "INSERT INTO claude_calls "
                    "(ts, model, input_tok, output_tok, cost_usd, task_label, metadata) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?)",
                    (
                        call.ts,
                        call.model,
                        call.input_tokens,
                        call.output_tokens,
                        call.cost_usd,
                        call.task_label,
                        json.dumps(call.metadata),
                    ),
                )
                conn.commit()
        except Exception as exc:
            logger.warning("Failed to record Claude call: %s", exc)

    def _aggregate(self, where_clause: str, params: tuple) -> dict:
        """Return aggregated stats for a WHERE clause.

        Args:
            where_clause: SQL fragment interpolated verbatim into the query.
                It must only ever be a literal supplied by this class; values
                go through ``params``.
            params: Bound parameters for the placeholders in ``where_clause``.

        Returns:
            Dict with keys ``calls``, ``input_tok``, ``output_tok`` and
            ``cost_usd``; all zero if the query fails.
        """
        try:
            with closing(self._connect()) as conn:
                row = conn.execute(
                    "SELECT COUNT(*) as calls, "
                    "COALESCE(SUM(input_tok),0) as input_tok, "
                    "COALESCE(SUM(output_tok),0) as output_tok, "
                    "COALESCE(SUM(cost_usd),0.0) as cost_usd "
                    f"FROM claude_calls {where_clause}",
                    params,
                ).fetchone()
                if row:
                    return dict(row)
        except Exception as exc:
            logger.warning("Failed to aggregate Claude quota: %s", exc)
        return {"calls": 0, "input_tok": 0, "output_tok": 0, "cost_usd": 0.0}

    @staticmethod
    def _build_summary(period: str, agg: dict, daily_cost: float) -> QuotaSummary:
        """Assemble a ``QuotaSummary`` from an ``_aggregate`` result.

        ``daily_cost`` is the per-day spend used to pick the mode.
        """
        return QuotaSummary(
            period=period,
            calls=agg["calls"],
            input_tokens=agg["input_tok"],
            output_tokens=agg["output_tok"],
            cost_usd=agg["cost_usd"],
            mode=_mode_for_cost(daily_cost),
            burst_threshold=BURST_THRESHOLD,
            active_threshold=ACTIVE_THRESHOLD,
        )

    def today_summary(self) -> QuotaSummary:
        """Return quota summary for today (UTC)."""
        # ISO-8601 strings sort lexicographically, so "ts >= 'YYYY-MM-DD'"
        # selects every timestamp from the start of that UTC day onward.
        # NOTE(review): assumes stored ``ts`` values are UTC ISO strings, as
        # produced by ClaudeCall's default — confirm no caller passes others.
        day_start = _utc_today().isoformat()
        agg = self._aggregate("WHERE ts >= ?", (day_start,))
        return self._build_summary("today", agg, agg["cost_usd"])

    def month_summary(self) -> QuotaSummary:
        """Return quota summary for the current calendar month (UTC).

        The mode is derived from the month's spend divided by a fixed 30 days.
        """
        month_prefix = _utc_today().strftime("%Y-%m")
        agg = self._aggregate("WHERE ts >= ?", (month_prefix,))
        return self._build_summary("month", agg, agg["cost_usd"] / 30)  # amortised daily

    def current_mode(self) -> MetabolicMode:
        """Return the current metabolic mode based on today's spend."""
        return self.today_summary().mode


# ── Module-level singleton ────────────────────────────────────────────────────
_store: ClaudeQuotaStore | None = None


def get_quota_store() -> ClaudeQuotaStore:
    """Return the module-level quota store, creating it on first access."""
    global _store
    if _store is None:
        _store = ClaudeQuotaStore()
    return _store


def record_usage(
    model: str,
    input_tokens: int,
    output_tokens: int,
    task_label: str = "",
    metadata: dict | None = None,
) -> None:
    """Convenience function to record a Claude API call.

    Silently degrades if the quota DB is unavailable: the store logs a
    warning and this function still returns normally.
    """
    call = ClaudeCall(
        model=model,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        task_label=task_label,
        metadata=metadata or {},
    )
    get_quota_store().record_call(call)
    logger.debug(
        "Claude call recorded: model=%s in=%d out=%d cost=$%.4f",
        model,
        input_tokens,
        output_tokens,
        call.cost_usd,
    )


def current_mode() -> MetabolicMode:
    """Return the current metabolic mode.

    BURST   → Claude is cheap today, use freely.
    ACTIVE  → Approaching daily budget, prefer Groq / cheaper tier.
    RESTING → Daily limit reached, use local Ollama only.

    Falls back to "BURST" if the mode check itself raises.
    """
    try:
        return get_quota_store().current_mode()
    except Exception as exc:
        logger.warning("Quota mode check failed, defaulting to BURST: %s", exc)
        return "BURST"


def quota_report() -> str:
    """Return a human-readable quota report for CLI / dashboard display.

    On any error a single-line "Quota report unavailable" message is returned
    instead of raising.
    """
    try:
        store = get_quota_store()
        today = store.today_summary()
        month = store.month_summary()

        lines = [
            "═══════════════════════════════════════",
            "  Claude API Quota — Metabolic Report  ",
            "═══════════════════════════════════════",
            f"  Today       {today.calls:>6} calls  "
            f"${today.cost_usd:>7.4f}  [{today.mode}]",
            f"  This month  {month.calls:>5} calls  "
            f"${month.cost_usd:>7.4f}",
            "───────────────────────────────────────",
            f"  BURST threshold  : ${today.burst_threshold:.2f}/day",
            f"  ACTIVE threshold : ${today.active_threshold:.2f}/day",
            "───────────────────────────────────────",
            f"  Current mode     : {today.mode}",
            "═══════════════════════════════════════",
        ]
        return "\n".join(lines)
    except Exception as exc:
        return f"Quota report unavailable: {exc}"
"""Tests for the Claude quota tracker and metabolic mode advisor.

Refs: #1074
"""

from typing import get_args

import pytest

from infrastructure import claude_quota as claude_quota_module
from infrastructure.claude_quota import (
    ACTIVE_THRESHOLD,
    BURST_THRESHOLD,
    ClaudeCall,
    ClaudeQuotaStore,
    MetabolicMode,
    _mode_for_cost,
    current_mode,
    quota_report,
    record_usage,
)


@pytest.fixture
def store(tmp_path):
    """Fresh quota store backed by a temp DB."""
    return ClaudeQuotaStore(db_path=tmp_path / "test_quota.db")


@pytest.fixture
def module_store(tmp_path, monkeypatch):
    """Swap the module-level singleton for a temp-DB store.

    Without this, the convenience functions would read and write the real
    quota database under the repository's data directory, so test runs would
    add rows to live usage data and depend on whatever it already contains.
    """
    isolated = ClaudeQuotaStore(db_path=tmp_path / "module_quota.db")
    monkeypatch.setattr(claude_quota_module, "_store", isolated)
    return isolated


# ── Unit: cost calculation ────────────────────────────────────────────────────


class TestClaudeCallCost:
    def test_haiku_cost(self):
        call = ClaudeCall(model="haiku", input_tokens=1_000_000, output_tokens=0)
        assert call.cost_usd == pytest.approx(0.25)

    def test_sonnet_output_cost(self):
        call = ClaudeCall(model="sonnet", input_tokens=0, output_tokens=1_000_000)
        assert call.cost_usd == pytest.approx(15.00)

    def test_opus_combined_cost(self):
        call = ClaudeCall(model="opus", input_tokens=100_000, output_tokens=50_000)
        # input: 100k * 15/1M = 1.50, output: 50k * 75/1M = 3.75 → 5.25
        assert call.cost_usd == pytest.approx(5.25)

    def test_unknown_model_uses_default(self):
        call = ClaudeCall(model="unknown-model-xyz", input_tokens=1_000_000, output_tokens=0)
        assert call.cost_usd == pytest.approx(3.00)  # default input cost

    def test_zero_tokens_zero_cost(self):
        call = ClaudeCall(model="haiku", input_tokens=0, output_tokens=0)
        assert call.cost_usd == 0.0


# ── Unit: metabolic mode thresholds ──────────────────────────────────────────


class TestMetabolicMode:
    def test_under_burst_threshold(self):
        assert _mode_for_cost(0.0) == "BURST"
        assert _mode_for_cost(BURST_THRESHOLD - 0.01) == "BURST"

    def test_at_burst_threshold_is_active(self):
        assert _mode_for_cost(BURST_THRESHOLD) == "ACTIVE"

    def test_between_thresholds(self):
        mid = (BURST_THRESHOLD + ACTIVE_THRESHOLD) / 2
        assert _mode_for_cost(mid) == "ACTIVE"

    def test_at_active_threshold_is_resting(self):
        assert _mode_for_cost(ACTIVE_THRESHOLD) == "RESTING"

    def test_over_active_threshold(self):
        assert _mode_for_cost(ACTIVE_THRESHOLD + 10) == "RESTING"


# ── Store: record and query ───────────────────────────────────────────────────


class TestClaudeQuotaStore:
    def test_record_call(self, store):
        call = ClaudeCall(model="haiku", input_tokens=1000, output_tokens=500)
        store.record_call(call)
        summary = store.today_summary()
        assert summary.calls == 1
        assert summary.input_tokens == 1000
        assert summary.output_tokens == 500
        assert summary.cost_usd > 0

    def test_today_summary_empty_db(self, store):
        summary = store.today_summary()
        assert summary.calls == 0
        assert summary.cost_usd == 0.0
        assert summary.mode == "BURST"

    def test_month_summary_aggregates_multiple_calls(self, store):
        for _ in range(5):
            store.record_call(ClaudeCall(model="haiku", input_tokens=100, output_tokens=50))
        month = store.month_summary()
        assert month.calls == 5
        assert month.input_tokens == 500
        assert month.output_tokens == 250

    def test_current_mode_burst_when_empty(self, store):
        assert store.current_mode() == "BURST"

    def test_current_mode_resting_when_expensive(self, store):
        # Record enough usage to push past ACTIVE_THRESHOLD
        # ACTIVE_THRESHOLD = 5.00, opus input = 15/1M
        # Need >5.00: 5.00/15 * 1M ≈ 333_334 input tokens
        store.record_call(
            ClaudeCall(model="opus", input_tokens=400_000, output_tokens=0)
        )
        mode = store.current_mode()
        assert mode == "RESTING"

    def test_summary_as_dict(self, store):
        summary = store.today_summary()
        d = summary.as_dict()
        assert "period" in d
        assert "calls" in d
        assert "cost_usd" in d
        assert "mode" in d


# ── Convenience functions ─────────────────────────────────────────────────────


class TestConvenienceFunctions:
    """Module-level helpers, run against an isolated temp-DB singleton."""

    def test_record_usage_does_not_raise(self, module_store):
        record_usage(model="haiku", input_tokens=10, output_tokens=5, task_label="test")

    def test_record_usage_persists_to_store(self, module_store):
        record_usage(model="haiku", input_tokens=10, output_tokens=5, task_label="test")
        summary = module_store.today_summary()
        assert summary.calls == 1
        assert summary.input_tokens == 10
        assert summary.output_tokens == 5

    def test_current_mode_returns_valid_mode(self, module_store):
        mode = current_mode()
        assert mode in get_args(MetabolicMode)

    def test_current_mode_burst_on_empty_store(self, module_store):
        # The isolated store starts empty, so the result is deterministic.
        assert current_mode() == "BURST"

    def test_quota_report_returns_string(self, module_store):
        report = quota_report()
        assert isinstance(report, str)
        assert "BURST" in report or "ACTIVE" in report or "RESTING" in report