feat: add Claude quota tracker and metabolic mode advisor (#1074)

Add two tools from the March 23 operational briefing: - src/infrastructure/claude_quota.py: SQLite-backed tracker for Claude API usage (tokens, cost, calls) per day/month. Exposes current_mode() which returns BURST / ACTIVE / RESTING based on daily spend thresholds, enabling the orchestrator to route inference requests according to the metabolic protocol (issue #972). - scripts/claude_quota_check.sh: CLI wrapper with --mode (print mode only) and --json (machine-readable) flags for quick quota inspection from the shell or CI scripts. - tests/infrastructure/test_claude_quota.py: 19 unit tests covering cost calculation, mode thresholds, store CRUD, and convenience functions. Refs #1074
2026-03-23 11:18:13 -04:00
3 changed files with 507 additions and 0 deletions
--- a/scripts/claude_quota_check.sh
+++ b/scripts/claude_quota_check.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+# claude_quota_check.sh — Quick CLI check of Claude API quota and metabolic mode.
+#
+# Usage:
+#   ./scripts/claude_quota_check.sh          # Human-readable report
+#   ./scripts/claude_quota_check.sh --mode   # Print current mode only (BURST/ACTIVE/RESTING)
+#   ./scripts/claude_quota_check.sh --json   # JSON output for scripting
+#
+# Refs: #1074, #972
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+SRC="${REPO_ROOT}/src"
+
+# Ensure we can import the project Python modules
+export PYTHONPATH="${SRC}:${PYTHONPATH:-}"
+
+MODE_ONLY=0
+JSON_OUTPUT=0
+
+for arg in "$@"; do
+  case "$arg" in
+    --mode) MODE_ONLY=1 ;;
+    --json) JSON_OUTPUT=1 ;;
+    -h|--help)
+      echo "Usage: $0 [--mode|--json]"
+      echo "  (no flags)  Human-readable quota report"
+      echo "  --mode      Print current metabolic mode only"
+      echo "  --json      JSON output for scripting"
+      exit 0
+      ;;
+    *)
+      echo "Unknown flag: $arg" >&2
+      exit 1
+      ;;
+  esac
+done
+
+if [[ $MODE_ONLY -eq 1 ]]; then
+  python3 - <<'PYEOF'
+from infrastructure.claude_quota import current_mode
+print(current_mode())
+PYEOF
+
+elif [[ $JSON_OUTPUT -eq 1 ]]; then
+  python3 - <<'PYEOF'
+import json
+from infrastructure.claude_quota import get_quota_store
+store = get_quota_store()
+today = store.today_summary()
+month = store.month_summary()
+print(json.dumps({
+    "today": today.as_dict(),
+    "month": month.as_dict(),
+    "current_mode": today.mode,
+}))
+PYEOF
+
+else
+  python3 - <<'PYEOF'
+from infrastructure.claude_quota import quota_report
+print(quota_report())
+PYEOF
+fi
--- a/src/infrastructure/claude_quota.py
+++ b/src/infrastructure/claude_quota.py
@@ -0,0 +1,302 @@
+"""Claude API quota tracker and metabolic mode advisor.
+
+Tracks Claude API usage (tokens, cost, calls) in a local SQLite database.
+Provides a metabolic mode recommendation (BURST / ACTIVE / RESTING) based on
+daily spend thresholds so the orchestrator can decide when to use cloud inference
+vs. local Ollama.
+
+Metabolic protocol (from issue #1074):
+  BURST   — daily spend < burst_threshold                      → use Claude freely
+  ACTIVE  — burst_threshold <= daily spend < active_threshold  → prefer Groq / cheap tier
+  RESTING — daily spend >= active_threshold                    → local only, no API calls
+
+Refs: #1074, #972
+"""
+
+import json
+import logging
+import sqlite3
+from contextlib import closing
+from dataclasses import dataclass, field
+from datetime import UTC, date, datetime
+from pathlib import Path
+from typing import Literal
+
+from config import settings
+
+logger = logging.getLogger(__name__)
+
+# ── Cost table (USD per million tokens, approximate) ─────────────────────────
+# Keys are the model identifiers passed as ``ClaudeCall.model``; each value
+# holds separate per-million-token rates for input and output tokens.
+# NOTE(review): these rates are hard-coded approximations — confirm them
+# against the current Anthropic price list before using them for budgeting.
+_MODEL_COSTS: dict[str, dict[str, float]] = {
+    # haiku aliases
+    "haiku": {"input": 0.25, "output": 1.25},
+    "claude-haiku-4-5": {"input": 0.25, "output": 1.25},
+    "claude-haiku-4-5-20251001": {"input": 0.25, "output": 1.25},
+    # sonnet aliases
+    "sonnet": {"input": 3.00, "output": 15.00},
+    "claude-sonnet-4-6": {"input": 3.00, "output": 15.00},
+    # opus aliases
+    "opus": {"input": 15.00, "output": 75.00},
+    "claude-opus-4-6": {"input": 15.00, "output": 75.00},
+}
+_DEFAULT_COST = {"input": 3.00, "output": 15.00}  # fallback for models missing from _MODEL_COSTS (equal to the sonnet rates)
+
+# The three routing modes a spend level can map to.
+MetabolicMode = Literal["BURST", "ACTIVE", "RESTING"]
+
+# Default database file: <settings.repo_root>/data/claude_quota.db
+DB_PATH = Path(settings.repo_root) / "data" / "claude_quota.db"
+
+# Daily spend thresholds (USD).
+# NOTE(review): these are plain module constants; no env/Settings override is
+# wired up in this module, so tuning them currently means editing the values.
+BURST_THRESHOLD: float = 1.00   # spend <  $1/day → BURST mode, use Claude freely
+ACTIVE_THRESHOLD: float = 5.00  # spend <  $5/day → ACTIVE mode, prefer cheaper tier; >= $5/day → RESTING
+
+# DDL run via executescript() when a store is initialised; every statement
+# uses IF NOT EXISTS, so re-running it against an existing database is harmless.
+_SCHEMA = """
+CREATE TABLE IF NOT EXISTS claude_calls (
+    id          INTEGER PRIMARY KEY AUTOINCREMENT,
+    ts          TEXT    NOT NULL,
+    model       TEXT    NOT NULL,
+    input_tok   INTEGER NOT NULL DEFAULT 0,
+    output_tok  INTEGER NOT NULL DEFAULT 0,
+    cost_usd    REAL    NOT NULL DEFAULT 0.0,
+    task_label  TEXT    DEFAULT '',
+    metadata    TEXT    DEFAULT '{}'
+);
+CREATE INDEX IF NOT EXISTS idx_cc_ts    ON claude_calls(ts);
+CREATE INDEX IF NOT EXISTS idx_cc_model ON claude_calls(model);
+"""
+
+
+@dataclass
+class ClaudeCall:
+    """Record of a single Claude API call."""
+
+    model: str
+    input_tokens: int
+    output_tokens: int
+    task_label: str = ""
+    ts: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
+    metadata: dict = field(default_factory=dict)
+
+    @property
+    def cost_usd(self) -> float:
+        costs = _MODEL_COSTS.get(self.model, _DEFAULT_COST)
+        return (
+            self.input_tokens * costs["input"]
+            + self.output_tokens * costs["output"]
+        ) / 1_000_000
+
+
+@dataclass
+class QuotaSummary:
+    """Aggregated quota status for a time window."""
+
+    period: str            # "today" | "month"
+    calls: int
+    input_tokens: int
+    output_tokens: int
+    cost_usd: float
+    mode: MetabolicMode
+    burst_threshold: float
+    active_threshold: float
+
+    def as_dict(self) -> dict:
+        return {
+            "period": self.period,
+            "calls": self.calls,
+            "input_tokens": self.input_tokens,
+            "output_tokens": self.output_tokens,
+            "cost_usd": round(self.cost_usd, 4),
+            "mode": self.mode,
+            "burst_threshold": self.burst_threshold,
+            "active_threshold": self.active_threshold,
+        }
+
+
+def _mode_for_cost(daily_cost: float) -> MetabolicMode:
+    if daily_cost < BURST_THRESHOLD:
+        return "BURST"
+    if daily_cost < ACTIVE_THRESHOLD:
+        return "ACTIVE"
+    return "RESTING"
+
+
+class ClaudeQuotaStore:
+    """SQLite-backed store for Claude API usage tracking.
+
+    Thread-safe: creates a new connection per operation.
+    """
+
+    def __init__(self, db_path: Path | None = None) -> None:
+        self._db_path = db_path or DB_PATH
+        self._init_db()
+
+    def _init_db(self) -> None:
+        try:
+            self._db_path.parent.mkdir(parents=True, exist_ok=True)
+            with closing(sqlite3.connect(str(self._db_path))) as conn:
+                conn.execute("PRAGMA journal_mode=WAL")
+                conn.execute(f"PRAGMA busy_timeout={settings.db_busy_timeout_ms}")
+                conn.executescript(_SCHEMA)
+                conn.commit()
+        except Exception as exc:
+            logger.warning("Failed to initialize claude_quota DB: %s", exc)
+
+    def _connect(self) -> sqlite3.Connection:
+        conn = sqlite3.connect(str(self._db_path))
+        conn.row_factory = sqlite3.Row
+        conn.execute(f"PRAGMA busy_timeout={settings.db_busy_timeout_ms}")
+        return conn
+
+    def record_call(self, call: ClaudeCall) -> None:
+        """Persist a completed Claude API call."""
+        try:
+            with closing(self._connect()) as conn:
+                conn.execute(
+                    "INSERT INTO claude_calls "
+                    "(ts, model, input_tok, output_tok, cost_usd, task_label, metadata) "
+                    "VALUES (?, ?, ?, ?, ?, ?, ?)",
+                    (
+                        call.ts,
+                        call.model,
+                        call.input_tokens,
+                        call.output_tokens,
+                        call.cost_usd,
+                        call.task_label,
+                        json.dumps(call.metadata),
+                    ),
+                )
+                conn.commit()
+        except Exception as exc:
+            logger.warning("Failed to record Claude call: %s", exc)
+
+    def _aggregate(self, where_clause: str, params: tuple) -> dict:
+        """Return aggregated stats for a WHERE clause."""
+        try:
+            with closing(self._connect()) as conn:
+                row = conn.execute(
+                    f"SELECT COUNT(*) as calls, "
+                    f"COALESCE(SUM(input_tok),0) as input_tok, "
+                    f"COALESCE(SUM(output_tok),0) as output_tok, "
+                    f"COALESCE(SUM(cost_usd),0.0) as cost_usd "
+                    f"FROM claude_calls {where_clause}",
+                    params,
+                ).fetchone()
+                if row:
+                    return dict(row)
+        except Exception as exc:
+            logger.warning("Failed to aggregate Claude quota: %s", exc)
+        return {"calls": 0, "input_tok": 0, "output_tok": 0, "cost_usd": 0.0}
+
+    def today_summary(self) -> QuotaSummary:
+        """Return quota summary for today (UTC)."""
+        today = date.today().isoformat()
+        agg = self._aggregate("WHERE ts >= ?", (today,))
+        return QuotaSummary(
+            period="today",
+            calls=agg["calls"],
+            input_tokens=agg["input_tok"],
+            output_tokens=agg["output_tok"],
+            cost_usd=agg["cost_usd"],
+            mode=_mode_for_cost(agg["cost_usd"]),
+            burst_threshold=BURST_THRESHOLD,
+            active_threshold=ACTIVE_THRESHOLD,
+        )
+
+    def month_summary(self) -> QuotaSummary:
+        """Return quota summary for the current calendar month (UTC)."""
+        month_prefix = date.today().strftime("%Y-%m")
+        agg = self._aggregate("WHERE ts >= ?", (month_prefix,))
+        return QuotaSummary(
+            period="month",
+            calls=agg["calls"],
+            input_tokens=agg["input_tok"],
+            output_tokens=agg["output_tok"],
+            cost_usd=agg["cost_usd"],
+            mode=_mode_for_cost(agg["cost_usd"] / 30),  # amortised daily
+            burst_threshold=BURST_THRESHOLD,
+            active_threshold=ACTIVE_THRESHOLD,
+        )
+
+    def current_mode(self) -> MetabolicMode:
+        """Return the current metabolic mode based on today's spend."""
+        return self.today_summary().mode
+
+
+# ── Module-level singleton ────────────────────────────────────────────────────
+_store: ClaudeQuotaStore | None = None
+
+
+def get_quota_store() -> ClaudeQuotaStore:
+    """Return the module-level quota store, creating it on first access."""
+    global _store
+    if _store is None:
+        _store = ClaudeQuotaStore()
+    return _store
+
+
+def record_usage(
+    model: str,
+    input_tokens: int,
+    output_tokens: int,
+    task_label: str = "",
+    metadata: dict | None = None,
+) -> None:
+    """Convenience function to record a Claude API call.
+
+    Silently degrades if the quota DB is unavailable.
+    """
+    call = ClaudeCall(
+        model=model,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        task_label=task_label,
+        metadata=metadata or {},
+    )
+    get_quota_store().record_call(call)
+    logger.debug(
+        "Claude call recorded: model=%s in=%d out=%d cost=$%.4f",
+        model,
+        input_tokens,
+        output_tokens,
+        call.cost_usd,
+    )
+
+
+def current_mode() -> MetabolicMode:
+    """Return the current metabolic mode.
+
+    BURST   → Claude is cheap today, use freely.
+    ACTIVE  → Approaching daily budget, prefer Groq / cheaper tier.
+    RESTING → Daily limit reached, use local Ollama only.
+    """
+    try:
+        return get_quota_store().current_mode()
+    except Exception as exc:
+        logger.warning("Quota mode check failed, defaulting to BURST: %s", exc)
+        return "BURST"
+
+
+def quota_report() -> str:
+    """Return a human-readable quota report for CLI / dashboard display."""
+    try:
+        store = get_quota_store()
+        today = store.today_summary()
+        month = store.month_summary()
+
+        lines = [
+            "═══════════════════════════════════════",
+            "  Claude API Quota — Metabolic Report  ",
+            "═══════════════════════════════════════",
+            f"  Today     {today.calls:>6} calls  "
+            f"${today.cost_usd:>7.4f}  [{today.mode}]",
+            f"  This month {month.calls:>5} calls  "
+            f"${month.cost_usd:>7.4f}",
+            "───────────────────────────────────────",
+            f"  BURST  threshold : ${today.burst_threshold:.2f}/day",
+            f"  ACTIVE threshold : ${today.active_threshold:.2f}/day",
+            "───────────────────────────────────────",
+            f"  Current mode     : {today.mode}",
+            "═══════════════════════════════════════",
+        ]
+        return "\n".join(lines)
+    except Exception as exc:
+        return f"Quota report unavailable: {exc}"
--- a/tests/infrastructure/test_claude_quota.py
+++ b/tests/infrastructure/test_claude_quota.py
@@ -0,0 +1,139 @@
+"""Tests for the Claude quota tracker and metabolic mode advisor.
+
+Refs: #1074
+"""
+
+import pytest
+
+from infrastructure.claude_quota import (
+    ACTIVE_THRESHOLD,
+    BURST_THRESHOLD,
+    ClaudeCall,
+    ClaudeQuotaStore,
+    MetabolicMode,
+    _mode_for_cost,
+    current_mode,
+    quota_report,
+    record_usage,
+)
+
+
+@pytest.fixture
+def store(tmp_path):
+    """Fresh quota store backed by a temp DB."""
+    return ClaudeQuotaStore(db_path=tmp_path / "test_quota.db")
+
+
+# ── Unit: cost calculation ────────────────────────────────────────────────────
+
+
+class TestClaudeCallCost:
+    def test_haiku_cost(self):
+        call = ClaudeCall(model="haiku", input_tokens=1_000_000, output_tokens=0)
+        assert call.cost_usd == pytest.approx(0.25)
+
+    def test_sonnet_output_cost(self):
+        call = ClaudeCall(model="sonnet", input_tokens=0, output_tokens=1_000_000)
+        assert call.cost_usd == pytest.approx(15.00)
+
+    def test_opus_combined_cost(self):
+        call = ClaudeCall(model="opus", input_tokens=100_000, output_tokens=50_000)
+        # input: 100k * 15/1M = 1.50, output: 50k * 75/1M = 3.75 → 5.25
+        assert call.cost_usd == pytest.approx(5.25)
+
+    def test_unknown_model_uses_default(self):
+        call = ClaudeCall(model="unknown-model-xyz", input_tokens=1_000_000, output_tokens=0)
+        assert call.cost_usd == pytest.approx(3.00)  # default input cost
+
+    def test_zero_tokens_zero_cost(self):
+        call = ClaudeCall(model="haiku", input_tokens=0, output_tokens=0)
+        assert call.cost_usd == 0.0
+
+
+# ── Unit: metabolic mode thresholds ──────────────────────────────────────────
+
+
+class TestMetabolicMode:
+    def test_under_burst_threshold(self):
+        assert _mode_for_cost(0.0) == "BURST"
+        assert _mode_for_cost(BURST_THRESHOLD - 0.01) == "BURST"
+
+    def test_at_burst_threshold_is_active(self):
+        assert _mode_for_cost(BURST_THRESHOLD) == "ACTIVE"
+
+    def test_between_thresholds(self):
+        mid = (BURST_THRESHOLD + ACTIVE_THRESHOLD) / 2
+        assert _mode_for_cost(mid) == "ACTIVE"
+
+    def test_at_active_threshold_is_resting(self):
+        assert _mode_for_cost(ACTIVE_THRESHOLD) == "RESTING"
+
+    def test_over_active_threshold(self):
+        assert _mode_for_cost(ACTIVE_THRESHOLD + 10) == "RESTING"
+
+
+# ── Store: record and query ───────────────────────────────────────────────────
+
+
+class TestClaudeQuotaStore:
+    def test_record_call(self, store):
+        call = ClaudeCall(model="haiku", input_tokens=1000, output_tokens=500)
+        store.record_call(call)
+        summary = store.today_summary()
+        assert summary.calls == 1
+        assert summary.input_tokens == 1000
+        assert summary.output_tokens == 500
+        assert summary.cost_usd > 0
+
+    def test_today_summary_empty_db(self, store):
+        summary = store.today_summary()
+        assert summary.calls == 0
+        assert summary.cost_usd == 0.0
+        assert summary.mode == "BURST"
+
+    def test_month_summary_aggregates_multiple_calls(self, store):
+        for _ in range(5):
+            store.record_call(ClaudeCall(model="haiku", input_tokens=100, output_tokens=50))
+        month = store.month_summary()
+        assert month.calls == 5
+        assert month.input_tokens == 500
+        assert month.output_tokens == 250
+
+    def test_current_mode_burst_when_empty(self, store):
+        assert store.current_mode() == "BURST"
+
+    def test_current_mode_resting_when_expensive(self, store):
+        # Record enough usage to push past ACTIVE_THRESHOLD
+        # ACTIVE_THRESHOLD = 5.00, opus input = 15/1M
+        # Need >5.00: 5.00/15 * 1M ≈ 333_334 input tokens
+        store.record_call(
+            ClaudeCall(model="opus", input_tokens=400_000, output_tokens=0)
+        )
+        mode = store.current_mode()
+        assert mode == "RESTING"
+
+    def test_summary_as_dict(self, store):
+        summary = store.today_summary()
+        d = summary.as_dict()
+        assert "period" in d
+        assert "calls" in d
+        assert "cost_usd" in d
+        assert "mode" in d
+
+
+# ── Convenience functions ─────────────────────────────────────────────────────
+
+
+class TestConvenienceFunctions:
+    def test_record_usage_does_not_raise(self):
+        # Uses module-level store; should not raise even if DB path issues
+        record_usage(model="haiku", input_tokens=10, output_tokens=5, task_label="test")
+
+    def test_current_mode_returns_valid_mode(self):
+        mode = current_mode()
+        assert mode in ("BURST", "ACTIVE", "RESTING")
+
+    def test_quota_report_returns_string(self):
+        report = quota_report()
+        assert isinstance(report, str)
+        assert "BURST" in report or "ACTIVE" in report or "RESTING" in report