1
0

Compare commits

...

1 Commits

Author SHA1 Message Date
Alexander Whitestone
466683e14d feat: add Claude quota tracker and metabolic mode advisor (#1074)
Add two tools from the March 23 operational briefing:

- src/infrastructure/claude_quota.py: SQLite-backed tracker for Claude API
  usage (tokens, cost, calls) per day/month.  Exposes current_mode() which
  returns BURST / ACTIVE / RESTING based on daily spend thresholds, enabling
  the orchestrator to route inference requests according to the metabolic
  protocol (issue #972).

- scripts/claude_quota_check.sh: CLI wrapper with --mode (print mode only)
  and --json (machine-readable) flags for quick quota inspection from the
  shell or CI scripts.

- tests/infrastructure/test_claude_quota.py: 19 unit tests covering cost
  calculation, mode thresholds, store CRUD, and convenience functions.

Refs #1074
2026-03-23 11:18:13 -04:00
3 changed files with 507 additions and 0 deletions

66
scripts/claude_quota_check.sh Executable file
View File

@@ -0,0 +1,66 @@
#!/usr/bin/env bash
# claude_quota_check.sh — Inspect Claude API quota and metabolic mode from the shell.
#
# Usage:
#   ./scripts/claude_quota_check.sh          # Human-readable report
#   ./scripts/claude_quota_check.sh --mode   # Print current mode only (BURST/ACTIVE/RESTING)
#   ./scripts/claude_quota_check.sh --json   # JSON output for scripting
#
# If both --mode and --json are supplied, --mode takes precedence.
#
# Refs: #1074, #972

set -euo pipefail

# Locate the repository relative to this script so it can be run from any cwd.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
repo_root="$(cd "${script_dir}/.." && pwd)"
src_dir="${repo_root}/src"

# Make the project's Python packages importable by the embedded snippets below.
export PYTHONPATH="${src_dir}:${PYTHONPATH:-}"

# Print the help text to stdout.
print_usage() {
    echo "Usage: $0 [--mode|--json]"
    echo " (no flags) Human-readable quota report"
    echo " --mode Print current metabolic mode only"
    echo " --json JSON output for scripting"
}

# Print only the current metabolic mode.
emit_mode() {
    python3 - <<'PYEOF'
from infrastructure.claude_quota import current_mode
print(current_mode())
PYEOF
}

# Print today's and this month's summaries plus the current mode as one JSON object.
emit_json() {
    python3 - <<'PYEOF'
import json
from infrastructure.claude_quota import get_quota_store
store = get_quota_store()
today = store.today_summary()
month = store.month_summary()
print(json.dumps({
    "today": today.as_dict(),
    "month": month.as_dict(),
    "current_mode": today.mode,
}))
PYEOF
}

# Print the human-readable report.
emit_report() {
    python3 - <<'PYEOF'
from infrastructure.claude_quota import quota_report
print(quota_report())
PYEOF
}

mode_only=0
json_output=0

# Consume arguments left to right; help exits immediately, unknown flags abort.
while (( $# > 0 )); do
    case "$1" in
        --mode)
            mode_only=1
            ;;
        --json)
            json_output=1
            ;;
        -h|--help)
            print_usage
            exit 0
            ;;
        *)
            echo "Unknown flag: $1" >&2
            exit 1
            ;;
    esac
    shift
done

if (( mode_only )); then
    emit_mode
elif (( json_output )); then
    emit_json
else
    emit_report
fi

View File

@@ -0,0 +1,302 @@
"""Claude API quota tracker and metabolic mode advisor.
Tracks Claude API usage (tokens, cost, calls) in a local SQLite database.
Provides a metabolic mode recommendation (BURST / ACTIVE / RESTING) based on
daily spend thresholds so the orchestrator can decide when to use cloud inference
vs. local Ollama.
Metabolic protocol (from issue #1074):
BURST — daily spend < burst_threshold → use Claude freely
ACTIVE — daily spend < active_threshold → prefer Groq / cheap tier
RESTING — daily spend >= active_threshold → local only, no API calls
Refs: #1074, #972
"""
import json
import logging
import sqlite3
from contextlib import closing
from dataclasses import dataclass, field
from datetime import UTC, date, datetime
from pathlib import Path
from typing import Literal
from config import settings
# Module-level logger used for the warning/debug messages emitted below.
logger = logging.getLogger(__name__)
# ── Cost table (USD per million tokens, approximate) ─────────────────────────
# Maps a model name or alias to its input/output rate per million tokens.
# ClaudeCall.cost_usd looks names up by exact match; anything not listed here
# falls back to _DEFAULT_COST.
# NOTE(review): the rates are hard-coded approximations — verify them against
# the provider's current published pricing before trusting the computed spend.
_MODEL_COSTS: dict[str, dict[str, float]] = {
# haiku aliases
"haiku": {"input": 0.25, "output": 1.25},
"claude-haiku-4-5": {"input": 0.25, "output": 1.25},
"claude-haiku-4-5-20251001": {"input": 0.25, "output": 1.25},
# sonnet aliases
"sonnet": {"input": 3.00, "output": 15.00},
"claude-sonnet-4-6": {"input": 3.00, "output": 15.00},
# opus aliases
"opus": {"input": 15.00, "output": 75.00},
"claude-opus-4-6": {"input": 15.00, "output": 75.00},
}
# Rates applied to unrecognised model names (same values as the sonnet entries).
_DEFAULT_COST = {"input": 3.00, "output": 15.00} # conservative default
# The three mode names the advisor can return.
MetabolicMode = Literal["BURST", "ACTIVE", "RESTING"]
# Default SQLite file: <settings.repo_root>/data/claude_quota.db.
DB_PATH = Path(settings.repo_root) / "data" / "claude_quota.db"
# Daily spend thresholds (USD), compared against aggregated cost by _mode_for_cost.
# NOTE(review): these are plain module constants; no env/Settings override is
# wired up in this file — confirm before relying on runtime tuning.
BURST_THRESHOLD: float = 1.00 # < $1/day → BURST mode, use Claude freely
ACTIVE_THRESHOLD: float = 5.00 # < $5/day → ACTIVE mode, prefer cheaper tier
# DDL applied on store initialisation: one row per recorded call, with indexes
# on the timestamp and model columns. ts is stored as TEXT and metadata as a
# JSON-encoded TEXT column.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS claude_calls (
id INTEGER PRIMARY KEY AUTOINCREMENT,
ts TEXT NOT NULL,
model TEXT NOT NULL,
input_tok INTEGER NOT NULL DEFAULT 0,
output_tok INTEGER NOT NULL DEFAULT 0,
cost_usd REAL NOT NULL DEFAULT 0.0,
task_label TEXT DEFAULT '',
metadata TEXT DEFAULT '{}'
);
CREATE INDEX IF NOT EXISTS idx_cc_ts ON claude_calls(ts);
CREATE INDEX IF NOT EXISTS idx_cc_model ON claude_calls(model);
"""
@dataclass
class ClaudeCall:
"""Record of a single Claude API call."""
model: str
input_tokens: int
output_tokens: int
task_label: str = ""
ts: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
metadata: dict = field(default_factory=dict)
@property
def cost_usd(self) -> float:
costs = _MODEL_COSTS.get(self.model, _DEFAULT_COST)
return (
self.input_tokens * costs["input"]
+ self.output_tokens * costs["output"]
) / 1_000_000
@dataclass
class QuotaSummary:
    """Usage totals for one reporting window, plus the mode derived from them."""

    period: str  # "today" | "month"
    calls: int
    input_tokens: int
    output_tokens: int
    cost_usd: float
    mode: MetabolicMode
    burst_threshold: float
    active_threshold: float

    def as_dict(self) -> dict:
        """Return the summary as a plain dict, with ``cost_usd`` rounded to 4 places."""
        field_names = (
            "period",
            "calls",
            "input_tokens",
            "output_tokens",
            "cost_usd",
            "mode",
            "burst_threshold",
            "active_threshold",
        )
        snapshot = {name: getattr(self, name) for name in field_names}
        # Only the cost is rounded; every other value is passed through untouched.
        snapshot["cost_usd"] = round(self.cost_usd, 4)
        return snapshot
def _mode_for_cost(daily_cost: float) -> MetabolicMode:
if daily_cost < BURST_THRESHOLD:
return "BURST"
if daily_cost < ACTIVE_THRESHOLD:
return "ACTIVE"
return "RESTING"
class ClaudeQuotaStore:
"""SQLite-backed store for Claude API usage tracking.
Thread-safe: creates a new connection per operation.
"""
def __init__(self, db_path: Path | None = None) -> None:
self._db_path = db_path or DB_PATH
self._init_db()
def _init_db(self) -> None:
try:
self._db_path.parent.mkdir(parents=True, exist_ok=True)
with closing(sqlite3.connect(str(self._db_path))) as conn:
conn.execute("PRAGMA journal_mode=WAL")
conn.execute(f"PRAGMA busy_timeout={settings.db_busy_timeout_ms}")
conn.executescript(_SCHEMA)
conn.commit()
except Exception as exc:
logger.warning("Failed to initialize claude_quota DB: %s", exc)
def _connect(self) -> sqlite3.Connection:
conn = sqlite3.connect(str(self._db_path))
conn.row_factory = sqlite3.Row
conn.execute(f"PRAGMA busy_timeout={settings.db_busy_timeout_ms}")
return conn
def record_call(self, call: ClaudeCall) -> None:
"""Persist a completed Claude API call."""
try:
with closing(self._connect()) as conn:
conn.execute(
"INSERT INTO claude_calls "
"(ts, model, input_tok, output_tok, cost_usd, task_label, metadata) "
"VALUES (?, ?, ?, ?, ?, ?, ?)",
(
call.ts,
call.model,
call.input_tokens,
call.output_tokens,
call.cost_usd,
call.task_label,
json.dumps(call.metadata),
),
)
conn.commit()
except Exception as exc:
logger.warning("Failed to record Claude call: %s", exc)
def _aggregate(self, where_clause: str, params: tuple) -> dict:
"""Return aggregated stats for a WHERE clause."""
try:
with closing(self._connect()) as conn:
row = conn.execute(
f"SELECT COUNT(*) as calls, "
f"COALESCE(SUM(input_tok),0) as input_tok, "
f"COALESCE(SUM(output_tok),0) as output_tok, "
f"COALESCE(SUM(cost_usd),0.0) as cost_usd "
f"FROM claude_calls {where_clause}",
params,
).fetchone()
if row:
return dict(row)
except Exception as exc:
logger.warning("Failed to aggregate Claude quota: %s", exc)
return {"calls": 0, "input_tok": 0, "output_tok": 0, "cost_usd": 0.0}
def today_summary(self) -> QuotaSummary:
"""Return quota summary for today (UTC)."""
today = date.today().isoformat()
agg = self._aggregate("WHERE ts >= ?", (today,))
return QuotaSummary(
period="today",
calls=agg["calls"],
input_tokens=agg["input_tok"],
output_tokens=agg["output_tok"],
cost_usd=agg["cost_usd"],
mode=_mode_for_cost(agg["cost_usd"]),
burst_threshold=BURST_THRESHOLD,
active_threshold=ACTIVE_THRESHOLD,
)
def month_summary(self) -> QuotaSummary:
"""Return quota summary for the current calendar month (UTC)."""
month_prefix = date.today().strftime("%Y-%m")
agg = self._aggregate("WHERE ts >= ?", (month_prefix,))
return QuotaSummary(
period="month",
calls=agg["calls"],
input_tokens=agg["input_tok"],
output_tokens=agg["output_tok"],
cost_usd=agg["cost_usd"],
mode=_mode_for_cost(agg["cost_usd"] / 30), # amortised daily
burst_threshold=BURST_THRESHOLD,
active_threshold=ACTIVE_THRESHOLD,
)
def current_mode(self) -> MetabolicMode:
"""Return the current metabolic mode based on today's spend."""
return self.today_summary().mode
# ── Module-level singleton ────────────────────────────────────────────────────
# Shared store instance; stays None until get_quota_store() is first called.
_store: ClaudeQuotaStore | None = None


def get_quota_store() -> ClaudeQuotaStore:
    """Return the shared module-level store, building it lazily on first use."""
    global _store
    if _store is not None:
        return _store
    _store = ClaudeQuotaStore()
    return _store
def record_usage(
    model: str,
    input_tokens: int,
    output_tokens: int,
    task_label: str = "",
    metadata: dict | None = None,
) -> None:
    """Record one Claude API call in the shared quota store.

    Args:
        model: Model name or alias used for the call.
        input_tokens: Number of prompt tokens consumed.
        output_tokens: Number of completion tokens produced.
        task_label: Optional free-form label for the originating task.
        metadata: Optional extra details; an empty dict is stored when omitted.

    Persistence failures are logged by the store rather than raised.
    """
    extra = metadata if metadata else {}
    entry = ClaudeCall(
        model=model,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        task_label=task_label,
        metadata=extra,
    )
    shared_store = get_quota_store()
    shared_store.record_call(entry)
    logger.debug(
        "Claude call recorded: model=%s in=%d out=%d cost=$%.4f",
        model,
        input_tokens,
        output_tokens,
        entry.cost_usd,
    )
def current_mode() -> MetabolicMode:
    """Return the metabolic mode derived from today's spend in the shared store.

    BURST   → spend is low today, use Claude freely.
    ACTIVE  → approaching the daily budget, prefer Groq / a cheaper tier.
    RESTING → daily limit reached, use local Ollama only.

    If the lookup raises, the failure is logged and BURST is returned.
    """
    try:
        mode = get_quota_store().current_mode()
    except Exception as exc:
        logger.warning("Quota mode check failed, defaulting to BURST: %s", exc)
        mode = "BURST"
    return mode
def quota_report() -> str:
    """Build the multi-line, human-readable quota report for CLI / dashboard use.

    Returns the formatted report, or a one-line "unavailable" message carrying
    the error text if anything goes wrong while gathering the figures.
    """
    heavy_rule = "═══════════════════════════════════════"
    light_rule = "───────────────────────────────────────"
    try:
        shared_store = get_quota_store()
        day = shared_store.today_summary()
        month_to_date = shared_store.month_summary()
        report_lines = (
            heavy_rule,
            " Claude API Quota — Metabolic Report ",
            heavy_rule,
            f" Today {day.calls:>6} calls ${day.cost_usd:>7.4f} [{day.mode}]",
            f" This month {month_to_date.calls:>5} calls ${month_to_date.cost_usd:>7.4f}",
            light_rule,
            f" BURST threshold : ${day.burst_threshold:.2f}/day",
            f" ACTIVE threshold : ${day.active_threshold:.2f}/day",
            light_rule,
            f" Current mode : {day.mode}",
            heavy_rule,
        )
        return "\n".join(report_lines)
    except Exception as exc:
        return f"Quota report unavailable: {exc}"

View File

@@ -0,0 +1,139 @@
"""Tests for the Claude quota tracker and metabolic mode advisor.
Refs: #1074
"""
import pytest
from infrastructure.claude_quota import (
ACTIVE_THRESHOLD,
BURST_THRESHOLD,
ClaudeCall,
ClaudeQuotaStore,
MetabolicMode,
_mode_for_cost,
current_mode,
quota_report,
record_usage,
)
@pytest.fixture
def store(tmp_path):
    """Provide a ClaudeQuotaStore backed by a throwaway database under tmp_path."""
    db_file = tmp_path / "test_quota.db"
    return ClaudeQuotaStore(db_path=db_file)
# ── Unit: cost calculation ────────────────────────────────────────────────────
class TestClaudeCallCost:
    """Checks ClaudeCall.cost_usd against the per-million-token rate table."""

    def test_haiku_cost(self):
        # One million haiku input tokens cost exactly the haiku input rate.
        million_in = ClaudeCall(model="haiku", input_tokens=1_000_000, output_tokens=0)
        assert million_in.cost_usd == pytest.approx(0.25)

    def test_sonnet_output_cost(self):
        # One million sonnet output tokens cost exactly the sonnet output rate.
        million_out = ClaudeCall(model="sonnet", input_tokens=0, output_tokens=1_000_000)
        assert million_out.cost_usd == pytest.approx(15.00)

    def test_opus_combined_cost(self):
        mixed = ClaudeCall(model="opus", input_tokens=100_000, output_tokens=50_000)
        # input: 100k * 15/1M = 1.50, output: 50k * 75/1M = 3.75 → 5.25
        expected_total = 1.50 + 3.75
        assert mixed.cost_usd == pytest.approx(expected_total)

    def test_unknown_model_uses_default(self):
        unlisted = ClaudeCall(
            model="unknown-model-xyz", input_tokens=1_000_000, output_tokens=0
        )
        assert unlisted.cost_usd == pytest.approx(3.00)  # default input cost

    def test_zero_tokens_zero_cost(self):
        idle = ClaudeCall(model="haiku", input_tokens=0, output_tokens=0)
        assert idle.cost_usd == 0.0
# ── Unit: metabolic mode thresholds ──────────────────────────────────────────
class TestMetabolicMode:
    """Boundary behaviour of _mode_for_cost around the two spend thresholds."""

    def test_under_burst_threshold(self):
        for spend in (0.0, BURST_THRESHOLD - 0.01):
            assert _mode_for_cost(spend) == "BURST"

    def test_at_burst_threshold_is_active(self):
        # The burst bound is exclusive: hitting it exactly leaves BURST.
        assert _mode_for_cost(BURST_THRESHOLD) == "ACTIVE"

    def test_between_thresholds(self):
        midpoint = (BURST_THRESHOLD + ACTIVE_THRESHOLD) / 2
        assert _mode_for_cost(midpoint) == "ACTIVE"

    def test_at_active_threshold_is_resting(self):
        # The active bound is exclusive as well.
        assert _mode_for_cost(ACTIVE_THRESHOLD) == "RESTING"

    def test_over_active_threshold(self):
        well_over = ACTIVE_THRESHOLD + 10
        assert _mode_for_cost(well_over) == "RESTING"
# ── Store: record and query ───────────────────────────────────────────────────
class TestClaudeQuotaStore:
    """Round-trip tests: write calls to a temp store and read the summaries back."""

    def test_record_call(self, store):
        store.record_call(ClaudeCall(model="haiku", input_tokens=1000, output_tokens=500))
        today = store.today_summary()
        assert (today.calls, today.input_tokens, today.output_tokens) == (1, 1000, 500)
        assert today.cost_usd > 0

    def test_today_summary_empty_db(self, store):
        today = store.today_summary()
        assert today.calls == 0
        assert today.cost_usd == 0.0
        assert today.mode == "BURST"

    def test_month_summary_aggregates_multiple_calls(self, store):
        repeats = 5
        for _ in range(repeats):
            store.record_call(ClaudeCall(model="haiku", input_tokens=100, output_tokens=50))
        monthly = store.month_summary()
        assert monthly.calls == 5
        assert monthly.input_tokens == 500
        assert monthly.output_tokens == 250

    def test_current_mode_burst_when_empty(self, store):
        assert store.current_mode() == "BURST"

    def test_current_mode_resting_when_expensive(self, store):
        # Record enough usage to push past ACTIVE_THRESHOLD
        # ACTIVE_THRESHOLD = 5.00, opus input = 15/1M
        # Need >5.00: 5.00/15 * 1M ≈ 333_334 input tokens
        pricey = ClaudeCall(model="opus", input_tokens=400_000, output_tokens=0)
        store.record_call(pricey)
        assert store.current_mode() == "RESTING"

    def test_summary_as_dict(self, store):
        as_mapping = store.today_summary().as_dict()
        for key in ("period", "calls", "cost_usd", "mode"):
            assert key in as_mapping
# ── Convenience functions ─────────────────────────────────────────────────────
class TestConvenienceFunctions:
    """Smoke tests for the module-level helpers that use the shared store.

    The helpers go through the module-level singleton, which by default points
    at the real ``data/claude_quota.db``. Every test here swaps that singleton
    for a store on a temp database so the suite neither writes to nor depends
    on the real quota data.
    """

    @pytest.fixture(autouse=True)
    def _isolated_store(self, tmp_path, monkeypatch):
        """Replace the shared store with a temp-DB store for the test's duration."""
        # Imported here so the patch targets the module object itself;
        # monkeypatch restores the original value on teardown.
        import infrastructure.claude_quota as quota_module

        monkeypatch.setattr(
            quota_module,
            "_store",
            ClaudeQuotaStore(db_path=tmp_path / "convenience_quota.db"),
        )

    def test_record_usage_does_not_raise(self):
        # Goes through the (patched) module-level store; must not raise.
        record_usage(model="haiku", input_tokens=10, output_tokens=5, task_label="test")

    def test_current_mode_returns_valid_mode(self):
        mode = current_mode()
        assert mode in ("BURST", "ACTIVE", "RESTING")

    def test_quota_report_returns_string(self):
        report = quota_report()
        assert isinstance(report, str)
        assert "BURST" in report or "ACTIVE" in report or "RESTING" in report