forked from Rockachopa/Timmy-time-dashboard
Co-authored-by: Claude (Opus 4.6) <claude@hermes.local> Co-committed-by: Claude (Opus 4.6) <claude@hermes.local>
265 lines
8.7 KiB
Python
265 lines
8.7 KiB
Python
"""
|
|
claude_quota.py — Claude Code / Claude.ai Quota Monitor

Drop into src/infrastructure/ in the Timmy Time Dashboard repo.

Provides real-time quota visibility and metabolic protocol decisions.

Usage:
    from infrastructure.claude_quota import QuotaMonitor

    monitor = QuotaMonitor()
    status = monitor.check()            # None when credentials are unavailable
    print(status.five_hour_pct)         # 42
    print(status.five_hour_resets_in)   # "2h 15m"
    print(status.seven_day_pct)         # 29
    print(status.recommended_tier)      # MetabolicTier.BURST

    # Metabolic protocol: auto-select model based on quota
    model = monitor.select_model(task_complexity="high")
    # Returns "claude-sonnet-4-6" if quota allows, else "qwen3:14b"
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import subprocess
|
|
import urllib.request
|
|
from dataclasses import dataclass
|
|
from datetime import UTC, datetime
|
|
from enum import StrEnum
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class MetabolicTier(StrEnum):
|
|
"""The three-tier metabolic protocol from the Timmy Time architecture."""
|
|
|
|
BURST = "burst" # Cloud API (Claude/Groq) — expensive, best quality
|
|
ACTIVE = "active" # Local 14B (Qwen3-14B) — free, good quality
|
|
RESTING = "resting" # Local 8B (Qwen3-8B) — free, fast, adequate
|
|
|
|
|
|
@dataclass
|
|
class QuotaStatus:
|
|
"""Current Claude quota state."""
|
|
|
|
five_hour_utilization: float # 0.0 to 1.0
|
|
five_hour_resets_at: str | None
|
|
seven_day_utilization: float # 0.0 to 1.0
|
|
seven_day_resets_at: str | None
|
|
raw_response: dict
|
|
fetched_at: datetime
|
|
|
|
@property
|
|
def five_hour_pct(self) -> int:
|
|
return int(self.five_hour_utilization * 100)
|
|
|
|
@property
|
|
def seven_day_pct(self) -> int:
|
|
return int(self.seven_day_utilization * 100)
|
|
|
|
@property
|
|
def five_hour_resets_in(self) -> str:
|
|
return _time_remaining(self.five_hour_resets_at)
|
|
|
|
@property
|
|
def seven_day_resets_in(self) -> str:
|
|
return _time_remaining(self.seven_day_resets_at)
|
|
|
|
@property
|
|
def recommended_tier(self) -> MetabolicTier:
|
|
"""Metabolic protocol: determine which inference tier to use."""
|
|
# If weekly quota is critical, go full local
|
|
if self.seven_day_utilization >= 0.80:
|
|
return MetabolicTier.RESTING
|
|
# If 5-hour window is critical or past half, use local
|
|
if self.five_hour_utilization >= 0.50:
|
|
return MetabolicTier.ACTIVE
|
|
# Quota healthy — cloud available for high-value tasks
|
|
return MetabolicTier.BURST
|
|
|
|
def summary(self) -> str:
|
|
"""Human-readable status string."""
|
|
return (
|
|
f"5h: {self.five_hour_pct}% (resets {self.five_hour_resets_in}) | "
|
|
f"7d: {self.seven_day_pct}% (resets {self.seven_day_resets_in}) | "
|
|
f"tier: {self.recommended_tier.value}"
|
|
)
|
|
|
|
|
|
class QuotaMonitor:
|
|
"""
|
|
Monitors Claude Code / Claude.ai quota via the internal OAuth API.
|
|
|
|
The token is read from macOS Keychain where Claude Code stores it.
|
|
Falls back gracefully if credentials aren't available (e.g., on Linux VPS).
|
|
"""
|
|
|
|
API_URL = "https://api.anthropic.com/api/oauth/usage"
|
|
KEYCHAIN_SERVICE = "Claude Code-credentials"
|
|
USER_AGENT = "claude-code/2.0.32"
|
|
|
|
def __init__(self) -> None:
|
|
self._token: str | None = None
|
|
self._last_status: QuotaStatus | None = None
|
|
self._cache_seconds = 30 # Don't hammer the API
|
|
|
|
def _get_token(self) -> str | None:
|
|
"""Extract OAuth token from macOS Keychain."""
|
|
if self._token:
|
|
return self._token
|
|
|
|
try:
|
|
result = subprocess.run(
|
|
["security", "find-generic-password", "-s", self.KEYCHAIN_SERVICE, "-w"],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=5,
|
|
)
|
|
if result.returncode != 0:
|
|
logger.warning("Claude Code credentials not found in Keychain")
|
|
return None
|
|
|
|
creds = json.loads(result.stdout.strip())
|
|
oauth = creds.get("claudeAiOauth", creds)
|
|
self._token = oauth.get("accessToken")
|
|
return self._token
|
|
|
|
except (
|
|
json.JSONDecodeError,
|
|
KeyError,
|
|
FileNotFoundError,
|
|
subprocess.TimeoutExpired,
|
|
) as exc:
|
|
logger.warning("Could not read Claude Code credentials: %s", exc)
|
|
return None
|
|
|
|
def check(self, force: bool = False) -> QuotaStatus | None:
|
|
"""
|
|
Fetch current quota status.
|
|
|
|
Returns None if credentials aren't available (graceful degradation).
|
|
Caches results for 30 seconds to avoid rate limiting the quota API itself.
|
|
"""
|
|
# Return cached if fresh
|
|
if not force and self._last_status:
|
|
age = (datetime.now(UTC) - self._last_status.fetched_at).total_seconds()
|
|
if age < self._cache_seconds:
|
|
return self._last_status
|
|
|
|
token = self._get_token()
|
|
if not token:
|
|
return None
|
|
|
|
try:
|
|
req = urllib.request.Request(
|
|
self.API_URL,
|
|
headers={
|
|
"Accept": "application/json",
|
|
"Content-Type": "application/json",
|
|
"User-Agent": self.USER_AGENT,
|
|
"Authorization": f"Bearer {token}",
|
|
"anthropic-beta": "oauth-2025-04-20",
|
|
},
|
|
)
|
|
with urllib.request.urlopen(req, timeout=10) as resp:
|
|
data = json.loads(resp.read().decode())
|
|
|
|
five_hour = data.get("five_hour") or {}
|
|
seven_day = data.get("seven_day") or {}
|
|
|
|
self._last_status = QuotaStatus(
|
|
five_hour_utilization=float(five_hour.get("utilization", 0.0)),
|
|
five_hour_resets_at=five_hour.get("resets_at"),
|
|
seven_day_utilization=float(seven_day.get("utilization", 0.0)),
|
|
seven_day_resets_at=seven_day.get("resets_at"),
|
|
raw_response=data,
|
|
fetched_at=datetime.now(UTC),
|
|
)
|
|
return self._last_status
|
|
|
|
except Exception as exc:
|
|
logger.warning("Failed to fetch quota: %s", exc)
|
|
return self._last_status # Return stale data if available
|
|
|
|
def select_model(self, task_complexity: str = "medium") -> str:
|
|
"""
|
|
Metabolic protocol: select the right model based on quota + task complexity.
|
|
|
|
Returns an Ollama model tag or "claude-sonnet-4-6" for cloud.
|
|
|
|
task_complexity: "low" | "medium" | "high"
|
|
"""
|
|
status = self.check()
|
|
|
|
# No quota info available — assume local only (sovereign default)
|
|
if status is None:
|
|
return "qwen3:14b" if task_complexity == "high" else "qwen3:8b"
|
|
|
|
tier = status.recommended_tier
|
|
|
|
if tier == MetabolicTier.BURST and task_complexity == "high":
|
|
return "claude-sonnet-4-6" # Cloud — best quality
|
|
elif tier == MetabolicTier.BURST and task_complexity == "medium":
|
|
return "qwen3:14b" # Save cloud for truly hard tasks
|
|
elif tier == MetabolicTier.ACTIVE:
|
|
return "qwen3:14b" # Local 14B — good enough
|
|
else: # RESTING
|
|
return "qwen3:8b" # Local 8B — conserve everything
|
|
|
|
def should_use_cloud(self, task_value: str = "normal") -> bool:
|
|
"""
|
|
Simple yes/no: should this task use cloud API?
|
|
|
|
task_value: "critical" | "high" | "normal" | "routine"
|
|
"""
|
|
status = self.check()
|
|
|
|
if status is None:
|
|
return False # No credentials = local only
|
|
|
|
if task_value == "critical":
|
|
return status.seven_day_utilization < 0.95 # Almost always yes
|
|
elif task_value == "high":
|
|
return status.five_hour_utilization < 0.60
|
|
elif task_value == "normal":
|
|
return status.five_hour_utilization < 0.30
|
|
else: # routine
|
|
return False # Never waste cloud on routine
|
|
|
|
|
|
def _time_remaining(reset_at: str | None) -> str:
|
|
"""Format time until reset as human-readable string."""
|
|
if not reset_at or reset_at == "null":
|
|
return "unknown"
|
|
|
|
try:
|
|
reset = datetime.fromisoformat(reset_at.replace("Z", "+00:00"))
|
|
now = datetime.now(UTC)
|
|
diff = reset - now
|
|
|
|
if diff.total_seconds() <= 0:
|
|
return "resetting now"
|
|
|
|
hours = int(diff.total_seconds() // 3600)
|
|
mins = int((diff.total_seconds() % 3600) // 60)
|
|
|
|
if hours > 0:
|
|
return f"{hours}h {mins}m"
|
|
return f"{mins}m"
|
|
|
|
except (ValueError, TypeError):
|
|
return "unknown"
|
|
|
|
|
|
# Process-wide QuotaMonitor instance, created lazily on first request.
_quota_monitor: QuotaMonitor | None = None


def get_quota_monitor() -> QuotaMonitor:
    """Return the shared QuotaMonitor, creating it on the first call."""
    global _quota_monitor
    if _quota_monitor is not None:
        return _quota_monitor
    _quota_monitor = QuotaMonitor()
    return _quota_monitor
|