# Timmy-time-dashboard/src/infrastructure/claude_quota.py

"""
claude_quota.py — Claude Code / Claude.ai Quota Monitor

Drop into src/infrastructure/ in the Timmy Time Dashboard repo.

Provides real-time quota visibility and metabolic protocol decisions.

Usage:
    from infrastructure.claude_quota import QuotaMonitor

    monitor = QuotaMonitor()
    status = monitor.check()
    print(status.five_hour_pct)       # 42
    print(status.five_hour_resets_in) # "2h 15m"
    print(status.seven_day_pct)       # 29
    print(status.recommended_tier)    # MetabolicTier.BURST

    # Metabolic protocol: auto-select model based on quota
    model = monitor.select_model(task_complexity="high")
    # Returns "claude-sonnet-4-6" if quota allows, else a local model ("qwen3:14b" or "qwen3:8b")
"""

import json
import logging
import subprocess
import urllib.error
import urllib.request
from dataclasses import dataclass
from datetime import UTC, datetime
from enum import StrEnum

logger = logging.getLogger(__name__)


class MetabolicTier(StrEnum):
    """The three-tier metabolic protocol from the Timmy Time architecture.

    Each member names an inference tier. The member's string value is what
    ``QuotaStatus.summary()`` prints after ``tier:``.
    """

    BURST = "burst"  # Cloud API (Claude/Groq) — expensive, best quality
    ACTIVE = "active"  # Local 14B (Qwen3-14B) — free, good quality
    RESTING = "resting"  # Local 8B (Qwen3-8B) — free, fast, adequate


@dataclass
class QuotaStatus:
    """Current Claude quota state.

    Holds the utilization of the rolling five-hour and seven-day windows as
    fractions, the raw reset timestamps, the unmodified API payload, and the
    time the snapshot was taken. Derived, display-oriented values are exposed
    as read-only properties.
    """

    five_hour_utilization: float  # 0.0 to 1.0
    five_hour_resets_at: str | None
    seven_day_utilization: float  # 0.0 to 1.0
    seven_day_resets_at: str | None
    raw_response: dict
    fetched_at: datetime

    @staticmethod
    def _as_percent(fraction: float) -> int:
        """Convert a utilization fraction to a whole percentage (floored).

        ``0.29 * 100`` evaluates to ``28.999999999999996`` in binary floating
        point, so a bare ``int()`` would report 28. Rounding to six decimals
        first removes that representation noise while keeping the floor
        semantics for genuinely fractional percentages (29.5 -> 29).
        """
        return int(round(fraction * 100, 6))

    @property
    def five_hour_pct(self) -> int:
        """Five-hour window utilization as a whole percentage."""
        return self._as_percent(self.five_hour_utilization)

    @property
    def seven_day_pct(self) -> int:
        """Seven-day window utilization as a whole percentage."""
        return self._as_percent(self.seven_day_utilization)

    @property
    def five_hour_resets_in(self) -> str:
        """Human-readable time until the five-hour window resets."""
        return _time_remaining(self.five_hour_resets_at)

    @property
    def seven_day_resets_in(self) -> str:
        """Human-readable time until the seven-day window resets."""
        return _time_remaining(self.seven_day_resets_at)

    @property
    def recommended_tier(self) -> MetabolicTier:
        """Metabolic protocol: determine which inference tier to use.

        The weekly window is checked first, so a nearly exhausted week wins
        over a healthy five-hour window.
        """
        # If weekly quota is critical (>= 80%), go full local on the small model
        if self.seven_day_utilization >= 0.80:
            return MetabolicTier.RESTING
        # If the 5-hour window is at or past half, use the larger local model
        if self.five_hour_utilization >= 0.50:
            return MetabolicTier.ACTIVE
        # Quota healthy — cloud available for high-value tasks
        return MetabolicTier.BURST

    def summary(self) -> str:
        """Return a one-line human-readable status string.

        Format: ``5h: N% (resets T) | 7d: N% (resets T) | tier: NAME``.
        """
        return (
            f"5h: {self.five_hour_pct}% (resets {self.five_hour_resets_in}) | "
            f"7d: {self.seven_day_pct}% (resets {self.seven_day_resets_in}) | "
            f"tier: {self.recommended_tier.value}"
        )


class QuotaMonitor:
    """
    Monitors Claude Code / Claude.ai quota via the internal OAuth API.

    The token is read from macOS Keychain where Claude Code stores it.
    Falls back gracefully if credentials aren't available (e.g., on Linux VPS).
    """

    API_URL = "https://api.anthropic.com/api/oauth/usage"
    KEYCHAIN_SERVICE = "Claude Code-credentials"
    USER_AGENT = "claude-code/2.0.32"

    def __init__(self) -> None:
        self._token: str | None = None
        self._last_status: QuotaStatus | None = None
        self._cache_seconds = 30  # Don't hammer the API

    def _get_token(self) -> str | None:
        """Extract the OAuth access token from macOS Keychain.

        The token is cached on the instance after the first successful read.

        Returns:
            The access token, or None when the ``security`` tool is missing,
            times out, exits non-zero, or prints something that is not the
            expected JSON credential object. This method never raises for
            those conditions; it logs a warning instead.
        """
        if self._token:
            return self._token

        try:
            result = subprocess.run(
                ["security", "find-generic-password", "-s", self.KEYCHAIN_SERVICE, "-w"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (OSError, subprocess.TimeoutExpired) as exc:
            # OSError covers a missing `security` binary (non-macOS hosts)
            # as well as permission problems when launching it.
            logger.warning("Could not read Claude Code credentials: %s", exc)
            return None

        if result.returncode != 0:
            logger.warning("Claude Code credentials not found in Keychain")
            return None

        try:
            creds = json.loads(result.stdout.strip())
        except json.JSONDecodeError as exc:
            logger.warning("Could not read Claude Code credentials: %s", exc)
            return None

        # The token may sit under "claudeAiOauth" or directly at the top level.
        # Guard against non-object JSON (list, string, null) at either level so
        # a malformed entry degrades to "no credentials" instead of raising.
        oauth = creds.get("claudeAiOauth", creds) if isinstance(creds, dict) else None
        if not isinstance(oauth, dict):
            logger.warning("Claude Code credentials have an unexpected format")
            return None

        self._token = oauth.get("accessToken")
        return self._token

    def check(self, force: bool = False) -> QuotaStatus | None:
        """
        Fetch current quota status.

        Returns None if credentials aren't available (graceful degradation).
        Caches results for 30 seconds to avoid rate limiting the quota API itself.

        Args:
            force: When True, bypass the cache and query the API.

        Returns:
            A fresh ``QuotaStatus`` on success. If the request fails, the last
            successfully fetched status (possibly stale) is returned, or None
            if there has never been one.
        """
        # Return cached if fresh
        if not force and self._last_status:
            age = (datetime.now(UTC) - self._last_status.fetched_at).total_seconds()
            if age < self._cache_seconds:
                return self._last_status

        token = self._get_token()
        if not token:
            return None

        try:
            req = urllib.request.Request(
                self.API_URL,
                headers={
                    "Accept": "application/json",
                    "Content-Type": "application/json",
                    "User-Agent": self.USER_AGENT,
                    "Authorization": f"Bearer {token}",
                    "anthropic-beta": "oauth-2025-04-20",
                },
            )
            with urllib.request.urlopen(req, timeout=10) as resp:
                data = json.loads(resp.read().decode())

            # A window may be absent or null in the payload; treat both as empty.
            five_hour = data.get("five_hour") or {}
            seven_day = data.get("seven_day") or {}

            self._last_status = QuotaStatus(
                five_hour_utilization=float(five_hour.get("utilization", 0.0)),
                five_hour_resets_at=five_hour.get("resets_at"),
                seven_day_utilization=float(seven_day.get("utilization", 0.0)),
                seven_day_resets_at=seven_day.get("resets_at"),
                raw_response=data,
                fetched_at=datetime.now(UTC),
            )
            return self._last_status

        except urllib.error.HTTPError as exc:
            if exc.code == 401:
                # The cached token was rejected (e.g. expired). Drop it so the
                # next call re-reads the Keychain instead of failing forever.
                self._token = None
            logger.warning("Failed to fetch quota: %s", exc)
            return self._last_status  # Return stale data if available

        except Exception as exc:
            # Best-effort boundary: network, decoding and payload-shape errors
            # must never propagate into model selection.
            logger.warning("Failed to fetch quota: %s", exc)
            return self._last_status  # Return stale data if available

    def select_model(self, task_complexity: str = "medium") -> str:
        """
        Metabolic protocol: select the right model based on quota + task complexity.

        Returns an Ollama model tag or "claude-sonnet-4-6" for cloud.

        task_complexity: "low" | "medium" | "high"
        """
        status = self.check()

        # No quota info available — assume local only (sovereign default)
        if status is None:
            return "qwen3:14b" if task_complexity == "high" else "qwen3:8b"

        tier = status.recommended_tier

        if tier == MetabolicTier.BURST and task_complexity == "high":
            return "claude-sonnet-4-6"  # Cloud — best quality
        elif tier == MetabolicTier.BURST and task_complexity == "medium":
            return "qwen3:14b"  # Save cloud for truly hard tasks
        elif tier == MetabolicTier.ACTIVE:
            return "qwen3:14b"  # Local 14B — good enough
        else:
            # RESTING, or BURST with a low (or unrecognized) complexity
            return "qwen3:8b"  # Local 8B — conserve everything

    def should_use_cloud(self, task_value: str = "normal") -> bool:
        """
        Simple yes/no: should this task use cloud API?

        task_value: "critical" | "high" | "normal" | "routine"

        Any other value is treated like "routine" and yields False.
        """
        status = self.check()

        if status is None:
            return False  # No credentials = local only

        if task_value == "critical":
            return status.seven_day_utilization < 0.95  # Almost always yes
        elif task_value == "high":
            return status.five_hour_utilization < 0.60
        elif task_value == "normal":
            return status.five_hour_utilization < 0.30
        else:  # routine
            return False  # Never waste cloud on routine


def _time_remaining(reset_at: str | None) -> str:
    """Format time until reset as human-readable string."""
    if not reset_at or reset_at == "null":
        return "unknown"

    try:
        reset = datetime.fromisoformat(reset_at.replace("Z", "+00:00"))
        now = datetime.now(UTC)
        diff = reset - now

        if diff.total_seconds() <= 0:
            return "resetting now"

        hours = int(diff.total_seconds() // 3600)
        mins = int((diff.total_seconds() % 3600) // 60)

        if hours > 0:
            return f"{hours}h {mins}m"
        return f"{mins}m"

    except (ValueError, TypeError):
        return "unknown"


# Shared module-level instance, built lazily by get_quota_monitor()
_quota_monitor: QuotaMonitor | None = None


def get_quota_monitor() -> QuotaMonitor:
    """Return the shared QuotaMonitor, constructing it on first use."""
    global _quota_monitor
    monitor = _quota_monitor
    if monitor is None:
        monitor = QuotaMonitor()
        _quota_monitor = monitor
    return monitor