Adds `QuotaMonitor` class (src/infrastructure/claude_quota.py) that reads the Claude Code OAuth token from the macOS Keychain, calls the Anthropic usage API with 30s caching, and applies the Metabolic Protocol to auto-select the right inference tier:

- BURST (5h < 50%): cloud available for high-value tasks
- ACTIVE (5h 50-80%): local Qwen3-14B only
- RESTING (7d > 80%): local Qwen3-8B only

`select_model(task_complexity)` returns an Ollama tag or "claude-sonnet-4-6". `should_use_cloud(task_value)` provides a boolean gate for cloud calls.

Integrates into cascade.py: before routing to anthropic/openai/grok providers, the router calls `_quota_allows_cloud()`, skipping cloud when quota is low. Degrades gracefully on Linux (no Keychain) — returns local-only defaults.

Also adds `scripts/claude_quota_check.sh`: CLI tool with color-coded usage bars, `--json` and `--watch` modes for monitoring from the terminal.

Fixes #1075

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
187 lines
6.1 KiB
Bash
Executable File
187 lines
6.1 KiB
Bash
Executable File
#!/bin/bash
# ═══════════════════════════════════════════════════════════════
# claude_quota_check.sh — Check Claude Code / Claude.ai quota
#
# Usage:
#   ./claude_quota_check.sh          # Human-readable output
#   ./claude_quota_check.sh --json   # Raw JSON for piping
#   ./claude_quota_check.sh --watch  # Refresh every 60s
#
# Requires: macOS with Claude Code authenticated, python3, curl
# Token is read from macOS Keychain (same as Claude Code uses)
# ═══════════════════════════════════════════════════════════════

# Strict mode: abort on unhandled errors (-e), on use of unset variables (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# ── Extract OAuth token from macOS Keychain ──
#######################################
# Print the Claude Code OAuth access token stored in the macOS Keychain.
# The Keychain item "Claude Code-credentials" holds a JSON document; the
# token is read from its "claudeAiOauth" object when present, otherwise from
# the top level of the document.
# Arguments: none
# Outputs:   the access token on stdout; diagnostics on stderr
# Returns:   0 on success, 1 if the Keychain tool or item is missing or the
#            stored JSON does not contain a non-empty string "accessToken"
#######################################
get_token() {
  local creds token

  # Without the Keychain CLI the lookup below can only fail, and it would do
  # so with a misleading "no credentials" message.
  if ! command -v security >/dev/null 2>&1; then
    echo "ERROR: 'security' command not found; the macOS Keychain is required." >&2
    return 1
  fi

  creds=$(security find-generic-password -s "Claude Code-credentials" -w 2>/dev/null) || {
    echo "ERROR: No Claude Code credentials found in Keychain." >&2
    echo "Run 'claude' and authenticate first." >&2
    return 1
  }

  # The JSON travels over stdin so it is never interpolated into Python code.
  # An empty or non-string token is treated as a parse failure rather than
  # being handed to the API call.
  token=$(printf '%s' "$creds" | python3 -c '
import json, sys

data = json.load(sys.stdin)
oauth = data.get("claudeAiOauth", data)
token = oauth["accessToken"]
if not isinstance(token, str) or not token:
    raise SystemExit(1)
print(token)
' 2>/dev/null) || {
    echo "ERROR: Could not parse credentials JSON." >&2
    return 1
  }

  printf '%s\n' "$token"
}

# ── Fetch usage from Anthropic API ──
#######################################
# Request the OAuth usage report from the Anthropic API.
# Arguments: $1 - OAuth access token (required, non-empty)
# Outputs:   the raw response body on stdout; curl transport errors on stderr
# Returns:   1 if no token was given, otherwise curl's exit status. HTTP error
#            responses still print their body so the caller can inspect it.
#######################################
fetch_usage() {
  local token="${1:-}"
  local backslash='\' dquote='"'
  local escaped

  if [[ -z "$token" ]]; then
    echo "ERROR: fetch_usage requires an OAuth token." >&2
    return 1
  fi

  # The Authorization header is passed through a curl config read from stdin
  # so the bearer token never shows up in the process argument list. Inside a
  # quoted config value, backslash and double quote must be escaped.
  escaped=${token//"$backslash"/"$backslash$backslash"}
  escaped=${escaped//"$dquote"/"$backslash$dquote"}

  # -sS: no progress meter, but transport errors are still reported.
  # The timeouts keep a stalled connection from hanging the caller forever.
  printf 'header = "Authorization: Bearer %s"\n' "$escaped" |
    curl -sS --config - \
      --connect-timeout 10 \
      --max-time 30 \
      -H "Accept: application/json" \
      -H "Content-Type: application/json" \
      -H "User-Agent: claude-code/2.0.32" \
      -H "anthropic-beta: oauth-2025-04-20" \
      "https://api.anthropic.com/api/oauth/usage"
}

# ── Format time remaining ──
#######################################
# Format the time left until an ISO-8601 reset timestamp.
# Arguments: $1 - reset timestamp (a trailing "Z" is accepted), or
#                 empty / "null" when no reset time is known
# Outputs:   "<H>h <M>m", "<M>m", "resetting now", or "unknown" on stdout
# Returns:   0
#######################################
time_remaining() {
  local reset_at="${1:-}"

  if [[ -z "$reset_at" || "$reset_at" == "null" ]]; then
    echo "unknown"
    return
  fi

  # The timestamp is handed to Python as an argument instead of being pasted
  # into the program text, so quotes or code in the value cannot break or
  # hijack the script. Any parse failure falls through to "unknown".
  python3 -c '
import sys
from datetime import datetime, timezone

reset = datetime.fromisoformat(sys.argv[1].replace("Z", "+00:00"))
remaining = (reset - datetime.now(timezone.utc)).total_seconds()
if remaining <= 0:
    print("resetting now")
else:
    hours, rest = divmod(int(remaining), 3600)
    mins = rest // 60
    print(f"{hours}h {mins}m" if hours > 0 else f"{mins}m")
' "$reset_at" 2>/dev/null || echo "unknown"
}

# ── Bar visualization ──
#######################################
# Draw a fixed-width, color-coded usage bar (no trailing newline).
# Color: green below 0.50, yellow below 0.80, red otherwise.
# Arguments: $1 - utilization as a fraction (0.0 - 1.0); values outside that
#                 range are clamped for drawing, non-numeric input counts as 0
# Outputs:   ANSI color code, filled cells, dim empty cells, color reset
# Returns:   0
#######################################
usage_bar() {
  local pct="${1:-0}"
  local -r width=30
  local filled="" level=""
  local color bar="" gap="" i

  # One awk call does the floating-point work: the number of filled cells
  # (truncated, clamped to 0..width) and the color band. The value is passed
  # as data via -v rather than being spliced into program text.
  read -r filled level < <(
    LC_ALL=C awk -v p="$pct" -v w="$width" 'BEGIN {
      p += 0
      n = int(p * w)
      if (n < 0) n = 0
      if (n > w) n = w
      if (p < 0.50) lvl = "green"
      else if (p < 0.80) lvl = "yellow"
      else lvl = "red"
      print n, lvl
    }'
  ) || true

  # Guard against an unusable cell count (for example a NaN input).
  [[ "$filled" =~ ^[0-9]+$ ]] || filled=0

  case "$level" in
    green) color=$'\033[32m' ;;
    yellow) color=$'\033[33m' ;;
    *) color=$'\033[31m' ;;
  esac

  for ((i = 0; i < filled; i++)); do bar+="█"; done
  for ((i = filled; i < width; i++)); do gap+="░"; done

  # Escape sequences are emitted as data, not as part of the format string.
  printf '%s%s%s%s%s' "$color" "$bar" $'\033[90m' "$gap" $'\033[0m'
}

# ── Display formatted output ──
#######################################
# Render the quota status box and usage guidance for a usage report.
# Reads "utilization" and "resets_at" from the "five_hour" and "seven_day"
# objects of the report. Missing, null or malformed values are shown as
# 0% / "unknown" instead of aborting the script.
# Arguments: $1 - usage report as a JSON string
# Outputs:   the formatted report on stdout
# Returns:   0
# Calls:     usage_bar, time_remaining
#######################################
display() {
  local usage_json="${1:-}"
  # Columns between the vertical borders: 2 margin + 15 label + 30 bar cells
  # (the width usage_bar draws) + 1 space + 4 for "NNN%" + 2 margin.
  local -r inner=54
  local now parsed rule="" i
  local five_util=0 five_pct=0 five_reset="null"
  local seven_util=0 seven_pct=0 seven_reset="null"
  local five_remaining seven_remaining

  now=$(date "+%Y-%m-%d %H:%M:%S %Z")

  # Parse the report once. For each window Python prints three lines:
  # utilization, rounded percentage, reset timestamp (or "null"). The JSON is
  # read from stdin so nothing from the response is interpolated into code.
  parsed=$(printf '%s' "$usage_json" | python3 -c '
import json, math, sys

try:
    doc = json.load(sys.stdin)
except ValueError:
    doc = None
if not isinstance(doc, dict):
    doc = {}

for key in ("five_hour", "seven_day"):
    window = doc.get(key)
    if not isinstance(window, dict):
        window = {}
    try:
        util = float(window.get("utilization") or 0)
    except (TypeError, ValueError):
        util = 0.0
    if not math.isfinite(util):
        util = 0.0
    reset = window.get("resets_at")
    # Keep the output strictly one value per line.
    if not isinstance(reset, str) or len(reset.split()) != 1:
        reset = "null"
    print(util)
    # round(), not int(): 0.29 * 100 is 28.999... and must display as 29.
    print(int(round(util * 100)))
    print(reset.strip())
' 2>/dev/null) || parsed=""

  if [[ -n "$parsed" ]]; then
    {
      IFS= read -r five_util
      IFS= read -r five_pct
      IFS= read -r five_reset
      IFS= read -r seven_util
      IFS= read -r seven_pct
      IFS= read -r seven_reset
    } <<<"$parsed" || true
  fi

  five_remaining=$(time_remaining "$five_reset")
  seven_remaining=$(time_remaining "$seven_reset")

  # Build the horizontal rule from the same width used to pad every row so
  # the borders always line up.
  for ((i = 0; i < inner; i++)); do rule+="─"; done

  echo ""
  printf '  ┌%s┐\n' "$rule"
  printf '  │  %-*s│\n' "$((inner - 2))" "CLAUDE QUOTA STATUS"
  printf '  │  %-*s│\n' "$((inner - 2))" "$now"
  printf '  ├%s┤\n' "$rule"
  printf '  │  %-15s' "5-hour window:"
  usage_bar "$five_util"
  printf ' %3d%%  │\n' "$five_pct"
  printf '  │  %-*s│\n' "$((inner - 2))" "  Resets in: ${five_remaining}"
  printf '  │%*s│\n' "$inner" ""
  printf '  │  %-15s' "7-day window:"
  usage_bar "$seven_util"
  printf ' %3d%%  │\n' "$seven_pct"
  printf '  │  %-*s│\n' "$((inner - 2))" "  Resets in: ${seven_remaining}"
  printf '  └%s┘\n' "$rule"
  echo ""

  # Decision guidance for Timmy
  if ((five_pct >= 80)); then
    echo "  ⚠ 5-hour window critical. Switch to local Qwen3-14B."
    echo "    Reserve remaining quota for high-value tasks only."
  elif ((five_pct >= 50)); then
    echo "  ~ 5-hour window half spent. Batch remaining requests."
  else
    echo "  ✓ 5-hour window healthy. Full speed ahead."
  fi

  if ((seven_pct >= 80)); then
    echo "  ⚠ Weekly quota critical! Operate in local-only mode."
  elif ((seven_pct >= 60)); then
    echo "  ~ Weekly quota past 60%. Plan usage carefully."
  fi

  echo ""
}

# ── Main ──
#######################################
# Fetch the current quota usage and present it.
# Arguments: $1 - optional mode:
#                   --json   pretty-print the API response as JSON
#                   --watch  redraw the report every 60 seconds
#                   (anything else, or nothing) print the report once
# Outputs:   the report or JSON on stdout; errors on stderr
# Returns:   1 if the token cannot be read or the first fetch fails
# Calls:     get_token, fetch_usage, display
#######################################
main() {
  local token usage fresh
  local notice=""

  token=$(get_token) || return 1

  # Capture a transport failure explicitly; otherwise `set -e` would end the
  # script here without ever reaching the error message below.
  usage=$(fetch_usage "$token") || usage=""

  # An empty body or one that mentions "error" is treated as a failed fetch.
  if [[ -z "$usage" || "$usage" == *'"error"'* ]]; then
    echo "ERROR: Failed to fetch usage data." >&2
    echo "$usage" >&2
    return 1
  fi

  case "${1:-}" in
    --json)
      printf '%s\n' "$usage" | python3 -m json.tool
      ;;
    --watch)
      while true; do
        # `clear` can fail without a usable terminal; that must not end the loop.
        clear 2>/dev/null || true
        display "$usage"
        if [[ -n "$notice" ]]; then
          echo "$notice"
        fi
        echo "  Refreshing in 60s... (Ctrl+C to stop)"
        sleep 60
        # The first pass reuses the data fetched above. A failed refresh keeps
        # the last good report on screen instead of exiting or showing zeros.
        if fresh=$(fetch_usage "$token" 2>/dev/null) &&
          [[ -n "$fresh" && "$fresh" != *'"error"'* ]]; then
          usage=$fresh
          notice=""
        else
          notice="  ⚠ Last refresh failed; showing previous data."
        fi
      done
      ;;
    *)
      display "$usage"
      ;;
  esac
}

# Entry point: forward all command-line arguments to main unchanged.
main "$@"