Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 29s
PR Checklist / pr-checklist (pull_request) Failing after 7m23s
Smoke Test / smoke (pull_request) Failing after 20s
Validate Config / YAML Lint (pull_request) Failing after 14s
Validate Config / JSON Validate (pull_request) Successful in 15s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m1s
Validate Config / Shell Script Lint (pull_request) Failing after 46s
Validate Config / Cron Syntax Check (pull_request) Successful in 9s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 10s
Validate Config / Playbook Schema Validation (pull_request) Successful in 28s
Architecture Lint / Lint Repository (pull_request) Has been cancelled
Validate Config / Python Test Suite (pull_request) Has been cancelled
- Checks OpenRouter balance via /api/v1/auth/key
- Tests Nous and Anthropic API keys
- Verifies Ollama is running
- Pre-flight check before session launch
- Returns exit code for automation

Closes #508
272 lines
8.7 KiB
Python
272 lines
8.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Pre-Flight Provider Check Script
|
|
Issue #508: [Robustness] Credential drain detection — provider health checks
|
|
|
|
Pre-flight check before session launch: verifies provider credentials and balance.
|
|
|
|
Usage:
|
|
python3 preflight-provider-check.py # Check all providers
|
|
python3 preflight-provider-check.py --launch # Check and return exit code
|
|
python3 preflight-provider-check.py --balance # Check OpenRouter balance
|
|
"""
|
|
|
|
import os, sys, json, yaml, urllib.request
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
# Configuration
# Hermes home directory: the HERMES_HOME environment variable when set,
# otherwise ~/.hermes.
HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home().joinpath(".hermes")))

# Directory and file that receive the pre-flight log output.
LOG_DIR = Path.home().joinpath(".local", "timmy", "fleet-health")
LOG_FILE = LOG_DIR.joinpath("preflight-check.log")
|
|
|
|
def log(msg):
    """Append a UTC-timestamped entry to the log file and echo it.

    The entry has the form ``[YYYY-MM-DD HH:MM:SS] <msg>``. It is appended to
    LOG_FILE (LOG_DIR is created first if needed) and then printed to stdout,
    unless ``--quiet`` is present in ``sys.argv``.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    # str.join keeps the original contract: a non-string msg raises TypeError.
    entry = "".join(("[", stamp, "] ", msg))

    LOG_DIR.mkdir(parents=True, exist_ok=True)
    with LOG_FILE.open("a") as handle:
        handle.write(entry + "\n")

    if "--quiet" in sys.argv:
        return
    print(entry)
|
|
|
|
def get_provider_api_key(provider):
    """Return the API key for *provider*, or None when none is configured.

    The variable ``<PROVIDER>_API_KEY`` (provider name upper-cased) is looked
    up first in ``HERMES_HOME/.env`` and then in the process environment.

    Args:
        provider: Provider name, e.g. ``"openrouter"``.

    Returns:
        The key with surrounding whitespace and quotes removed, or whatever
        ``os.environ.get`` yields (None when unset) if the .env file has no
        usable entry.
    """
    var_name = provider.upper() + "_API_KEY"
    assignment = var_name + "="

    env_file = HERMES_HOME / ".env"
    if env_file.exists():
        with open(env_file) as f:
            for line in f:
                line = line.strip()
                # Accept the shell-style "export NAME=value" form as well.
                if line.startswith("export "):
                    line = line[len("export "):].lstrip()
                if not line.startswith(assignment):
                    continue
                value = line.split("=", 1)[1].strip().strip("'\"")
                # A blank placeholder (NAME=) must not mask a real key that
                # appears later in the file or in the environment.
                if value:
                    return value

    return os.environ.get(var_name)
|
|
|
|
def check_openrouter_balance(api_key):
    """Check an OpenRouter key and its remaining credit via /api/v1/auth/key.

    Args:
        api_key: Bearer token to test; a falsy value short-circuits without
            making a request.

    Returns:
        A ``(healthy, message, balance)`` tuple:
        - ``(True, "Credits available", remaining)`` when the response has a
          truthy ``limit`` and ``limit - usage`` is positive;
        - ``(True, "Unlimited or unknown balance", None)`` when ``limit`` is
          missing, null or zero;
        - ``(False, reason, 0)`` when there is no key, no credit is left, the
          server answers with an HTTP error, or any other exception occurs
          (the message is then the exception text, truncated to 100 chars).
    """
    if not api_key:
        return False, "No API key", 0

    try:
        req = urllib.request.Request(
            "https://openrouter.ai/api/v1/auth/key",
            headers={"Authorization": "Bearer " + api_key},
        )
        # The context manager closes the connection even if reading or
        # parsing the body fails.
        with urllib.request.urlopen(req, timeout=10) as resp:
            data = json.loads(resp.read())

        key_info = data.get("data", {})
        limit = key_info.get("limit", 0)
        usage = key_info.get("usage", 0)
        # A falsy limit means no cap was reported, so the balance is unknown.
        remaining = limit - usage if limit else None

        if remaining is None:
            return True, "Unlimited or unknown balance", None
        if remaining <= 0:
            return False, "No credits remaining", 0
        return True, "Credits available", remaining

    except urllib.error.HTTPError as e:
        if e.code == 401:
            return False, "Invalid API key", 0
        return False, "HTTP " + str(e.code), 0
    except Exception as e:
        # Best-effort health probe: any failure is reported as unhealthy.
        return False, str(e)[:100], 0
|
|
|
|
def check_nous_key(api_key):
    """Validate a Nous API key by requesting the model list.

    Args:
        api_key: Bearer token to test; a falsy value short-circuits without
            making a request.

    Returns:
        A ``(healthy, message)`` tuple. ``(True, "Valid key")`` on HTTP 200;
        otherwise ``False`` with ``"No API key"``, ``"Invalid API key"``
        (401), ``"Forbidden"`` (403), ``"HTTP <code>"``, or the text of any
        other exception truncated to 100 characters.
    """
    if not api_key:
        return False, "No API key"

    try:
        req = urllib.request.Request(
            "https://inference.nousresearch.com/v1/models",
            headers={"Authorization": "Bearer " + api_key},
        )
        # Only the status is needed; the context manager releases the
        # connection instead of leaving the response open.
        with urllib.request.urlopen(req, timeout=10) as resp:
            status = resp.status
    except urllib.error.HTTPError as e:
        if e.code == 401:
            return False, "Invalid API key"
        if e.code == 403:
            return False, "Forbidden"
        return False, "HTTP " + str(e.code)
    except Exception as e:
        # Best-effort health probe: any failure is reported as unhealthy.
        return False, str(e)[:100]

    if status == 200:
        return True, "Valid key"
    return False, "HTTP " + str(status)
|
|
|
|
def check_anthropic_key(api_key):
    """Validate an Anthropic API key by requesting the model list.

    Args:
        api_key: Value sent in the ``x-api-key`` header; a falsy value
            short-circuits without making a request.

    Returns:
        A ``(healthy, message)`` tuple. ``(True, "Valid key")`` on HTTP 200;
        otherwise ``False`` with ``"No API key"``, ``"Invalid API key"``
        (401), ``"Forbidden"`` (403), ``"HTTP <code>"``, or the text of any
        other exception truncated to 100 characters.
    """
    if not api_key:
        return False, "No API key"

    try:
        req = urllib.request.Request(
            "https://api.anthropic.com/v1/models",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
            },
        )
        # Only the status is needed; the context manager releases the
        # connection instead of leaving the response open.
        with urllib.request.urlopen(req, timeout=10) as resp:
            status = resp.status
    except urllib.error.HTTPError as e:
        if e.code == 401:
            return False, "Invalid API key"
        if e.code == 403:
            return False, "Forbidden"
        return False, "HTTP " + str(e.code)
    except Exception as e:
        # Best-effort health probe: any failure is reported as unhealthy.
        return False, str(e)[:100]

    if status == 200:
        return True, "Valid key"
    return False, "HTTP " + str(status)
|
|
|
|
def check_ollama():
    """Check whether a local Ollama server answers on port 11434.

    Requests ``http://localhost:11434/api/tags`` with a 5-second timeout.

    Returns:
        A ``(healthy, message)`` tuple. On HTTP 200 the message reports the
        number of entries in the response's ``models`` list; otherwise the
        result is ``False`` with ``"HTTP <status>"`` or the text of the
        raised exception truncated to 100 characters.
    """
    try:
        req = urllib.request.Request("http://localhost:11434/api/tags")
        # The context manager closes the connection on every exit path.
        with urllib.request.urlopen(req, timeout=5) as resp:
            if resp.status != 200:
                return False, "HTTP " + str(resp.status)
            data = json.loads(resp.read())

        models = data.get("models", [])
        return True, str(len(models)) + " models loaded"

    except Exception as e:
        # Covers connection refused, timeouts, HTTP errors and bad JSON.
        return False, str(e)[:100]
|
|
|
|
def get_configured_provider():
    """Return the ``model.provider`` value from ``HERMES_HOME/config.yaml``.

    Returns:
        The value stored under ``model -> provider``, or None when the file
        is missing, cannot be read or parsed, or does not contain a mapping
        with that entry.
    """
    config_file = HERMES_HOME / "config.yaml"
    if not config_file.exists():
        return None

    try:
        with open(config_file) as f:
            config = yaml.safe_load(f)
    except Exception as exc:
        # Still best-effort (an unreadable config must not abort the check),
        # but report the problem, and let KeyboardInterrupt/SystemExit
        # propagate, which a bare ``except:`` would have swallowed.
        log("WARNING: Could not read " + str(config_file) + ": " + str(exc)[:100])
        return None

    # An empty file parses to None, and YAML documents may also be lists or
    # scalars; only a mapping can carry a "model" section.
    if not isinstance(config, dict):
        return None

    model_config = config.get("model", {})
    if isinstance(model_config, dict):
        return model_config.get("provider")
    return None
|
|
|
|
def run_preflight_check():
    """Probe every provider, log a health report, and return the findings.

    Providers are checked in the order openrouter, nous, anthropic, ollama,
    and the report lists them in that order.

    Returns:
        A ``(results, configured)`` tuple. ``results`` maps each provider
        name to a dict with ``healthy`` and ``message`` keys (plus
        ``balance`` for openrouter). ``configured`` is the value returned by
        ``get_configured_provider()``.
    """
    log("=== Pre-Flight Provider Check ===")

    # Each key is fetched immediately before its own check.
    openrouter_ok, openrouter_msg, openrouter_balance = check_openrouter_balance(
        get_provider_api_key("openrouter")
    )
    nous_ok, nous_msg = check_nous_key(get_provider_api_key("nous"))
    anthropic_ok, anthropic_msg = check_anthropic_key(
        get_provider_api_key("anthropic")
    )
    ollama_ok, ollama_msg = check_ollama()

    results = {
        "openrouter": {
            "healthy": openrouter_ok,
            "message": openrouter_msg,
            "balance": openrouter_balance,
        },
        "nous": {"healthy": nous_ok, "message": nous_msg},
        "anthropic": {"healthy": anthropic_ok, "message": anthropic_msg},
        "ollama": {"healthy": ollama_ok, "message": ollama_msg},
    }

    configured = get_configured_provider()

    # Summary line followed by one line per provider.
    healthy_total = len([entry for entry in results.values() if entry["healthy"]])
    log(f"Results: {healthy_total}/{len(results)} providers healthy")

    for name, entry in results.items():
        state = "UNHEALTHY" if not entry["healthy"] else "HEALTHY"
        suffix = ""
        if name == "openrouter" and entry.get("balance") is not None:
            suffix = f" (balance: {entry['balance']})"
        log(f" {name}: {state} - {entry['message']}{suffix}")

    if configured:
        log("Configured provider: " + configured)
        configured_entry = results.get(configured)
        if configured_entry is not None and not configured_entry["healthy"]:
            log("WARNING: Configured provider " + configured + " is UNHEALTHY!")

    return results, configured
|
|
|
|
def check_launch_readiness():
    """Decide whether sessions may be launched, based on provider health.

    Runs the full pre-flight check. Launch is blocked when the configured
    provider is one of the checked providers and is unhealthy, or when no
    provider at all is healthy.

    Returns:
        ``(True, "Ready")`` when launch may proceed, otherwise
        ``(False, reason)``.
    """
    results, configured = run_preflight_check()

    # The configured provider only blocks launch if it was actually checked.
    configured_entry = results.get(configured) if configured else None
    if configured_entry is not None and not configured_entry["healthy"]:
        log("LAUNCH BLOCKED: Configured provider " + configured + " is unhealthy")
        return False, configured + " is unhealthy"

    # Otherwise any single healthy provider is enough.
    healthy_names = [name for name, entry in results.items() if entry["healthy"]]
    if len(healthy_names) == 0:
        log("LAUNCH BLOCKED: No healthy providers available")
        return False, "No healthy providers"

    log(f"LAUNCH READY: {len(healthy_names)} healthy providers available")
    return True, "Ready"
|
|
|
|
def show_balance():
    """Print the OpenRouter balance, or the reason it is unavailable.

    Prints a notice when no OpenRouter key is found. Prints the numeric
    balance when the check succeeds and reports one. In every other case
    prints the status message returned by the check.
    """
    api_key = get_provider_api_key("openrouter")
    if not api_key:
        print("No OpenRouter API key found")
        return

    ok, msg, balance = check_openrouter_balance(api_key)

    # A number is shown only for a healthy key with a known balance; failures
    # and unlimited/unknown balances both fall through to the message.
    if ok and balance is not None:
        print("OpenRouter balance: " + str(balance) + " credits")
        return
    print("OpenRouter: " + msg)
|
|
|
|
def main():
    """Dispatch on command-line flags.

    ``--balance`` prints the OpenRouter balance. ``--launch`` runs the
    launch-readiness check, prints READY or BLOCKED, and exits with status 0
    or 1 respectively. With neither flag the full pre-flight check runs.
    ``--balance`` takes precedence when both flags are given.
    """
    args = sys.argv

    if "--balance" in args:
        show_balance()
        return

    if "--launch" not in args:
        run_preflight_check()
        return

    ready, message = check_launch_readiness()
    if not ready:
        print("BLOCKED: " + message)
        sys.exit(1)
    print("READY")
    sys.exit(0)


if __name__ == "__main__":
    main()
|