This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/src/infrastructure/router/api.py

228 lines
6.7 KiB
Python
Raw Normal View History

"""API endpoints for Cascade Router monitoring and control."""
import asyncio
import logging
from typing import Annotated, Any
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from .cascade import CascadeRouter, get_router
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v1/router", tags=["router"])
class CompletionRequest(BaseModel):
"""Request body for completions."""
messages: list[dict[str, str]]
model: str | None = None
temperature: float = 0.7
max_tokens: int | None = None
class CompletionResponse(BaseModel):
"""Response from completion endpoint."""
content: str
provider: str
model: str
latency_ms: float
class ProviderControl(BaseModel):
"""Control a provider's status."""
action: str # "enable", "disable", "reset_circuit"
async def get_cascade_router() -> CascadeRouter:
"""Dependency to get the cascade router."""
return get_router()
@router.post("/complete", response_model=CompletionResponse)
async def complete(
request: CompletionRequest,
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],
) -> dict[str, Any]:
"""Complete a conversation with automatic failover.
Routes through providers in priority order until one succeeds.
"""
try:
result = await cascade.complete(
messages=request.messages,
model=request.model,
temperature=request.temperature,
max_tokens=request.max_tokens,
)
return result
except RuntimeError as exc:
ruff (#169) * polish: streamline nav, extract inline styles, improve tablet UX - Restructure desktop nav from 8+ flat links + overflow dropdown into 5 grouped dropdowns (Core, Agents, Intel, System, More) matching the mobile menu structure to reduce decision fatigue - Extract all inline styles from mission_control.html and base.html notification elements into mission-control.css with semantic classes - Replace JS-built innerHTML with secure DOM construction in notification loader and chat history - Add CONNECTING state to connection indicator (amber) instead of showing OFFLINE before WebSocket connects - Add tablet breakpoint (1024px) with larger touch targets for Apple Pencil / stylus use and safe-area padding for iPad toolbar - Add active-link highlighting in desktop dropdown menus - Rename "Mission Control" page title to "System Overview" to disambiguate from the chat home page - Add "Home — Timmy Time" page title to index.html https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * fix(security): move auth-gate credentials to environment variables Hardcoded username, password, and HMAC secret in auth-gate.py replaced with os.environ lookups. Startup now refuses to run if any variable is unset. Added AUTH_GATE_SECRET/USER/PASS to .env.example. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h * refactor(tooling): migrate from black+isort+bandit to ruff Replace three separate linting/formatting tools with a single ruff invocation. Updates tox.ini (lint, format, pre-push, pre-commit envs), .pre-commit-config.yaml, and CI workflow. Fixes all ruff errors including unused imports, missing raise-from, and undefined names. Ruff config maps existing bandit skips to equivalent S-rules. https://claude.ai/code/session_015uPUoKyYa8M2UAcyk5Gt6h --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-11 12:23:35 -04:00
raise HTTPException(status_code=503, detail=str(exc)) from exc
@router.get("/status")
async def get_status(
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],
) -> dict[str, Any]:
"""Get router status and provider health."""
return cascade.get_status()
@router.get("/metrics")
async def get_metrics(
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],
) -> dict[str, Any]:
"""Get detailed metrics for all providers."""
return cascade.get_metrics()
@router.get("/providers")
async def list_providers(
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],
) -> list[dict[str, Any]]:
"""List all configured providers."""
return [
{
"name": p.name,
"type": p.type,
"enabled": p.enabled,
"priority": p.priority,
"status": p.status.value,
"circuit_state": p.circuit_state.value,
"default_model": p.get_default_model(),
"models": [m["name"] for m in p.models],
}
for p in cascade.providers
]
@router.post("/providers/{provider_name}/control")
async def control_provider(
provider_name: str,
control: ProviderControl,
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],
) -> dict[str, str]:
"""Control a provider (enable/disable/reset)."""
provider = None
for p in cascade.providers:
if p.name == provider_name:
provider = p
break
if not provider:
raise HTTPException(status_code=404, detail=f"Provider {provider_name} not found")
if control.action == "enable":
provider.enabled = True
provider.status = provider.status.__class__.HEALTHY
return {"message": f"Provider {provider_name} enabled"}
elif control.action == "disable":
provider.enabled = False
from .cascade import ProviderStatus
provider.status = ProviderStatus.DISABLED
return {"message": f"Provider {provider_name} disabled"}
elif control.action == "reset_circuit":
from .cascade import CircuitState, ProviderStatus
provider.circuit_state = CircuitState.CLOSED
provider.circuit_opened_at = None
provider.half_open_calls = 0
provider.metrics.consecutive_failures = 0
provider.status = ProviderStatus.HEALTHY
return {"message": f"Circuit breaker reset for {provider_name}"}
else:
raise HTTPException(status_code=400, detail=f"Unknown action: {control.action}")
@router.post("/health-check")
async def run_health_check(
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],
) -> dict[str, Any]:
"""Run health checks on all providers."""
results = []
for provider in cascade.providers:
# Quick ping to check availability
is_healthy = cascade._check_provider_available(provider)
from .cascade import ProviderStatus
if is_healthy:
if provider.status == ProviderStatus.UNHEALTHY:
# Reset circuit if it was open but now healthy
provider.circuit_state = provider.circuit_state.__class__.CLOSED
provider.circuit_opened_at = None
provider.status = (
ProviderStatus.HEALTHY
if provider.metrics.error_rate < 0.1
else ProviderStatus.DEGRADED
)
else:
provider.status = ProviderStatus.UNHEALTHY
results.append(
{
"name": provider.name,
"type": provider.type,
"healthy": is_healthy,
"status": provider.status.value,
}
)
return {
"checked_at": asyncio.get_event_loop().time(),
"providers": results,
"healthy_count": sum(1 for r in results if r["healthy"]),
}
@router.post("/reload")
async def reload_config(
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],
) -> dict[str, Any]:
"""Hot-reload providers.yaml without restart.
Preserves circuit breaker state and metrics for existing providers.
"""
try:
result = cascade.reload_config()
return {"status": "ok", **result}
except Exception as exc:
logger.error("Config reload failed: %s", exc)
raise HTTPException(status_code=500, detail=f"Reload failed: {exc}") from exc
@router.get("/config")
async def get_config(
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],
) -> dict[str, Any]:
"""Get router configuration (without secrets)."""
cfg = cascade.config
return {
"timeout_seconds": cfg.timeout_seconds,
"max_retries_per_provider": cfg.max_retries_per_provider,
"retry_delay_seconds": cfg.retry_delay_seconds,
"circuit_breaker": {
"failure_threshold": cfg.circuit_breaker_failure_threshold,
"recovery_timeout": cfg.circuit_breaker_recovery_timeout,
"half_open_max_calls": cfg.circuit_breaker_half_open_max_calls,
},
"providers": [
{
"name": p.name,
"type": p.type,
"priority": p.priority,
"enabled": p.enabled,
}
for p in cascade.providers
],
}