forked from Rockachopa/Timmy-time-dashboard
Automated salvage commit — agent session ended (exit 124). Work in progress, may need continuation.
124 lines
4.0 KiB
Python
124 lines
4.0 KiB
Python
"""Config loading helpers for the Cascade LLM Router.
|
|
|
|
Parses providers.yaml, expands env vars, and checks provider availability.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
|
|
from infrastructure.router.models import Provider, RouterConfig
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
try:
|
|
import yaml
|
|
except ImportError:
|
|
yaml = None # type: ignore
|
|
|
|
try:
|
|
import requests
|
|
except ImportError:
|
|
requests = None # type: ignore
|
|
|
|
|
|
def expand_env_vars(content: str) -> str:
    """Substitute ``${VAR}`` placeholders in YAML text with environment values.

    The process environment (``os.environ``) is consulted directly rather
    than the application settings object: this loader is generic and has to
    resolve whatever variable names the YAML happens to reference.

    Args:
        content: Raw YAML text that may contain ``${NAME}`` placeholders,
            where ``NAME`` is one or more word characters.

    Returns:
        The text with every placeholder whose variable is set replaced by
        that variable's value. Placeholders naming unset variables are left
        in the text exactly as written.
    """
    import os
    import re

    placeholder = re.compile(r"\$\{(\w+)\}")
    environment = os.environ

    def _substitute(found: "re.Match[str]") -> str:
        whole, name = found.group(0, 1)
        if name in environment:
            return environment[name]
        # Unset variable: keep the original ``${NAME}`` text untouched.
        return whole

    return placeholder.sub(_substitute, content)
|
|
|
|
|
|
def parse_router_config(data: dict) -> RouterConfig:
    """Build a RouterConfig from parsed YAML data.

    Reads the ``cascade`` section (including its nested ``circuit_breaker``
    mapping), the ``multimodal`` section and the top-level
    ``fallback_chains`` key. Any setting that is missing falls back to the
    default written next to it below.

    Args:
        data: Mapping produced by parsing the router YAML file.

    Returns:
        A RouterConfig populated from ``data``.
    """
    # A YAML key that is present but has no value (e.g. a bare ``cascade:``)
    # parses to None, and dict.get only applies its default when the key is
    # absent. Normalise None to an empty mapping so the lookups below do not
    # fail on an empty section.
    cascade = data.get("cascade") or {}
    cb = cascade.get("circuit_breaker") or {}
    multimodal = data.get("multimodal") or {}
    fallback_chains = data.get("fallback_chains") or {}

    return RouterConfig(
        timeout_seconds=cascade.get("timeout_seconds", 30),
        max_retries_per_provider=cascade.get("max_retries_per_provider", 2),
        retry_delay_seconds=cascade.get("retry_delay_seconds", 1),
        circuit_breaker_failure_threshold=cb.get("failure_threshold", 5),
        circuit_breaker_recovery_timeout=cb.get("recovery_timeout", 60),
        circuit_breaker_half_open_max_calls=cb.get("half_open_max_calls", 2),
        auto_pull_models=multimodal.get("auto_pull", True),
        fallback_chains=fallback_chains,
    )
|
|
|
|
|
|
def load_providers(data: dict) -> list[Provider]:
    """Load and filter providers from parsed YAML data (unsorted).

    An entry is kept only when its ``enabled`` flag is truthy (an entry
    without the flag is treated as disabled) and check_provider_available
    reports the provider as available. Entries that fail the availability
    check are skipped with a warning. Input order is preserved.

    Args:
        data: Mapping produced by parsing the router YAML file; provider
            entries are read from its ``providers`` key.

    Returns:
        The enabled and available providers, in the order they appear.

    Raises:
        KeyError: If an enabled entry lacks ``name`` or ``type``.
    """
    providers: list[Provider] = []

    # ``providers:`` with no value parses to None; treat it like an absent
    # key instead of failing when iterating.
    for p_data in data.get("providers") or []:
        # Read the flag once so the filter and the constructed Provider
        # always agree on its value.
        enabled = p_data.get("enabled", False)
        if not enabled:
            continue

        provider = Provider(
            name=p_data["name"],
            type=p_data["type"],
            enabled=enabled,
            priority=p_data.get("priority", 99),
            tier=p_data.get("tier"),
            url=p_data.get("url"),
            api_key=p_data.get("api_key"),
            base_url=p_data.get("base_url"),
            # A bare ``models:`` key parses to None; fall back to no models.
            models=p_data.get("models") or [],
        )

        if check_provider_available(provider):
            providers.append(provider)
        else:
            logger.warning("Provider %s not available, skipping", provider.name)

    return providers
|
|
|
|
|
|
def _api_key_is_set(api_key) -> bool:
    """Return True when ``api_key`` holds a usable, non-placeholder value."""
    if api_key is None:
        return False
    if not isinstance(api_key, str):
        # Non-string values are accepted as-is; only None counts as missing.
        return True
    key = api_key.strip()
    if not key:
        return False
    # expand_env_vars leaves ``${VAR}`` untouched when VAR is not set in the
    # environment, so a key that is still a placeholder means "no key".
    return not (key.startswith("${") and key.endswith("}"))


def check_provider_available(provider: Provider) -> bool:
    """Check if a provider is actually available.

    * ``ollama``: GET ``<url>/api/tags`` must answer with HTTP 200. The URL
      is ``provider.url`` or, when that is unset, ``settings.ollama_url``.
    * ``vllm_mlx``: GET ``<server root>/health`` must answer with HTTP 200.
      The server root is ``provider.base_url``, ``provider.url`` or
      ``http://localhost:8000``, with a trailing ``/v1`` removed.
    * ``openai`` / ``anthropic`` / ``grok``: an API key must be configured.
      Empty, whitespace-only and unexpanded ``${VAR}`` values do not count.
    * Any other type is reported as available.

    When ``requests`` could not be imported, the HTTP checks are skipped and
    the provider is assumed to be available.

    Args:
        provider: The provider whose availability should be checked.

    Returns:
        True when the provider passes the check for its type, else False.
        Errors raised while probing are logged at debug level and reported
        as unavailable.
    """
    if provider.type == "ollama":
        # Check if Ollama is running
        if requests is None:
            # Can't check without requests, assume available
            return True
        if provider.url:
            url = provider.url
        else:
            # Imported lazily: only the Ollama fallback URL needs settings,
            # so other provider types never touch the config module.
            from config import settings

            url = settings.ollama_url
        try:
            # Strip a trailing slash so the probe never requests //api/tags.
            root = str(url).rstrip("/")
            response = requests.get(f"{root}/api/tags", timeout=5)
            return response.status_code == 200
        except Exception as exc:
            logger.debug("Ollama provider check error: %s", exc)
            return False

    if provider.type == "vllm_mlx":
        # Check if local vllm-mlx server is running (OpenAI-compatible)
        if requests is None:
            return True
        try:
            base_url = provider.base_url or provider.url or "http://localhost:8000"
            # Strip /v1 suffix — health endpoint is at the root
            server_root = base_url.rstrip("/")
            if server_root.endswith("/v1"):
                server_root = server_root[:-3]
            response = requests.get(f"{server_root}/health", timeout=5)
            return response.status_code == 200
        except Exception as exc:
            logger.debug("vllm-mlx provider check error: %s", exc)
            return False

    if provider.type in ("openai", "anthropic", "grok"):
        # Hosted APIs are not probed; they only need a configured key.
        return _api_key_is_set(provider.api_key)

    return True
|