"""Config loading helpers for the Cascade LLM Router.

Parses providers.yaml, expands env vars, and checks provider availability.
"""

from __future__ import annotations

import logging
import os
import re

from infrastructure.router.models import Provider, RouterConfig

logger = logging.getLogger(__name__)

# Optional third-party dependencies: each name is bound to None when the
# package is not installed, so code below can test ``<name> is None``.
try:
    import yaml
except ImportError:
    yaml = None  # type: ignore

try:
    import requests
except ImportError:
    requests = None  # type: ignore

# Matches ``${VAR}`` references; group 1 is the variable name.
_ENV_VAR_PATTERN = re.compile(r"\$\{(\w+)\}")


def expand_env_vars(content: str) -> str:
    """Expand ``${VAR}`` syntax in YAML content.

    Uses os.environ directly (not settings) because this is a generic
    YAML config loader that must expand arbitrary variable references.

    Args:
        content: Raw text that may contain ``${VAR}`` references.

    Returns:
        The text with every reference to a set environment variable replaced
        by its value. References to unset variables are left verbatim.
    """

    def replace_var(match: re.Match[str]) -> str:
        # Fall back to the full ``${VAR}`` text when the variable is unset.
        return os.environ.get(match.group(1), match.group(0))

    return _ENV_VAR_PATTERN.sub(replace_var, content)


def parse_router_config(data: dict) -> RouterConfig:
    """Build a RouterConfig from parsed YAML data.

    Reads the ``cascade`` section (including its nested ``circuit_breaker``
    mapping), the ``multimodal`` section and the top-level
    ``fallback_chains`` key, applying a default for every missing setting.

    Args:
        data: Mapping produced by parsing the router YAML file.

    Returns:
        A RouterConfig populated from ``data``.
    """
    # A YAML key with no value (e.g. ``cascade:``) parses to None rather than
    # being absent, so ``.get(key, {})`` alone would hand back None here.
    cascade = data.get("cascade") or {}
    cb = cascade.get("circuit_breaker") or {}
    multimodal = data.get("multimodal") or {}

    return RouterConfig(
        timeout_seconds=cascade.get("timeout_seconds", 30),
        max_retries_per_provider=cascade.get("max_retries_per_provider", 2),
        retry_delay_seconds=cascade.get("retry_delay_seconds", 1),
        circuit_breaker_failure_threshold=cb.get("failure_threshold", 5),
        circuit_breaker_recovery_timeout=cb.get("recovery_timeout", 60),
        circuit_breaker_half_open_max_calls=cb.get("half_open_max_calls", 2),
        auto_pull_models=multimodal.get("auto_pull", True),
        fallback_chains=data.get("fallback_chains") or {},
    )


def load_providers(data: dict) -> list[Provider]:
    """Load and filter providers from parsed YAML data (unsorted).

    Entries that are not enabled are skipped. Every remaining entry is turned
    into a Provider and kept only if ``check_provider_available`` accepts it;
    rejected providers are reported with a warning.

    Args:
        data: Mapping produced by parsing the router YAML file.

    Returns:
        The enabled and available providers, in file order.

    Raises:
        KeyError: If an enabled entry lacks ``name`` or ``type``.
    """
    providers: list[Provider] = []

    # ``providers:`` with no value parses to None; treat it as an empty list.
    for p_data in data.get("providers") or []:
        # NOTE(review): an entry without an ``enabled`` key is skipped here
        # (default False), although the constructor call below defaults the
        # same key to True - confirm which default is intended.
        if not p_data.get("enabled", False):
            continue

        provider = Provider(
            name=p_data["name"],
            type=p_data["type"],
            enabled=p_data.get("enabled", True),
            priority=p_data.get("priority", 99),
            tier=p_data.get("tier"),
            url=p_data.get("url"),
            api_key=p_data.get("api_key"),
            base_url=p_data.get("base_url"),
            models=p_data.get("models") or [],
        )

        if check_provider_available(provider):
            providers.append(provider)
        else:
            logger.warning("Provider %s not available, skipping", provider.name)

    return providers


def check_provider_available(provider: Provider) -> bool:
    """Check if a provider is actually available.

    * ``ollama``: GET ``<url>/api/tags`` must answer with HTTP 200.
    * ``vllm_mlx``: GET ``<server root>/health`` must answer with HTTP 200.
    * ``openai`` / ``anthropic`` / ``grok``: an API key must be configured.
    * any other type: assumed available.

    For the two HTTP-probed types the provider is assumed available when the
    ``requests`` package is not installed, and any error raised by the probe
    counts as "not available".

    Args:
        provider: The provider to probe.

    Returns:
        True if the provider passes its check, False otherwise.
    """
    # Imported at call time rather than at module import time.
    from config import settings

    if provider.type == "ollama":
        # Check if Ollama is running
        if requests is None:
            # Can't check without requests, assume available
            return True
        try:
            # Drop trailing slashes so the probe URL never contains "//api".
            url = (provider.url or settings.ollama_url).rstrip("/")
            response = requests.get(f"{url}/api/tags", timeout=5)
            return response.status_code == 200
        except Exception as exc:
            logger.debug("Ollama provider check error: %s", exc)
            return False

    elif provider.type == "vllm_mlx":
        # Check if local vllm-mlx server is running (OpenAI-compatible)
        if requests is None:
            return True
        try:
            base_url = provider.base_url or provider.url or "http://localhost:8000"
            # Strip /v1 suffix - health endpoint is at the root
            server_root = base_url.rstrip("/")
            if server_root.endswith("/v1"):
                server_root = server_root[:-3]
            response = requests.get(f"{server_root}/health", timeout=5)
            return response.status_code == 200
        except Exception as exc:
            logger.debug("vllm-mlx provider check error: %s", exc)
            return False

    elif provider.type in ("openai", "anthropic", "grok"):
        # Check if API key is set
        api_key = provider.api_key
        if api_key is None or api_key == "":
            return False
        # expand_env_vars leaves "${VAR}" untouched when VAR is unset, so a
        # key that still contains a placeholder was never actually supplied.
        if isinstance(api_key, str) and _ENV_VAR_PATTERN.search(api_key):
            logger.debug(
                "Provider %s API key is an unresolved env placeholder",
                provider.name,
            )
            return False
        return True

    return True