From 37c8ec7ecaec5a7cb0d6ea5d3ac2c867cb48c267 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Mon, 23 Mar 2026 14:19:23 -0400 Subject: [PATCH] feat: Add Claude API fallback tier to cascade.py Adds a `cascade_tier` parameter to the chat-completion method in `CascadeRouter`. When `cascade_tier` is set to `"frontier_required"`, the router will only use the `anthropic` provider. This allows for a fallback to a more powerful model for high-stakes tasks, while still using local models for the majority of requests. Fixes #980 --- config/providers.yaml | 4 ++++ poetry.lock | 30 +++++++++++++++++++++++++++- pyproject.toml | 1 + src/infrastructure/router/cascade.py | 18 ++++++++++++++++- 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/config/providers.yaml b/config/providers.yaml index b2f3f5c5..722952b7 100644 --- a/config/providers.yaml +++ b/config/providers.yaml @@ -22,6 +22,7 @@ providers: type: ollama enabled: true priority: 1 + tier: local url: "http://localhost:11434" models: # Text + Tools models @@ -97,6 +98,7 @@ providers: type: vllm_mlx enabled: false # Enable when vllm-mlx server is running priority: 2 + tier: local base_url: "http://localhost:8000/v1" models: - name: Qwen/Qwen2.5-14B-Instruct-MLX @@ -112,6 +114,7 @@ providers: type: openai enabled: false # Enable by setting OPENAI_API_KEY priority: 3 + tier: standard_cloud api_key: "${OPENAI_API_KEY}" # Loaded from environment base_url: null # Use default OpenAI endpoint models: @@ -128,6 +131,7 @@ providers: type: anthropic enabled: false # Enable by setting ANTHROPIC_API_KEY priority: 4 + tier: frontier api_key: "${ANTHROPIC_API_KEY}" models: - name: claude-3-haiku-20240307 diff --git a/poetry.lock b/poetry.lock index b3a964e6..f6dfc263 100644 --- a/poetry.lock +++ b/poetry.lock @@ -419,6 +419,34 @@ files = [ {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, ] +[[package]] +name = "anthropic" +version = "0.86.0" +description = "The official Python library for the anthropic API" +optional = false 
+python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "anthropic-0.86.0-py3-none-any.whl", hash = "sha256:9d2bbd339446acce98858c5627d33056efe01f70435b22b63546fe7edae0cd57"}, + {file = "anthropic-0.86.0.tar.gz", hash = "sha256:60023a7e879aa4fbb1fed99d487fe407b2ebf6569603e5047cfe304cebdaa0e5"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +docstring-parser = ">=0.15,<1" +httpx = ">=0.25.0,<1" +jiter = ">=0.4.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +typing-extensions = ">=4.14,<5" + +[package.extras] +aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.9)"] +bedrock = ["boto3 (>=1.28.57)", "botocore (>=1.31.57)"] +mcp = ["mcp (>=1.0) ; python_version >= \"3.10\""] +vertex = ["google-auth[requests] (>=2,<3)"] + [[package]] name = "anyio" version = "4.12.1" @@ -9672,4 +9700,4 @@ voice = ["openai-whisper", "piper-tts", "pyttsx3", "sounddevice"] [metadata] lock-version = "2.1" python-versions = ">=3.11,<4" -content-hash = "008bc91ad0301d57d26339ec74ba1a09fb717a36447282fd2885682270b7b8df" +content-hash = "cc50755f322b8755e85ab7bdf0668609612d885552aba14caf175326eedfa216" diff --git a/pyproject.toml b/pyproject.toml index cc69e855..d617c350 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ pytest-timeout = { version = ">=2.3.0", optional = true } selenium = { version = ">=4.20.0", optional = true } pytest-randomly = { version = ">=3.16.0", optional = true } pytest-xdist = { version = ">=3.5.0", optional = true } +anthropic = "^0.86.0" [tool.poetry.extras] telegram = ["python-telegram-bot"] diff --git a/src/infrastructure/router/cascade.py b/src/infrastructure/router/cascade.py index 4f7510ae..84f07e90 100644 --- a/src/infrastructure/router/cascade.py +++ b/src/infrastructure/router/cascade.py @@ -114,6 +114,7 @@ class Provider: type: str # ollama, openai, anthropic enabled: bool priority: int + tier: str | None = None # e.g., "local", "standard_cloud", "frontier" url: str | None = None api_key: str | None = None 
base_url: str | None = None @@ -267,6 +268,7 @@ class CascadeRouter: type=p_data["type"], enabled=p_data.get("enabled", True), priority=p_data.get("priority", 99), + tier=p_data.get("tier"), url=p_data.get("url"), api_key=p_data.get("api_key"), base_url=p_data.get("base_url"), @@ -532,6 +534,7 @@ class CascadeRouter: model: str | None = None, temperature: float = 0.7, max_tokens: int | None = None, + cascade_tier: str | None = None, ) -> dict: """Complete a chat conversation with automatic failover. @@ -545,6 +548,8 @@ class CascadeRouter: model: Preferred model (tries this first, then provider defaults) temperature: Sampling temperature max_tokens: Maximum tokens to generate + cascade_tier: If specified, filters providers by this tier. + - "frontier_required": Uses only Anthropic provider for top-tier models. Returns: Dict with content, provider_used, and metrics @@ -558,7 +563,18 @@ class CascadeRouter: errors = [] - for provider in self.providers: + providers = self.providers + if cascade_tier == "frontier_required": + providers = [p for p in self.providers if p.type == "anthropic"] + if not providers: + raise RuntimeError("No Anthropic provider configured for 'frontier_required' tier.") + elif cascade_tier: + providers = [p for p in self.providers if p.tier == cascade_tier] + if not providers: + raise RuntimeError(f"No providers found for tier: {cascade_tier}") + + + for provider in providers: if not self._is_provider_available(provider): continue