feat: Add Claude API fallback tier to cascade.py
Some checks failed
Tests / lint (pull_request) Failing after 15s
Tests / test (pull_request) Has been skipped

Adds a `cascade_tier` parameter to the chat completion method in `cascade.py`.
When `cascade_tier` is set to `"frontier_required"`, the router will only use
the `anthropic` provider.

This allows for a fallback to a more powerful model for high-stakes tasks,
while still using local models for the majority of requests.

Fixes #980
This commit is contained in:
Alexander Whitestone
2026-03-23 14:19:23 -04:00
parent 32a5b092d0
commit 37c8ec7eca
4 changed files with 51 additions and 2 deletions

View File

@@ -22,6 +22,7 @@ providers:
type: ollama type: ollama
enabled: true enabled: true
priority: 1 priority: 1
tier: local
url: "http://localhost:11434" url: "http://localhost:11434"
models: models:
# Text + Tools models # Text + Tools models
@@ -97,6 +98,7 @@ providers:
type: vllm_mlx type: vllm_mlx
enabled: false # Enable when vllm-mlx server is running enabled: false # Enable when vllm-mlx server is running
priority: 2 priority: 2
tier: local
base_url: "http://localhost:8000/v1" base_url: "http://localhost:8000/v1"
models: models:
- name: Qwen/Qwen2.5-14B-Instruct-MLX - name: Qwen/Qwen2.5-14B-Instruct-MLX
@@ -112,6 +114,7 @@ providers:
type: openai type: openai
enabled: false # Enable by setting OPENAI_API_KEY enabled: false # Enable by setting OPENAI_API_KEY
priority: 3 priority: 3
tier: standard_cloud
api_key: "${OPENAI_API_KEY}" # Loaded from environment api_key: "${OPENAI_API_KEY}" # Loaded from environment
base_url: null # Use default OpenAI endpoint base_url: null # Use default OpenAI endpoint
models: models:
@@ -128,6 +131,7 @@ providers:
type: anthropic type: anthropic
enabled: false # Enable by setting ANTHROPIC_API_KEY enabled: false # Enable by setting ANTHROPIC_API_KEY
priority: 4 priority: 4
tier: frontier
api_key: "${ANTHROPIC_API_KEY}" api_key: "${ANTHROPIC_API_KEY}"
models: models:
- name: claude-3-haiku-20240307 - name: claude-3-haiku-20240307

30
poetry.lock generated
View File

@@ -419,6 +419,34 @@ files = [
{file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
] ]
[[package]]
name = "anthropic"
version = "0.86.0"
description = "The official Python library for the anthropic API"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "anthropic-0.86.0-py3-none-any.whl", hash = "sha256:9d2bbd339446acce98858c5627d33056efe01f70435b22b63546fe7edae0cd57"},
{file = "anthropic-0.86.0.tar.gz", hash = "sha256:60023a7e879aa4fbb1fed99d487fe407b2ebf6569603e5047cfe304cebdaa0e5"},
]
[package.dependencies]
anyio = ">=3.5.0,<5"
distro = ">=1.7.0,<2"
docstring-parser = ">=0.15,<1"
httpx = ">=0.25.0,<1"
jiter = ">=0.4.0,<1"
pydantic = ">=1.9.0,<3"
sniffio = "*"
typing-extensions = ">=4.14,<5"
[package.extras]
aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.9)"]
bedrock = ["boto3 (>=1.28.57)", "botocore (>=1.31.57)"]
mcp = ["mcp (>=1.0) ; python_version >= \"3.10\""]
vertex = ["google-auth[requests] (>=2,<3)"]
[[package]] [[package]]
name = "anyio" name = "anyio"
version = "4.12.1" version = "4.12.1"
@@ -9672,4 +9700,4 @@ voice = ["openai-whisper", "piper-tts", "pyttsx3", "sounddevice"]
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = ">=3.11,<4" python-versions = ">=3.11,<4"
content-hash = "008bc91ad0301d57d26339ec74ba1a09fb717a36447282fd2885682270b7b8df" content-hash = "cc50755f322b8755e85ab7bdf0668609612d885552aba14caf175326eedfa216"

View File

@@ -59,6 +59,7 @@ pytest-timeout = { version = ">=2.3.0", optional = true }
selenium = { version = ">=4.20.0", optional = true } selenium = { version = ">=4.20.0", optional = true }
pytest-randomly = { version = ">=3.16.0", optional = true } pytest-randomly = { version = ">=3.16.0", optional = true }
pytest-xdist = { version = ">=3.5.0", optional = true } pytest-xdist = { version = ">=3.5.0", optional = true }
anthropic = "^0.86.0"
[tool.poetry.extras] [tool.poetry.extras]
telegram = ["python-telegram-bot"] telegram = ["python-telegram-bot"]

View File

@@ -114,6 +114,7 @@ class Provider:
type: str # ollama, openai, anthropic type: str # ollama, openai, anthropic
enabled: bool enabled: bool
priority: int priority: int
tier: str | None = None # e.g., "local", "standard_cloud", "frontier"
url: str | None = None url: str | None = None
api_key: str | None = None api_key: str | None = None
base_url: str | None = None base_url: str | None = None
@@ -267,6 +268,7 @@ class CascadeRouter:
type=p_data["type"], type=p_data["type"],
enabled=p_data.get("enabled", True), enabled=p_data.get("enabled", True),
priority=p_data.get("priority", 99), priority=p_data.get("priority", 99),
tier=p_data.get("tier"),
url=p_data.get("url"), url=p_data.get("url"),
api_key=p_data.get("api_key"), api_key=p_data.get("api_key"),
base_url=p_data.get("base_url"), base_url=p_data.get("base_url"),
@@ -532,6 +534,7 @@ class CascadeRouter:
model: str | None = None, model: str | None = None,
temperature: float = 0.7, temperature: float = 0.7,
max_tokens: int | None = None, max_tokens: int | None = None,
cascade_tier: str | None = None,
) -> dict: ) -> dict:
"""Complete a chat conversation with automatic failover. """Complete a chat conversation with automatic failover.
@@ -545,6 +548,8 @@ class CascadeRouter:
model: Preferred model (tries this first, then provider defaults) model: Preferred model (tries this first, then provider defaults)
temperature: Sampling temperature temperature: Sampling temperature
max_tokens: Maximum tokens to generate max_tokens: Maximum tokens to generate
cascade_tier: If specified, filters providers by this tier.
- "frontier_required": Uses only Anthropic provider for top-tier models.
Returns: Returns:
Dict with content, provider_used, and metrics Dict with content, provider_used, and metrics
@@ -558,7 +563,18 @@ class CascadeRouter:
errors = [] errors = []
for provider in self.providers: providers = self.providers
if cascade_tier == "frontier_required":
providers = [p for p in self.providers if p.type == "anthropic"]
if not providers:
raise RuntimeError("No Anthropic provider configured for 'frontier_required' tier.")
elif cascade_tier:
providers = [p for p in self.providers if p.tier == cascade_tier]
if not providers:
raise RuntimeError(f"No providers found for tier: {cascade_tier}")
for provider in providers:
if not self._is_provider_available(provider): if not self._is_provider_available(provider):
continue continue