feat: Add Claude API fallback tier to cascade.py
Adds a cascade_tier parameter to the chat-completion method of CascadeRouter in cascade.py. When cascade_tier is set to "frontier_required", the router will only use the anthropic provider. This allows high-stakes tasks to fall back to a more powerful model while local models continue to handle the majority of requests. Fixes #980
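For reviewers, a caller-side sketch of how the new parameter is meant to be used. The import path, the constructor argument, and the method name `chat` are assumptions made for illustration; this commit only shows the parameter, docstring, and filtering logic (see the cascade.py hunks below).

# Caller-side sketch only. Method name `chat`, import path, and config path are assumed.
from cascade import CascadeRouter

router = CascadeRouter("cascade.yaml")  # hypothetical config path

# Normal traffic: no tier constraint, so the router cascades through providers
# in priority order (local first, then cloud) with automatic failover.
reply = router.chat(messages=[{"role": "user", "content": "Summarize this log file"}])

# High-stakes request: restrict routing to the Anthropic provider only.
reply = router.chat(
    messages=[{"role": "user", "content": "Review this contract clause"}],
    cascade_tier="frontier_required",
)
print(reply["provider_used"], reply["content"])  # keys per the updated docstring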
@@ -22,6 +22,7 @@ providers:
     type: ollama
     enabled: true
     priority: 1
+    tier: local
     url: "http://localhost:11434"
     models:
       # Text + Tools models
@@ -97,6 +98,7 @@ providers:
     type: vllm_mlx
     enabled: false # Enable when vllm-mlx server is running
     priority: 2
+    tier: local
     base_url: "http://localhost:8000/v1"
     models:
       - name: Qwen/Qwen2.5-14B-Instruct-MLX
@@ -112,6 +114,7 @@ providers:
     type: openai
     enabled: false # Enable by setting OPENAI_API_KEY
     priority: 3
+    tier: standard_cloud
     api_key: "${OPENAI_API_KEY}" # Loaded from environment
     base_url: null # Use default OpenAI endpoint
     models:
@@ -128,6 +131,7 @@ providers:
     type: anthropic
     enabled: false # Enable by setting ANTHROPIC_API_KEY
     priority: 4
+    tier: frontier
     api_key: "${ANTHROPIC_API_KEY}"
     models:
       - name: claude-3-haiku-20240307
poetry.lock (generated): 30 changes
@@ -419,6 +419,34 @@ files = [
     {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
 ]
 
+[[package]]
+name = "anthropic"
+version = "0.86.0"
+description = "The official Python library for the anthropic API"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "anthropic-0.86.0-py3-none-any.whl", hash = "sha256:9d2bbd339446acce98858c5627d33056efe01f70435b22b63546fe7edae0cd57"},
+    {file = "anthropic-0.86.0.tar.gz", hash = "sha256:60023a7e879aa4fbb1fed99d487fe407b2ebf6569603e5047cfe304cebdaa0e5"},
+]
+
+[package.dependencies]
+anyio = ">=3.5.0,<5"
+distro = ">=1.7.0,<2"
+docstring-parser = ">=0.15,<1"
+httpx = ">=0.25.0,<1"
+jiter = ">=0.4.0,<1"
+pydantic = ">=1.9.0,<3"
+sniffio = "*"
+typing-extensions = ">=4.14,<5"
+
+[package.extras]
+aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.9)"]
+bedrock = ["boto3 (>=1.28.57)", "botocore (>=1.31.57)"]
+mcp = ["mcp (>=1.0) ; python_version >= \"3.10\""]
+vertex = ["google-auth[requests] (>=2,<3)"]
+
 [[package]]
 name = "anyio"
 version = "4.12.1"
@@ -9672,4 +9700,4 @@ voice = ["openai-whisper", "piper-tts", "pyttsx3", "sounddevice"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.11,<4"
-content-hash = "008bc91ad0301d57d26339ec74ba1a09fb717a36447282fd2885682270b7b8df"
+content-hash = "cc50755f322b8755e85ab7bdf0668609612d885552aba14caf175326eedfa216"
@@ -59,6 +59,7 @@ pytest-timeout = { version = ">=2.3.0", optional = true }
 selenium = { version = ">=4.20.0", optional = true }
 pytest-randomly = { version = ">=3.16.0", optional = true }
 pytest-xdist = { version = ">=3.5.0", optional = true }
+anthropic = "^0.86.0"
 
 [tool.poetry.extras]
 telegram = ["python-telegram-bot"]
@@ -114,6 +114,7 @@ class Provider:
     type: str  # ollama, openai, anthropic
     enabled: bool
     priority: int
+    tier: str | None = None  # e.g., "local", "standard_cloud", "frontier"
     url: str | None = None
     api_key: str | None = None
     base_url: str | None = None
@@ -267,6 +268,7 @@ class CascadeRouter:
                 type=p_data["type"],
                 enabled=p_data.get("enabled", True),
                 priority=p_data.get("priority", 99),
+                tier=p_data.get("tier"),
                 url=p_data.get("url"),
                 api_key=p_data.get("api_key"),
                 base_url=p_data.get("base_url"),
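One consequence of tier=p_data.get("tier") together with the tier: str | None = None default above is worth noting: provider entries written before this change, which have no tier key, still load cleanly with tier left as None, and are simply excluded from any tier-filtered request. A trimmed, self-contained sketch of that behaviour (the Provider fields are cut down to the ones relevant here; the real class may declare more):

# Sketch only: mirrors the loading code in the hunk above with a trimmed Provider.
from dataclasses import dataclass

@dataclass
class Provider:
    type: str
    enabled: bool
    priority: int
    tier: str | None = None  # new field; stays None for older configs

old_style = {"type": "ollama", "enabled": True, "priority": 1}  # no "tier" key
new_style = {"type": "anthropic", "enabled": False, "priority": 4, "tier": "frontier"}

for p_data in (old_style, new_style):
    p = Provider(
        type=p_data["type"],
        enabled=p_data.get("enabled", True),
        priority=p_data.get("priority", 99),
        tier=p_data.get("tier"),  # None when the key is absent
    )
    print(p.type, p.tier)  # -> "ollama None", then "anthropic frontier"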
@@ -532,6 +534,7 @@ class CascadeRouter:
         model: str | None = None,
         temperature: float = 0.7,
         max_tokens: int | None = None,
+        cascade_tier: str | None = None,
     ) -> dict:
         """Complete a chat conversation with automatic failover.
 
@@ -545,6 +548,8 @@ class CascadeRouter:
             model: Preferred model (tries this first, then provider defaults)
             temperature: Sampling temperature
             max_tokens: Maximum tokens to generate
+            cascade_tier: If specified, filters providers by this tier.
+                - "frontier_required": Uses only Anthropic provider for top-tier models.
 
         Returns:
             Dict with content, provider_used, and metrics
@@ -558,7 +563,18 @@ class CascadeRouter:
 
         errors = []
 
-        for provider in self.providers:
+        providers = self.providers
+        if cascade_tier == "frontier_required":
+            providers = [p for p in self.providers if p.type == "anthropic"]
+            if not providers:
+                raise RuntimeError("No Anthropic provider configured for 'frontier_required' tier.")
+        elif cascade_tier:
+            providers = [p for p in self.providers if p.tier == cascade_tier]
+            if not providers:
+                raise RuntimeError(f"No providers found for tier: {cascade_tier}")
+
+
+        for provider in providers:
             if not self._is_provider_available(provider):
                 continue
 
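To see the new routing rules end to end, here is a self-contained sketch that applies the same filtering as the hunk above to the four providers declared in the config changes at the top of this commit. It is illustrative only; the real method goes on to iterate the filtered list with its existing availability checks and failover.

# Standalone sketch of the tier-filtering rules added in this hunk.
from dataclasses import dataclass

@dataclass
class Provider:
    type: str
    priority: int
    tier: str | None = None

PROVIDERS = [
    Provider("ollama", 1, "local"),
    Provider("vllm_mlx", 2, "local"),
    Provider("openai", 3, "standard_cloud"),
    Provider("anthropic", 4, "frontier"),
]

def select(providers: list[Provider], cascade_tier: str | None) -> list[Provider]:
    # Same rules as the diff: "frontier_required" pins to the Anthropic provider,
    # any other non-empty tier matches on Provider.tier, None means no filtering.
    if cascade_tier == "frontier_required":
        chosen = [p for p in providers if p.type == "anthropic"]
        if not chosen:
            raise RuntimeError("No Anthropic provider configured for 'frontier_required' tier.")
    elif cascade_tier:
        chosen = [p for p in providers if p.tier == cascade_tier]
        if not chosen:
            raise RuntimeError(f"No providers found for tier: {cascade_tier}")
    else:
        chosen = providers
    return chosen

print([p.type for p in select(PROVIDERS, None)])                 # all four, priority order
print([p.type for p in select(PROVIDERS, "local")])              # ['ollama', 'vllm_mlx']
print([p.type for p in select(PROVIDERS, "frontier_required")])  # ['anthropic']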