1
0

feat: code quality audit + autoresearch integration + infra hardening (#150)

This commit is contained in:
Alexander Whitestone
2026-03-08 12:50:44 -04:00
committed by GitHub
parent fd0ede0d51
commit ae3bb1cc21
186 changed files with 5129 additions and 3289 deletions

View File

@@ -33,6 +33,7 @@ logger = logging.getLogger(__name__)
class ProviderStatus(Enum):
    """Health status of a provider, derived from its recent error rate."""

    HEALTHY = "healthy"  # Operating normally
    DEGRADED = "degraded"  # Working but slow or occasional errors
    UNHEALTHY = "unhealthy"  # Circuit breaker open
@@ -41,22 +42,25 @@ class ProviderStatus(Enum):
class CircuitState(Enum):
    """Circuit breaker state.

    Transitions: CLOSED -> OPEN after repeated failures, OPEN -> HALF_OPEN
    after the recovery timeout, HALF_OPEN -> CLOSED after successful probes.
    """

    CLOSED = "closed"  # Normal operation
    OPEN = "open"  # Failing, rejecting requests
    HALF_OPEN = "half_open"  # Testing if recovered
class ContentType(Enum):
    """Type of content detected in a request's messages.

    Used to select a model whose capabilities match the request
    (e.g. a vision-capable model when images are present).
    """

    TEXT = "text"
    VISION = "vision"  # Contains images
    AUDIO = "audio"  # Contains audio
    MULTIMODAL = "multimodal"  # Multiple content types
@dataclass
class ProviderMetrics:
"""Metrics for a single provider."""
total_requests: int = 0
successful_requests: int = 0
failed_requests: int = 0
@@ -64,13 +68,13 @@ class ProviderMetrics:
last_request_time: Optional[str] = None
last_error_time: Optional[str] = None
consecutive_failures: int = 0
@property
def avg_latency_ms(self) -> float:
if self.total_requests == 0:
return 0.0
return self.total_latency_ms / self.total_requests
@property
def error_rate(self) -> float:
if self.total_requests == 0:
@@ -81,6 +85,7 @@ class ProviderMetrics:
@dataclass
class ModelCapability:
"""Capabilities a model supports."""
name: str
supports_vision: bool = False
supports_audio: bool = False
@@ -93,6 +98,7 @@ class ModelCapability:
@dataclass
class Provider:
"""LLM provider configuration and state."""
name: str
type: str # ollama, openai, anthropic, airllm
enabled: bool
@@ -101,14 +107,14 @@ class Provider:
api_key: Optional[str] = None
base_url: Optional[str] = None
models: list[dict] = field(default_factory=list)
# Runtime state
status: ProviderStatus = ProviderStatus.HEALTHY
metrics: ProviderMetrics = field(default_factory=ProviderMetrics)
circuit_state: CircuitState = CircuitState.CLOSED
circuit_opened_at: Optional[float] = None
half_open_calls: int = 0
def get_default_model(self) -> Optional[str]:
"""Get the default model for this provider."""
for model in self.models:
@@ -117,7 +123,7 @@ class Provider:
if self.models:
return self.models[0]["name"]
return None
def get_model_with_capability(self, capability: str) -> Optional[str]:
"""Get a model that supports the given capability."""
for model in self.models:
@@ -126,7 +132,7 @@ class Provider:
return model["name"]
# Fall back to default
return self.get_default_model()
def model_has_capability(self, model_name: str, capability: str) -> bool:
"""Check if a specific model has a capability."""
for model in self.models:
@@ -139,6 +145,7 @@ class Provider:
@dataclass
class RouterConfig:
"""Cascade router configuration."""
timeout_seconds: int = 30
max_retries_per_provider: int = 2
retry_delay_seconds: int = 1
@@ -154,22 +161,22 @@ class RouterConfig:
class CascadeRouter:
"""Routes LLM requests with automatic failover.
Now with multi-modal support:
- Automatically detects content type (text, vision, audio)
- Selects appropriate models based on capabilities
- Falls back through capability-specific model chains
- Supports image URLs and base64 encoding
Usage:
router = CascadeRouter()
# Text request
response = await router.complete(
messages=[{"role": "user", "content": "Hello"}],
model="llama3.2"
)
# Vision request (automatically detects and selects vision model)
response = await router.complete(
messages=[{
@@ -179,68 +186,75 @@ class CascadeRouter:
}],
model="llava:7b"
)
# Check metrics
metrics = router.get_metrics()
"""
def __init__(self, config_path: Optional[Path] = None) -> None:
    """Initialize the router and eagerly load provider configuration.

    Args:
        config_path: Path to the providers YAML file. Defaults to
            ``config/providers.yaml`` (relative to the working directory).
    """
    self.config_path = config_path or Path("config/providers.yaml")
    self.providers: list[Provider] = []
    self.config: RouterConfig = RouterConfig()
    # Populates self.providers / self.config from YAML; on any error it
    # logs and leaves the defaults above in place.
    self._load_config()

    # Initialize multi-modal manager if available
    self._mm_manager: Optional[Any] = None
    try:
        from infrastructure.models.multimodal import get_multimodal_manager
        self._mm_manager = get_multimodal_manager()
    except Exception as exc:
        # Multi-modal support is optional: a missing module only disables
        # capability-aware model selection, so we log at debug and move on.
        logger.debug("Multi-modal manager not available: %s", exc)

    logger.info("CascadeRouter initialized with %d providers", len(self.providers))
def _load_config(self) -> None:
"""Load configuration from YAML."""
if not self.config_path.exists():
logger.warning("Config not found: %s, using defaults", self.config_path)
return
try:
if yaml is None:
raise RuntimeError("PyYAML not installed")
content = self.config_path.read_text()
# Expand environment variables
content = self._expand_env_vars(content)
data = yaml.safe_load(content)
# Load cascade settings
cascade = data.get("cascade", {})
# Load fallback chains
fallback_chains = data.get("fallback_chains", {})
# Load multi-modal settings
multimodal = data.get("multimodal", {})
self.config = RouterConfig(
timeout_seconds=cascade.get("timeout_seconds", 30),
max_retries_per_provider=cascade.get("max_retries_per_provider", 2),
retry_delay_seconds=cascade.get("retry_delay_seconds", 1),
circuit_breaker_failure_threshold=cascade.get("circuit_breaker", {}).get("failure_threshold", 5),
circuit_breaker_recovery_timeout=cascade.get("circuit_breaker", {}).get("recovery_timeout", 60),
circuit_breaker_half_open_max_calls=cascade.get("circuit_breaker", {}).get("half_open_max_calls", 2),
circuit_breaker_failure_threshold=cascade.get("circuit_breaker", {}).get(
"failure_threshold", 5
),
circuit_breaker_recovery_timeout=cascade.get("circuit_breaker", {}).get(
"recovery_timeout", 60
),
circuit_breaker_half_open_max_calls=cascade.get("circuit_breaker", {}).get(
"half_open_max_calls", 2
),
auto_pull_models=multimodal.get("auto_pull", True),
fallback_chains=fallback_chains,
)
# Load providers
for p_data in data.get("providers", []):
# Skip disabled providers
if not p_data.get("enabled", False):
continue
provider = Provider(
name=p_data["name"],
type=p_data["type"],
@@ -251,30 +265,34 @@ class CascadeRouter:
base_url=p_data.get("base_url"),
models=p_data.get("models", []),
)
# Check if provider is actually available
if self._check_provider_available(provider):
self.providers.append(provider)
else:
logger.warning("Provider %s not available, skipping", provider.name)
# Sort by priority
self.providers.sort(key=lambda p: p.priority)
except Exception as exc:
logger.error("Failed to load config: %s", exc)
def _expand_env_vars(self, content: str) -> str:
"""Expand ${VAR} syntax in YAML content."""
"""Expand ${VAR} syntax in YAML content.
Uses os.environ directly (not settings) because this is a generic
YAML config loader that must expand arbitrary variable references.
"""
import os
import re
def replace_var(match):
def replace_var(match: "re.Match[str]") -> str:
var_name = match.group(1)
return os.environ.get(var_name, match.group(0))
return re.sub(r"\$\{(\w+)\}", replace_var, content)
def _check_provider_available(self, provider: Provider) -> bool:
"""Check if a provider is actually available."""
if provider.type == "ollama":
@@ -288,48 +306,49 @@ class CascadeRouter:
return response.status_code == 200
except Exception:
return False
elif provider.type == "airllm":
# Check if airllm is installed
try:
import airllm
return True
except ImportError:
return False
elif provider.type in ("openai", "anthropic", "grok"):
# Check if API key is set
return provider.api_key is not None and provider.api_key != ""
return True
def _detect_content_type(self, messages: list[dict]) -> ContentType:
"""Detect the type of content in the messages.
Checks for images, audio, etc. in the message content.
"""
has_image = False
has_audio = False
for msg in messages:
content = msg.get("content", "")
# Check for image URLs/paths
if msg.get("images"):
has_image = True
# Check for image URLs in content
if isinstance(content, str):
image_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp')
image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
if any(ext in content.lower() for ext in image_extensions):
has_image = True
if content.startswith("data:image/"):
has_image = True
# Check for audio
if msg.get("audio"):
has_audio = True
# Check for multimodal content structure
if isinstance(content, list):
for item in content:
@@ -338,7 +357,7 @@ class CascadeRouter:
has_image = True
elif item.get("type") == "audio":
has_audio = True
if has_image and has_audio:
return ContentType.MULTIMODAL
elif has_image:
@@ -346,12 +365,9 @@ class CascadeRouter:
elif has_audio:
return ContentType.AUDIO
return ContentType.TEXT
def _get_fallback_model(
self,
provider: Provider,
original_model: str,
content_type: ContentType
self, provider: Provider, original_model: str, content_type: ContentType
) -> Optional[str]:
"""Get a fallback model for the given content type."""
# Map content type to capability
@@ -360,24 +376,24 @@ class CascadeRouter:
ContentType.AUDIO: "audio",
ContentType.MULTIMODAL: "vision", # Vision models often do both
}
capability = capability_map.get(content_type)
if not capability:
return None
# Check provider's models for capability
fallback_model = provider.get_model_with_capability(capability)
if fallback_model and fallback_model != original_model:
return fallback_model
# Use fallback chains from config
fallback_chain = self.config.fallback_chains.get(capability, [])
for model_name in fallback_chain:
if provider.model_has_capability(model_name, capability):
return model_name
return None
async def complete(
self,
messages: list[dict],
@@ -386,21 +402,21 @@ class CascadeRouter:
max_tokens: Optional[int] = None,
) -> dict:
"""Complete a chat conversation with automatic failover.
Multi-modal support:
- Automatically detects if messages contain images
- Falls back to vision-capable models when needed
- Supports image URLs, paths, and base64 encoding
Args:
messages: List of message dicts with role and content
model: Preferred model (tries this first, then provider defaults)
temperature: Sampling temperature
max_tokens: Maximum tokens to generate
Returns:
Dict with content, provider_used, and metrics
Raises:
RuntimeError: If all providers fail
"""
@@ -408,15 +424,15 @@ class CascadeRouter:
content_type = self._detect_content_type(messages)
if content_type != ContentType.TEXT:
logger.debug("Detected %s content, selecting appropriate model", content_type.value)
errors = []
for provider in self.providers:
# Skip disabled providers
if not provider.enabled:
logger.debug("Skipping %s (disabled)", provider.name)
continue
# Skip unhealthy providers (circuit breaker)
if provider.status == ProviderStatus.UNHEALTHY:
# Check if circuit breaker can close
@@ -427,16 +443,16 @@ class CascadeRouter:
else:
logger.debug("Skipping %s (circuit open)", provider.name)
continue
# Determine which model to use
selected_model = model or provider.get_default_model()
is_fallback_model = False
# For non-text content, check if model supports it
if content_type != ContentType.TEXT and selected_model:
if provider.type == "ollama" and self._mm_manager:
from infrastructure.models.multimodal import ModelCapability
# Check if selected model supports the required capability
if content_type == ContentType.VISION:
supports = self._mm_manager.model_supports(
@@ -450,16 +466,17 @@ class CascadeRouter:
if fallback:
logger.info(
"Model %s doesn't support vision, falling back to %s",
selected_model, fallback
selected_model,
fallback,
)
selected_model = fallback
is_fallback_model = True
else:
logger.warning(
"No vision-capable model found on %s, trying anyway",
provider.name
provider.name,
)
# Try this provider
for attempt in range(self.config.max_retries_per_provider):
try:
@@ -471,34 +488,35 @@ class CascadeRouter:
max_tokens=max_tokens,
content_type=content_type,
)
# Success! Update metrics and return
self._record_success(provider, result.get("latency_ms", 0))
return {
"content": result["content"],
"provider": provider.name,
"model": result.get("model", selected_model or provider.get_default_model()),
"model": result.get(
"model", selected_model or provider.get_default_model()
),
"latency_ms": result.get("latency_ms", 0),
"is_fallback_model": is_fallback_model,
}
except Exception as exc:
error_msg = str(exc)
logger.warning(
"Provider %s attempt %d failed: %s",
provider.name, attempt + 1, error_msg
"Provider %s attempt %d failed: %s", provider.name, attempt + 1, error_msg
)
errors.append(f"{provider.name}: {error_msg}")
if attempt < self.config.max_retries_per_provider - 1:
await asyncio.sleep(self.config.retry_delay_seconds)
# All retries failed for this provider
self._record_failure(provider)
# All providers failed
raise RuntimeError(f"All providers failed: {'; '.join(errors)}")
async def _try_provider(
self,
provider: Provider,
@@ -510,7 +528,7 @@ class CascadeRouter:
) -> dict:
"""Try a single provider request."""
start_time = time.time()
if provider.type == "ollama":
result = await self._call_ollama(
provider=provider,
@@ -545,12 +563,12 @@ class CascadeRouter:
)
else:
raise ValueError(f"Unknown provider type: {provider.type}")
latency_ms = (time.time() - start_time) * 1000
result["latency_ms"] = latency_ms
return result
async def _call_ollama(
self,
provider: Provider,
@@ -561,12 +579,12 @@ class CascadeRouter:
) -> dict:
"""Call Ollama API with multi-modal support."""
import aiohttp
url = f"{provider.url}/api/chat"
# Transform messages for Ollama format (including images)
transformed_messages = self._transform_messages_for_ollama(messages)
payload = {
"model": model,
"messages": transformed_messages,
@@ -575,31 +593,31 @@ class CascadeRouter:
"temperature": temperature,
},
}
timeout = aiohttp.ClientTimeout(total=self.config.timeout_seconds)
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.post(url, json=payload) as response:
if response.status != 200:
text = await response.text()
raise RuntimeError(f"Ollama error {response.status}: {text}")
data = await response.json()
return {
"content": data["message"]["content"],
"model": model,
}
def _transform_messages_for_ollama(self, messages: list[dict]) -> list[dict]:
"""Transform messages to Ollama format, handling images."""
transformed = []
for msg in messages:
new_msg = {
"role": msg.get("role", "user"),
"content": msg.get("content", ""),
}
# Handle images
images = msg.get("images", [])
if images:
@@ -620,11 +638,11 @@ class CascadeRouter:
new_msg["images"].append(img_data)
except Exception as exc:
logger.error("Failed to read image %s: %s", img, exc)
transformed.append(new_msg)
return transformed
async def _call_openai(
self,
provider: Provider,
@@ -635,13 +653,13 @@ class CascadeRouter:
) -> dict:
"""Call OpenAI API."""
import openai
client = openai.AsyncOpenAI(
api_key=provider.api_key,
base_url=provider.base_url,
timeout=self.config.timeout_seconds,
)
kwargs = {
"model": model,
"messages": messages,
@@ -649,14 +667,14 @@ class CascadeRouter:
}
if max_tokens:
kwargs["max_tokens"] = max_tokens
response = await client.chat.completions.create(**kwargs)
return {
"content": response.choices[0].message.content,
"model": response.model,
}
async def _call_anthropic(
self,
provider: Provider,
@@ -667,12 +685,12 @@ class CascadeRouter:
) -> dict:
"""Call Anthropic API."""
import anthropic
client = anthropic.AsyncAnthropic(
api_key=provider.api_key,
timeout=self.config.timeout_seconds,
)
# Convert messages to Anthropic format
system_msg = None
conversation = []
@@ -680,11 +698,13 @@ class CascadeRouter:
if msg["role"] == "system":
system_msg = msg["content"]
else:
conversation.append({
"role": msg["role"],
"content": msg["content"],
})
conversation.append(
{
"role": msg["role"],
"content": msg["content"],
}
)
kwargs = {
"model": model,
"messages": conversation,
@@ -693,9 +713,9 @@ class CascadeRouter:
}
if system_msg:
kwargs["system"] = system_msg
response = await client.messages.create(**kwargs)
return {
"content": response.content[0].text,
"model": response.model,
@@ -733,7 +753,7 @@ class CascadeRouter:
"content": response.choices[0].message.content,
"model": response.model,
}
def _record_success(self, provider: Provider, latency_ms: float) -> None:
"""Record a successful request."""
provider.metrics.total_requests += 1
@@ -741,50 +761,50 @@ class CascadeRouter:
provider.metrics.total_latency_ms += latency_ms
provider.metrics.last_request_time = datetime.now(timezone.utc).isoformat()
provider.metrics.consecutive_failures = 0
# Close circuit breaker if half-open
if provider.circuit_state == CircuitState.HALF_OPEN:
provider.half_open_calls += 1
if provider.half_open_calls >= self.config.circuit_breaker_half_open_max_calls:
self._close_circuit(provider)
# Update status based on error rate
if provider.metrics.error_rate < 0.1:
provider.status = ProviderStatus.HEALTHY
elif provider.metrics.error_rate < 0.3:
provider.status = ProviderStatus.DEGRADED
def _record_failure(self, provider: Provider) -> None:
    """Record a failed request and update circuit-breaker/health state."""
    metrics = provider.metrics
    metrics.total_requests += 1
    metrics.failed_requests += 1
    metrics.last_error_time = datetime.now(timezone.utc).isoformat()
    metrics.consecutive_failures += 1

    # Enough consecutive failures trips the circuit breaker for this provider.
    failure_limit = self.config.circuit_breaker_failure_threshold
    if metrics.consecutive_failures >= failure_limit:
        self._open_circuit(provider)

    # Degrade health as the error rate climbs; the second check runs
    # after the first so UNHEALTHY wins when both thresholds are exceeded.
    if metrics.error_rate > 0.3:
        provider.status = ProviderStatus.DEGRADED
    if metrics.error_rate > 0.5:
        provider.status = ProviderStatus.UNHEALTHY
def _open_circuit(self, provider: Provider) -> None:
    """Trip the circuit breaker so this provider is skipped until recovery."""
    # Timestamp first: recovery eligibility is measured from this moment.
    provider.circuit_opened_at = time.time()
    provider.circuit_state = CircuitState.OPEN
    provider.status = ProviderStatus.UNHEALTHY
    logger.warning("Circuit breaker OPEN for %s", provider.name)
def _can_close_circuit(self, provider: Provider) -> bool:
"""Check if circuit breaker can transition to half-open."""
if provider.circuit_opened_at is None:
return False
elapsed = time.time() - provider.circuit_opened_at
return elapsed >= self.config.circuit_breaker_recovery_timeout
def _close_circuit(self, provider: Provider) -> None:
"""Close the circuit breaker (provider healthy again)."""
provider.circuit_state = CircuitState.CLOSED
@@ -793,7 +813,7 @@ class CascadeRouter:
provider.metrics.consecutive_failures = 0
provider.status = ProviderStatus.HEALTHY
logger.info("Circuit breaker CLOSED for %s", provider.name)
def get_metrics(self) -> dict:
"""Get metrics for all providers."""
return {
@@ -814,16 +834,20 @@ class CascadeRouter:
for p in self.providers
]
}
def get_status(self) -> dict:
"""Get current router status."""
healthy = sum(1 for p in self.providers if p.status == ProviderStatus.HEALTHY)
return {
"total_providers": len(self.providers),
"healthy_providers": healthy,
"degraded_providers": sum(1 for p in self.providers if p.status == ProviderStatus.DEGRADED),
"unhealthy_providers": sum(1 for p in self.providers if p.status == ProviderStatus.UNHEALTHY),
"degraded_providers": sum(
1 for p in self.providers if p.status == ProviderStatus.DEGRADED
),
"unhealthy_providers": sum(
1 for p in self.providers if p.status == ProviderStatus.UNHEALTHY
),
"providers": [
{
"name": p.name,
@@ -835,7 +859,7 @@ class CascadeRouter:
for p in self.providers
],
}
async def generate_with_image(
self,
prompt: str,
@@ -844,21 +868,23 @@ class CascadeRouter:
temperature: float = 0.7,
) -> dict:
"""Convenience method for vision requests.
Args:
prompt: Text prompt about the image
image_path: Path to image file
model: Vision-capable model (auto-selected if not provided)
temperature: Sampling temperature
Returns:
Response dict with content and metadata
"""
messages = [{
"role": "user",
"content": prompt,
"images": [image_path],
}]
messages = [
{
"role": "user",
"content": prompt,
"images": [image_path],
}
]
return await self.complete(
messages=messages,
model=model,