forked from Rockachopa/Timmy-time-dashboard
Compare commits
1 Commits
claude/iss
...
claude/iss
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
24072a6173 |
@@ -167,3 +167,6 @@ directory = "htmlcov"
|
|||||||
|
|
||||||
[tool.coverage.xml]
|
[tool.coverage.xml]
|
||||||
output = "coverage.xml"
|
output = "coverage.xml"
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
mypy_path = "src"
|
||||||
|
|||||||
@@ -56,13 +56,6 @@ class Settings(BaseSettings):
|
|||||||
# Set to 0 to use model defaults.
|
# Set to 0 to use model defaults.
|
||||||
ollama_num_ctx: int = 32768
|
ollama_num_ctx: int = 32768
|
||||||
|
|
||||||
# Maximum models loaded simultaneously in Ollama — override with OLLAMA_MAX_LOADED_MODELS
|
|
||||||
# Set to 2 so Qwen3-8B and Qwen3-14B can stay hot concurrently (~17 GB combined).
|
|
||||||
# Requires Ollama ≥ 0.1.33. Export this to the Ollama process environment:
|
|
||||||
# OLLAMA_MAX_LOADED_MODELS=2 ollama serve
|
|
||||||
# or add it to your systemd/launchd unit before starting the harness.
|
|
||||||
ollama_max_loaded_models: int = 2
|
|
||||||
|
|
||||||
# Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS
|
# Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS
|
||||||
# as comma-separated strings, e.g. FALLBACK_MODELS="qwen3:8b,qwen2.5:14b"
|
# as comma-separated strings, e.g. FALLBACK_MODELS="qwen3:8b,qwen2.5:14b"
|
||||||
# Or edit config/providers.yaml → fallback_chains for the canonical source.
|
# Or edit config/providers.yaml → fallback_chains for the canonical source.
|
||||||
|
|||||||
@@ -13,9 +13,9 @@ from timmy.tools import get_all_available_tools
|
|||||||
|
|
||||||
router = APIRouter(tags=["tools"])
|
router = APIRouter(tags=["tools"])
|
||||||
|
|
||||||
_AgentView = namedtuple("AgentView", ["name", "status", "tools", "stats"])
|
_AgentView = namedtuple("_AgentView", ["name", "status", "tools", "stats"])
|
||||||
_ToolView = namedtuple("ToolView", ["name", "description"])
|
_ToolView = namedtuple("_ToolView", ["name", "description"])
|
||||||
_Stats = namedtuple("Stats", ["total_calls"])
|
_Stats = namedtuple("_Stats", ["total_calls"])
|
||||||
|
|
||||||
|
|
||||||
def _build_agent_tools():
|
def _build_agent_tools():
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ def _get_familiar_state() -> dict:
|
|||||||
BARK_STYLES = {"speech", "thought", "whisper", "shout"}
|
BARK_STYLES = {"speech", "thought", "whisper", "shout"}
|
||||||
|
|
||||||
|
|
||||||
def produce_bark(agent_id: str, text: str, reply_to: str = None, style: str = "speech") -> dict:
|
def produce_bark(agent_id: str, text: str, reply_to: str | None = None, style: str = "speech") -> dict:
|
||||||
"""Format a chat response as a Matrix bark message.
|
"""Format a chat response as a Matrix bark message.
|
||||||
|
|
||||||
Barks appear as floating text above agents in the Matrix 3D world with
|
Barks appear as floating text above agents in the Matrix 3D world with
|
||||||
@@ -102,7 +102,7 @@ def produce_bark(agent_id: str, text: str, reply_to: str = None, style: str = "s
|
|||||||
|
|
||||||
|
|
||||||
def produce_thought(
|
def produce_thought(
|
||||||
agent_id: str, thought_text: str, thought_id: int, chain_id: str = None
|
agent_id: str, thought_text: str, thought_id: int, chain_id: str | None = None
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Format a thinking engine thought as a Matrix thought message.
|
"""Format a thinking engine thought as a Matrix thought message.
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,123 +0,0 @@
|
|||||||
"""Config loading helpers for the Cascade LLM Router.
|
|
||||||
|
|
||||||
Parses providers.yaml, expands env vars, and checks provider availability.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from infrastructure.router.models import Provider, RouterConfig
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
try:
|
|
||||||
import yaml
|
|
||||||
except ImportError:
|
|
||||||
yaml = None # type: ignore
|
|
||||||
|
|
||||||
try:
|
|
||||||
import requests
|
|
||||||
except ImportError:
|
|
||||||
requests = None # type: ignore
|
|
||||||
|
|
||||||
|
|
||||||
def expand_env_vars(content: str) -> str:
|
|
||||||
"""Expand ${VAR} syntax in YAML content.
|
|
||||||
|
|
||||||
Uses os.environ directly (not settings) because this is a generic
|
|
||||||
YAML config loader that must expand arbitrary variable references.
|
|
||||||
"""
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
|
|
||||||
def replace_var(match: "re.Match[str]") -> str:
|
|
||||||
var_name = match.group(1)
|
|
||||||
return os.environ.get(var_name, match.group(0))
|
|
||||||
|
|
||||||
return re.sub(r"\$\{(\w+)\}", replace_var, content)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_router_config(data: dict) -> RouterConfig:
|
|
||||||
"""Build a RouterConfig from parsed YAML data."""
|
|
||||||
cascade = data.get("cascade", {})
|
|
||||||
cb = cascade.get("circuit_breaker", {})
|
|
||||||
multimodal = data.get("multimodal", {})
|
|
||||||
|
|
||||||
return RouterConfig(
|
|
||||||
timeout_seconds=cascade.get("timeout_seconds", 30),
|
|
||||||
max_retries_per_provider=cascade.get("max_retries_per_provider", 2),
|
|
||||||
retry_delay_seconds=cascade.get("retry_delay_seconds", 1),
|
|
||||||
circuit_breaker_failure_threshold=cb.get("failure_threshold", 5),
|
|
||||||
circuit_breaker_recovery_timeout=cb.get("recovery_timeout", 60),
|
|
||||||
circuit_breaker_half_open_max_calls=cb.get("half_open_max_calls", 2),
|
|
||||||
auto_pull_models=multimodal.get("auto_pull", True),
|
|
||||||
fallback_chains=data.get("fallback_chains", {}),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def load_providers(data: dict) -> list[Provider]:
|
|
||||||
"""Load and filter providers from parsed YAML data (unsorted)."""
|
|
||||||
providers: list[Provider] = []
|
|
||||||
for p_data in data.get("providers", []):
|
|
||||||
if not p_data.get("enabled", False):
|
|
||||||
continue
|
|
||||||
|
|
||||||
provider = Provider(
|
|
||||||
name=p_data["name"],
|
|
||||||
type=p_data["type"],
|
|
||||||
enabled=p_data.get("enabled", True),
|
|
||||||
priority=p_data.get("priority", 99),
|
|
||||||
tier=p_data.get("tier"),
|
|
||||||
url=p_data.get("url"),
|
|
||||||
api_key=p_data.get("api_key"),
|
|
||||||
base_url=p_data.get("base_url"),
|
|
||||||
models=p_data.get("models", []),
|
|
||||||
)
|
|
||||||
|
|
||||||
if check_provider_available(provider):
|
|
||||||
providers.append(provider)
|
|
||||||
else:
|
|
||||||
logger.warning("Provider %s not available, skipping", provider.name)
|
|
||||||
|
|
||||||
return providers
|
|
||||||
|
|
||||||
|
|
||||||
def check_provider_available(provider: Provider) -> bool:
|
|
||||||
"""Check if a provider is actually available."""
|
|
||||||
from config import settings
|
|
||||||
|
|
||||||
if provider.type == "ollama":
|
|
||||||
# Check if Ollama is running
|
|
||||||
if requests is None:
|
|
||||||
# Can't check without requests, assume available
|
|
||||||
return True
|
|
||||||
try:
|
|
||||||
url = provider.url or settings.ollama_url
|
|
||||||
response = requests.get(f"{url}/api/tags", timeout=5)
|
|
||||||
return response.status_code == 200
|
|
||||||
except Exception as exc:
|
|
||||||
logger.debug("Ollama provider check error: %s", exc)
|
|
||||||
return False
|
|
||||||
|
|
||||||
elif provider.type == "vllm_mlx":
|
|
||||||
# Check if local vllm-mlx server is running (OpenAI-compatible)
|
|
||||||
if requests is None:
|
|
||||||
return True
|
|
||||||
try:
|
|
||||||
base_url = provider.base_url or provider.url or "http://localhost:8000"
|
|
||||||
# Strip /v1 suffix — health endpoint is at the root
|
|
||||||
server_root = base_url.rstrip("/")
|
|
||||||
if server_root.endswith("/v1"):
|
|
||||||
server_root = server_root[:-3]
|
|
||||||
response = requests.get(f"{server_root}/health", timeout=5)
|
|
||||||
return response.status_code == 200
|
|
||||||
except Exception as exc:
|
|
||||||
logger.debug("vllm-mlx provider check error: %s", exc)
|
|
||||||
return False
|
|
||||||
|
|
||||||
elif provider.type in ("openai", "anthropic", "grok"):
|
|
||||||
# Check if API key is set
|
|
||||||
return provider.api_key is not None and provider.api_key != ""
|
|
||||||
|
|
||||||
return True
|
|
||||||
@@ -1,129 +0,0 @@
|
|||||||
"""Content-type detection and model selection for the Cascade LLM Router."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from infrastructure.router.models import ContentType, Provider
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def detect_content_type(messages: list[dict]) -> ContentType:
|
|
||||||
"""Detect the type of content in the messages.
|
|
||||||
|
|
||||||
Checks for images, audio, etc. in the message content.
|
|
||||||
"""
|
|
||||||
has_image = False
|
|
||||||
has_audio = False
|
|
||||||
|
|
||||||
for msg in messages:
|
|
||||||
content = msg.get("content", "")
|
|
||||||
|
|
||||||
# Check for image URLs/paths
|
|
||||||
if msg.get("images"):
|
|
||||||
has_image = True
|
|
||||||
|
|
||||||
# Check for image URLs in content
|
|
||||||
if isinstance(content, str):
|
|
||||||
image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
|
|
||||||
if any(ext in content.lower() for ext in image_extensions):
|
|
||||||
has_image = True
|
|
||||||
if content.startswith("data:image/"):
|
|
||||||
has_image = True
|
|
||||||
|
|
||||||
# Check for audio
|
|
||||||
if msg.get("audio"):
|
|
||||||
has_audio = True
|
|
||||||
|
|
||||||
# Check for multimodal content structure
|
|
||||||
if isinstance(content, list):
|
|
||||||
for item in content:
|
|
||||||
if isinstance(item, dict):
|
|
||||||
if item.get("type") == "image_url":
|
|
||||||
has_image = True
|
|
||||||
elif item.get("type") == "audio":
|
|
||||||
has_audio = True
|
|
||||||
|
|
||||||
if has_image and has_audio:
|
|
||||||
return ContentType.MULTIMODAL
|
|
||||||
elif has_image:
|
|
||||||
return ContentType.VISION
|
|
||||||
elif has_audio:
|
|
||||||
return ContentType.AUDIO
|
|
||||||
return ContentType.TEXT
|
|
||||||
|
|
||||||
|
|
||||||
def get_fallback_model(
|
|
||||||
provider: Provider,
|
|
||||||
original_model: str,
|
|
||||||
content_type: ContentType,
|
|
||||||
fallback_chains: dict,
|
|
||||||
) -> str | None:
|
|
||||||
"""Get a fallback model for the given content type."""
|
|
||||||
# Map content type to capability
|
|
||||||
capability_map = {
|
|
||||||
ContentType.VISION: "vision",
|
|
||||||
ContentType.AUDIO: "audio",
|
|
||||||
ContentType.MULTIMODAL: "vision", # Vision models often do both
|
|
||||||
}
|
|
||||||
|
|
||||||
capability = capability_map.get(content_type)
|
|
||||||
if not capability:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Check provider's models for capability
|
|
||||||
fallback_model = provider.get_model_with_capability(capability)
|
|
||||||
if fallback_model and fallback_model != original_model:
|
|
||||||
return fallback_model
|
|
||||||
|
|
||||||
# Use fallback chains from config
|
|
||||||
fallback_chain = fallback_chains.get(capability, [])
|
|
||||||
for model_name in fallback_chain:
|
|
||||||
if provider.model_has_capability(model_name, capability):
|
|
||||||
return model_name
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def select_model(
|
|
||||||
provider: Provider,
|
|
||||||
model: str | None,
|
|
||||||
content_type: ContentType,
|
|
||||||
mm_manager: Any,
|
|
||||||
fallback_chains: dict,
|
|
||||||
) -> tuple[str | None, bool]:
|
|
||||||
"""Select the best model for the request, with vision fallback.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple of (selected_model, is_fallback_model).
|
|
||||||
"""
|
|
||||||
selected_model = model or provider.get_default_model()
|
|
||||||
is_fallback = False
|
|
||||||
|
|
||||||
if content_type != ContentType.TEXT and selected_model:
|
|
||||||
if provider.type == "ollama" and mm_manager:
|
|
||||||
from infrastructure.models.multimodal import ModelCapability
|
|
||||||
|
|
||||||
if content_type == ContentType.VISION:
|
|
||||||
supports = mm_manager.model_supports(selected_model, ModelCapability.VISION)
|
|
||||||
if not supports:
|
|
||||||
fallback = get_fallback_model(
|
|
||||||
provider, selected_model, content_type, fallback_chains
|
|
||||||
)
|
|
||||||
if fallback:
|
|
||||||
logger.info(
|
|
||||||
"Model %s doesn't support vision, falling back to %s",
|
|
||||||
selected_model,
|
|
||||||
fallback,
|
|
||||||
)
|
|
||||||
selected_model = fallback
|
|
||||||
is_fallback = True
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"No vision-capable model found on %s, trying anyway",
|
|
||||||
provider.name,
|
|
||||||
)
|
|
||||||
|
|
||||||
return selected_model, is_fallback
|
|
||||||
@@ -1,79 +0,0 @@
|
|||||||
"""Circuit-breaker and health tracking for the Cascade LLM Router.
|
|
||||||
|
|
||||||
Standalone functions that mutate Provider state in place.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
from datetime import UTC, datetime
|
|
||||||
|
|
||||||
from infrastructure.router.models import CircuitState, Provider, ProviderStatus, RouterConfig
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def record_success(provider: Provider, latency_ms: float, config: RouterConfig) -> None:
|
|
||||||
"""Record a successful request."""
|
|
||||||
provider.metrics.total_requests += 1
|
|
||||||
provider.metrics.successful_requests += 1
|
|
||||||
provider.metrics.total_latency_ms += latency_ms
|
|
||||||
provider.metrics.last_request_time = datetime.now(UTC).isoformat()
|
|
||||||
provider.metrics.consecutive_failures = 0
|
|
||||||
|
|
||||||
# Close circuit breaker if half-open
|
|
||||||
if provider.circuit_state == CircuitState.HALF_OPEN:
|
|
||||||
provider.half_open_calls += 1
|
|
||||||
if provider.half_open_calls >= config.circuit_breaker_half_open_max_calls:
|
|
||||||
close_circuit(provider)
|
|
||||||
|
|
||||||
# Update status based on error rate
|
|
||||||
if provider.metrics.error_rate < 0.1:
|
|
||||||
provider.status = ProviderStatus.HEALTHY
|
|
||||||
elif provider.metrics.error_rate < 0.3:
|
|
||||||
provider.status = ProviderStatus.DEGRADED
|
|
||||||
|
|
||||||
|
|
||||||
def record_failure(provider: Provider, config: RouterConfig) -> None:
|
|
||||||
"""Record a failed request."""
|
|
||||||
provider.metrics.total_requests += 1
|
|
||||||
provider.metrics.failed_requests += 1
|
|
||||||
provider.metrics.last_error_time = datetime.now(UTC).isoformat()
|
|
||||||
provider.metrics.consecutive_failures += 1
|
|
||||||
|
|
||||||
# Check if we should open circuit breaker
|
|
||||||
if provider.metrics.consecutive_failures >= config.circuit_breaker_failure_threshold:
|
|
||||||
open_circuit(provider)
|
|
||||||
|
|
||||||
# Update status
|
|
||||||
if provider.metrics.error_rate > 0.3:
|
|
||||||
provider.status = ProviderStatus.DEGRADED
|
|
||||||
if provider.metrics.error_rate > 0.5:
|
|
||||||
provider.status = ProviderStatus.UNHEALTHY
|
|
||||||
|
|
||||||
|
|
||||||
def open_circuit(provider: Provider) -> None:
|
|
||||||
"""Open the circuit breaker for a provider."""
|
|
||||||
provider.circuit_state = CircuitState.OPEN
|
|
||||||
provider.circuit_opened_at = time.time()
|
|
||||||
provider.status = ProviderStatus.UNHEALTHY
|
|
||||||
logger.warning("Circuit breaker OPEN for %s", provider.name)
|
|
||||||
|
|
||||||
|
|
||||||
def can_close_circuit(provider: Provider, config: RouterConfig) -> bool:
|
|
||||||
"""Check if circuit breaker can transition to half-open."""
|
|
||||||
if provider.circuit_opened_at is None:
|
|
||||||
return False
|
|
||||||
elapsed = time.time() - provider.circuit_opened_at
|
|
||||||
return elapsed >= config.circuit_breaker_recovery_timeout
|
|
||||||
|
|
||||||
|
|
||||||
def close_circuit(provider: Provider) -> None:
|
|
||||||
"""Close the circuit breaker (provider healthy again)."""
|
|
||||||
provider.circuit_state = CircuitState.CLOSED
|
|
||||||
provider.circuit_opened_at = None
|
|
||||||
provider.half_open_calls = 0
|
|
||||||
provider.metrics.consecutive_failures = 0
|
|
||||||
provider.status = ProviderStatus.HEALTHY
|
|
||||||
logger.info("Circuit breaker CLOSED for %s", provider.name)
|
|
||||||
@@ -1,141 +0,0 @@
|
|||||||
"""Data models for the Cascade LLM Router.
|
|
||||||
|
|
||||||
Enums, dataclasses, and provider configuration shared across
|
|
||||||
router sub-modules.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import time
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from datetime import UTC, datetime
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
|
|
||||||
class ProviderStatus(Enum):
|
|
||||||
"""Health status of a provider."""
|
|
||||||
|
|
||||||
HEALTHY = "healthy"
|
|
||||||
DEGRADED = "degraded" # Working but slow or occasional errors
|
|
||||||
UNHEALTHY = "unhealthy" # Circuit breaker open
|
|
||||||
DISABLED = "disabled"
|
|
||||||
|
|
||||||
|
|
||||||
class CircuitState(Enum):
|
|
||||||
"""Circuit breaker state."""
|
|
||||||
|
|
||||||
CLOSED = "closed" # Normal operation
|
|
||||||
OPEN = "open" # Failing, rejecting requests
|
|
||||||
HALF_OPEN = "half_open" # Testing if recovered
|
|
||||||
|
|
||||||
|
|
||||||
class ContentType(Enum):
|
|
||||||
"""Type of content in the request."""
|
|
||||||
|
|
||||||
TEXT = "text"
|
|
||||||
VISION = "vision" # Contains images
|
|
||||||
AUDIO = "audio" # Contains audio
|
|
||||||
MULTIMODAL = "multimodal" # Multiple content types
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ProviderMetrics:
|
|
||||||
"""Metrics for a single provider."""
|
|
||||||
|
|
||||||
total_requests: int = 0
|
|
||||||
successful_requests: int = 0
|
|
||||||
failed_requests: int = 0
|
|
||||||
total_latency_ms: float = 0.0
|
|
||||||
last_request_time: str | None = None
|
|
||||||
last_error_time: str | None = None
|
|
||||||
consecutive_failures: int = 0
|
|
||||||
|
|
||||||
@property
|
|
||||||
def avg_latency_ms(self) -> float:
|
|
||||||
if self.total_requests == 0:
|
|
||||||
return 0.0
|
|
||||||
return self.total_latency_ms / self.total_requests
|
|
||||||
|
|
||||||
@property
|
|
||||||
def error_rate(self) -> float:
|
|
||||||
if self.total_requests == 0:
|
|
||||||
return 0.0
|
|
||||||
return self.failed_requests / self.total_requests
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ModelCapability:
|
|
||||||
"""Capabilities a model supports."""
|
|
||||||
|
|
||||||
name: str
|
|
||||||
supports_vision: bool = False
|
|
||||||
supports_audio: bool = False
|
|
||||||
supports_tools: bool = False
|
|
||||||
supports_json: bool = False
|
|
||||||
supports_streaming: bool = True
|
|
||||||
context_window: int = 4096
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Provider:
|
|
||||||
"""LLM provider configuration and state."""
|
|
||||||
|
|
||||||
name: str
|
|
||||||
type: str # ollama, openai, anthropic
|
|
||||||
enabled: bool
|
|
||||||
priority: int
|
|
||||||
tier: str | None = None # e.g., "local", "standard_cloud", "frontier"
|
|
||||||
url: str | None = None
|
|
||||||
api_key: str | None = None
|
|
||||||
base_url: str | None = None
|
|
||||||
models: list[dict] = field(default_factory=list)
|
|
||||||
|
|
||||||
# Runtime state
|
|
||||||
status: ProviderStatus = ProviderStatus.HEALTHY
|
|
||||||
metrics: ProviderMetrics = field(default_factory=ProviderMetrics)
|
|
||||||
circuit_state: CircuitState = CircuitState.CLOSED
|
|
||||||
circuit_opened_at: float | None = None
|
|
||||||
half_open_calls: int = 0
|
|
||||||
|
|
||||||
def get_default_model(self) -> str | None:
|
|
||||||
"""Get the default model for this provider."""
|
|
||||||
for model in self.models:
|
|
||||||
if model.get("default"):
|
|
||||||
return model["name"]
|
|
||||||
if self.models:
|
|
||||||
return self.models[0]["name"]
|
|
||||||
return None
|
|
||||||
|
|
||||||
def get_model_with_capability(self, capability: str) -> str | None:
|
|
||||||
"""Get a model that supports the given capability."""
|
|
||||||
for model in self.models:
|
|
||||||
capabilities = model.get("capabilities", [])
|
|
||||||
if capability in capabilities:
|
|
||||||
return model["name"]
|
|
||||||
# Fall back to default
|
|
||||||
return self.get_default_model()
|
|
||||||
|
|
||||||
def model_has_capability(self, model_name: str, capability: str) -> bool:
|
|
||||||
"""Check if a specific model has a capability."""
|
|
||||||
for model in self.models:
|
|
||||||
if model["name"] == model_name:
|
|
||||||
capabilities = model.get("capabilities", [])
|
|
||||||
return capability in capabilities
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class RouterConfig:
|
|
||||||
"""Cascade router configuration."""
|
|
||||||
|
|
||||||
timeout_seconds: int = 30
|
|
||||||
max_retries_per_provider: int = 2
|
|
||||||
retry_delay_seconds: int = 1
|
|
||||||
circuit_breaker_failure_threshold: int = 5
|
|
||||||
circuit_breaker_recovery_timeout: int = 60
|
|
||||||
circuit_breaker_half_open_max_calls: int = 2
|
|
||||||
cost_tracking_enabled: bool = True
|
|
||||||
budget_daily_usd: float = 10.0
|
|
||||||
# Multi-modal settings
|
|
||||||
auto_pull_models: bool = True
|
|
||||||
fallback_chains: dict = field(default_factory=dict)
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
# Provider implementations
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
"""Anthropic provider implementation for the Cascade LLM Router."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from infrastructure.router.models import Provider
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def call_anthropic(
|
|
||||||
provider: Provider,
|
|
||||||
messages: list[dict],
|
|
||||||
model: str,
|
|
||||||
temperature: float,
|
|
||||||
max_tokens: int | None,
|
|
||||||
timeout_seconds: int,
|
|
||||||
) -> dict:
|
|
||||||
"""Call Anthropic API."""
|
|
||||||
import anthropic
|
|
||||||
|
|
||||||
client = anthropic.AsyncAnthropic(
|
|
||||||
api_key=provider.api_key,
|
|
||||||
timeout=timeout_seconds,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Convert messages to Anthropic format
|
|
||||||
system_msg = None
|
|
||||||
conversation = []
|
|
||||||
for msg in messages:
|
|
||||||
if msg["role"] == "system":
|
|
||||||
system_msg = msg["content"]
|
|
||||||
else:
|
|
||||||
conversation.append(
|
|
||||||
{
|
|
||||||
"role": msg["role"],
|
|
||||||
"content": msg["content"],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
kwargs: dict = {
|
|
||||||
"model": model,
|
|
||||||
"messages": conversation,
|
|
||||||
"temperature": temperature,
|
|
||||||
"max_tokens": max_tokens or 1024,
|
|
||||||
}
|
|
||||||
if system_msg:
|
|
||||||
kwargs["system"] = system_msg
|
|
||||||
|
|
||||||
response = await client.messages.create(**kwargs)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"content": response.content[0].text,
|
|
||||||
"model": response.model,
|
|
||||||
}
|
|
||||||
@@ -1,80 +0,0 @@
|
|||||||
"""Provider dispatch — routes a single request to the correct provider module."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import time
|
|
||||||
|
|
||||||
from infrastructure.router.models import ContentType, Provider
|
|
||||||
|
|
||||||
|
|
||||||
async def call_provider(
|
|
||||||
provider: Provider,
|
|
||||||
messages: list[dict],
|
|
||||||
model: str,
|
|
||||||
temperature: float,
|
|
||||||
max_tokens: int | None,
|
|
||||||
timeout_seconds: int,
|
|
||||||
content_type: ContentType = ContentType.TEXT,
|
|
||||||
) -> dict:
|
|
||||||
"""Dispatch a request to the correct provider implementation.
|
|
||||||
|
|
||||||
Returns a result dict with ``content``, ``model``, and ``latency_ms`` keys.
|
|
||||||
Raises ValueError for unknown provider types.
|
|
||||||
"""
|
|
||||||
from infrastructure.router.providers import ollama as _ollama
|
|
||||||
from infrastructure.router.providers import openai_compat as _openai_compat
|
|
||||||
from infrastructure.router.providers import anthropic as _anthropic
|
|
||||||
from infrastructure.router.providers import grok as _grok
|
|
||||||
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
if provider.type == "ollama":
|
|
||||||
result = await _ollama.call_ollama(
|
|
||||||
provider=provider,
|
|
||||||
messages=messages,
|
|
||||||
model=model or provider.get_default_model(),
|
|
||||||
temperature=temperature,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
content_type=content_type,
|
|
||||||
timeout_seconds=timeout_seconds,
|
|
||||||
)
|
|
||||||
elif provider.type == "openai":
|
|
||||||
result = await _openai_compat.call_openai(
|
|
||||||
provider=provider,
|
|
||||||
messages=messages,
|
|
||||||
model=model or provider.get_default_model(),
|
|
||||||
temperature=temperature,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
timeout_seconds=timeout_seconds,
|
|
||||||
)
|
|
||||||
elif provider.type == "anthropic":
|
|
||||||
result = await _anthropic.call_anthropic(
|
|
||||||
provider=provider,
|
|
||||||
messages=messages,
|
|
||||||
model=model or provider.get_default_model(),
|
|
||||||
temperature=temperature,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
timeout_seconds=timeout_seconds,
|
|
||||||
)
|
|
||||||
elif provider.type == "grok":
|
|
||||||
result = await _grok.call_grok(
|
|
||||||
provider=provider,
|
|
||||||
messages=messages,
|
|
||||||
model=model or provider.get_default_model(),
|
|
||||||
temperature=temperature,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
)
|
|
||||||
elif provider.type == "vllm_mlx":
|
|
||||||
result = await _openai_compat.call_vllm_mlx(
|
|
||||||
provider=provider,
|
|
||||||
messages=messages,
|
|
||||||
model=model or provider.get_default_model(),
|
|
||||||
temperature=temperature,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
timeout_seconds=timeout_seconds,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unknown provider type: {provider.type}")
|
|
||||||
|
|
||||||
result["latency_ms"] = (time.time() - start_time) * 1000
|
|
||||||
return result
|
|
||||||
@@ -1,44 +0,0 @@
|
|||||||
"""Grok (xAI) provider implementation for the Cascade LLM Router."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from infrastructure.router.models import Provider
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def call_grok(
|
|
||||||
provider: Provider,
|
|
||||||
messages: list[dict],
|
|
||||||
model: str,
|
|
||||||
temperature: float,
|
|
||||||
max_tokens: int | None,
|
|
||||||
) -> dict:
|
|
||||||
"""Call xAI Grok API via OpenAI-compatible SDK."""
|
|
||||||
import httpx
|
|
||||||
import openai
|
|
||||||
|
|
||||||
from config import settings
|
|
||||||
|
|
||||||
client = openai.AsyncOpenAI(
|
|
||||||
api_key=provider.api_key,
|
|
||||||
base_url=provider.base_url or settings.xai_base_url,
|
|
||||||
timeout=httpx.Timeout(300.0),
|
|
||||||
)
|
|
||||||
|
|
||||||
kwargs: dict = {
|
|
||||||
"model": model,
|
|
||||||
"messages": messages,
|
|
||||||
"temperature": temperature,
|
|
||||||
}
|
|
||||||
if max_tokens:
|
|
||||||
kwargs["max_tokens"] = max_tokens
|
|
||||||
|
|
||||||
response = await client.chat.completions.create(**kwargs)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"content": response.choices[0].message.content,
|
|
||||||
"model": response.model,
|
|
||||||
}
|
|
||||||
@@ -1,92 +0,0 @@
|
|||||||
"""Ollama provider implementation for the Cascade LLM Router."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import base64
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
|
|
||||||
from infrastructure.router.models import ContentType, Provider
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def call_ollama(
|
|
||||||
provider: Provider,
|
|
||||||
messages: list[dict],
|
|
||||||
model: str,
|
|
||||||
temperature: float,
|
|
||||||
max_tokens: int | None,
|
|
||||||
content_type: ContentType,
|
|
||||||
timeout_seconds: int,
|
|
||||||
) -> dict:
|
|
||||||
"""Call Ollama API with multi-modal support."""
|
|
||||||
from config import settings
|
|
||||||
|
|
||||||
url = f"{provider.url or settings.ollama_url}/api/chat"
|
|
||||||
|
|
||||||
# Transform messages for Ollama format (including images)
|
|
||||||
transformed_messages = transform_messages_for_ollama(messages)
|
|
||||||
|
|
||||||
options: dict = {"temperature": temperature}
|
|
||||||
if max_tokens:
|
|
||||||
options["num_predict"] = max_tokens
|
|
||||||
|
|
||||||
payload = {
|
|
||||||
"model": model,
|
|
||||||
"messages": transformed_messages,
|
|
||||||
"stream": False,
|
|
||||||
"options": options,
|
|
||||||
}
|
|
||||||
|
|
||||||
timeout = aiohttp.ClientTimeout(total=timeout_seconds)
|
|
||||||
|
|
||||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
|
||||||
async with session.post(url, json=payload) as response:
|
|
||||||
if response.status != 200:
|
|
||||||
text = await response.text()
|
|
||||||
raise RuntimeError(f"Ollama error {response.status}: {text}")
|
|
||||||
|
|
||||||
data = await response.json()
|
|
||||||
return {
|
|
||||||
"content": data["message"]["content"],
|
|
||||||
"model": model,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def transform_messages_for_ollama(messages: list[dict]) -> list[dict]:
|
|
||||||
"""Transform messages to Ollama format, handling images."""
|
|
||||||
transformed = []
|
|
||||||
|
|
||||||
for msg in messages:
|
|
||||||
new_msg = {
|
|
||||||
"role": msg.get("role", "user"),
|
|
||||||
"content": msg.get("content", ""),
|
|
||||||
}
|
|
||||||
|
|
||||||
# Handle images
|
|
||||||
images = msg.get("images", [])
|
|
||||||
if images:
|
|
||||||
new_msg["images"] = []
|
|
||||||
for img in images:
|
|
||||||
if isinstance(img, str):
|
|
||||||
if img.startswith("data:image/"):
|
|
||||||
# Base64 encoded image
|
|
||||||
new_msg["images"].append(img.split(",")[1])
|
|
||||||
elif img.startswith("http://") or img.startswith("https://"):
|
|
||||||
# URL - would need to download, skip for now
|
|
||||||
logger.warning("Image URLs not yet supported, skipping: %s", img)
|
|
||||||
elif Path(img).exists():
|
|
||||||
# Local file path - read and encode
|
|
||||||
try:
|
|
||||||
with open(img, "rb") as f:
|
|
||||||
img_data = base64.b64encode(f.read()).decode()
|
|
||||||
new_msg["images"].append(img_data)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.error("Failed to read image %s: %s", img, exc)
|
|
||||||
|
|
||||||
transformed.append(new_msg)
|
|
||||||
|
|
||||||
return transformed
|
|
||||||
@@ -1,88 +0,0 @@
|
|||||||
"""OpenAI-compatible provider implementations for the Cascade LLM Router.
|
|
||||||
|
|
||||||
Covers the ``openai`` and ``vllm_mlx`` provider types.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from infrastructure.router.models import Provider
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
async def call_openai(
    provider: Provider,
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: int | None,
    timeout_seconds: int,
) -> dict:
    """Send a chat-completion request to the OpenAI API.

    Args:
        provider: Provider record supplying ``api_key`` and ``base_url``.
        messages: Chat messages in OpenAI format.
        model: Model identifier to request.
        temperature: Sampling temperature.
        max_tokens: Optional completion-length cap; omitted when falsy.
        timeout_seconds: Client-level request timeout.

    Returns:
        Dict with ``content`` (first choice's message text) and ``model``
        (the model name reported by the API).
    """
    import openai

    client = openai.AsyncOpenAI(
        api_key=provider.api_key,
        base_url=provider.base_url,
        timeout=timeout_seconds,
    )

    request: dict = dict(model=model, messages=messages, temperature=temperature)
    if max_tokens:
        request["max_tokens"] = max_tokens

    completion = await client.chat.completions.create(**request)

    return {
        "content": completion.choices[0].message.content,
        "model": completion.model,
    }
|
|
||||||
|
|
||||||
|
|
||||||
async def call_vllm_mlx(
    provider: Provider,
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: int | None,
    timeout_seconds: int,
) -> dict:
    """Call vllm-mlx via its OpenAI-compatible API.

    vllm-mlx exposes the same /v1/chat/completions endpoint as OpenAI,
    so we reuse the OpenAI client pointed at the local server.
    No API key is required for local deployments.

    Args:
        provider: Provider record; ``base_url``/``url`` locate the server.
        messages: Chat messages in OpenAI format.
        model: Model identifier to request.
        temperature: Sampling temperature.
        max_tokens: Optional completion-length cap; omitted when falsy.
        timeout_seconds: Client-level request timeout.

    Returns:
        Dict with ``content`` (first choice's message text) and ``model``.
    """
    import openai

    # The OpenAI client expects the base URL to end in /v1; normalize it.
    endpoint = provider.base_url or provider.url or "http://localhost:8000"
    trimmed = endpoint.rstrip("/")
    if not trimmed.endswith("/v1"):
        endpoint = trimmed + "/v1"

    client = openai.AsyncOpenAI(
        api_key=provider.api_key or "no-key-required",
        base_url=endpoint,
        timeout=timeout_seconds,
    )

    request: dict = dict(model=model, messages=messages, temperature=temperature)
    if max_tokens:
        request["max_tokens"] = max_tokens

    completion = await client.chat.completions.create(**request)

    return {
        "content": completion.choices[0].message.content,
        "model": completion.model,
    }
|
|
||||||
@@ -1,89 +0,0 @@
|
|||||||
"""Metrics, status, and config-reload helpers for the Cascade LLM Router."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
from infrastructure.router.models import (
|
|
||||||
CircuitState,
|
|
||||||
Provider,
|
|
||||||
ProviderMetrics,
|
|
||||||
ProviderStatus,
|
|
||||||
)
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
pass
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def build_metrics(providers: list[Provider]) -> dict:
    """Build a metrics summary dict for all providers.

    Args:
        providers: Providers whose runtime metrics should be reported.

    Returns:
        ``{"providers": [...]}`` with one entry per provider: name, type,
        status, circuit state, and rounded request/latency counters.
    """

    def _provider_entry(provider: Provider) -> dict:
        # One summary row per provider; rates rounded for readable output.
        metrics = provider.metrics
        return {
            "name": provider.name,
            "type": provider.type,
            "status": provider.status.value,
            "circuit_state": provider.circuit_state.value,
            "metrics": {
                "total_requests": metrics.total_requests,
                "successful": metrics.successful_requests,
                "failed": metrics.failed_requests,
                "error_rate": round(metrics.error_rate, 3),
                "avg_latency_ms": round(metrics.avg_latency_ms, 2),
            },
        }

    return {"providers": [_provider_entry(p) for p in providers]}
|
|
||||||
|
|
||||||
|
|
||||||
def build_status(providers: list[Provider]) -> dict:
    """Build a status summary dict for all providers.

    Args:
        providers: Providers to summarize.

    Returns:
        Aggregate healthy/degraded/unhealthy counts plus a per-provider
        row (name, type, status, priority, default model).
    """

    def _count(status: ProviderStatus) -> int:
        # Count providers currently in the given status.
        return sum(1 for item in providers if item.status == status)

    provider_rows = [
        {
            "name": item.name,
            "type": item.type,
            "status": item.status.value,
            "priority": item.priority,
            "default_model": item.get_default_model(),
        }
        for item in providers
    ]

    return {
        "total_providers": len(providers),
        "healthy_providers": _count(ProviderStatus.HEALTHY),
        "degraded_providers": _count(ProviderStatus.DEGRADED),
        "unhealthy_providers": _count(ProviderStatus.UNHEALTHY),
        "providers": provider_rows,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def snapshot_provider_state(
    providers: list[Provider],
) -> dict[str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]]:
    """Capture current runtime state keyed by provider name.

    Args:
        providers: Providers whose mutable runtime fields should be saved.

    Returns:
        Mapping of provider name to a (metrics, circuit_state,
        circuit_opened_at, half_open_calls, status) tuple.
    """
    snapshot = {}
    for provider in providers:
        snapshot[provider.name] = (
            provider.metrics,
            provider.circuit_state,
            provider.circuit_opened_at,
            provider.half_open_calls,
            provider.status,
        )
    return snapshot
|
|
||||||
|
|
||||||
|
|
||||||
def restore_provider_state(
    providers: list[Provider],
    old_state: dict[str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]],
) -> int:
    """Restore saved runtime state to matching providers.

    Providers without a saved entry are left untouched.

    Args:
        providers: Providers to restore state onto.
        old_state: Mapping produced by :func:`snapshot_provider_state`.

    Returns:
        Count of providers whose state was restored.
    """
    restored_count = 0
    for provider in providers:
        if provider.name not in old_state:
            continue
        # Unpack the saved tuple straight onto the provider's fields.
        (
            provider.metrics,
            provider.circuit_state,
            provider.circuit_opened_at,
            provider.half_open_calls,
            provider.status,
        ) = old_state[provider.name]
        restored_count += 1
    return restored_count
|
|
||||||
@@ -70,12 +70,12 @@ class SovereigntyAlert:
|
|||||||
|
|
||||||
|
|
||||||
# Graduation targets from issue #981
|
# Graduation targets from issue #981
|
||||||
GRADUATION_TARGETS = {
|
GRADUATION_TARGETS: dict[str, dict[str, float]] = {
|
||||||
"cache_hit_rate": {"week1": 0.10, "month1": 0.40, "month3": 0.80, "graduation": 0.90},
|
"cache_hit_rate": {"week1": 0.10, "month1": 0.40, "month3": 0.80, "graduation": 0.90},
|
||||||
"api_cost": {"week1": 1.50, "month1": 0.50, "month3": 0.10, "graduation": 0.01},
|
"api_cost": {"week1": 1.50, "month1": 0.50, "month3": 0.10, "graduation": 0.01},
|
||||||
"time_to_report": {"week1": 180.0, "month1": 30.0, "month3": 5.0, "graduation": 1.0},
|
"time_to_report": {"week1": 180.0, "month1": 30.0, "month3": 5.0, "graduation": 1.0},
|
||||||
"human_involvement": {"week1": 1.0, "month1": 0.5, "month3": 0.25, "graduation": 0.0},
|
"human_involvement": {"week1": 1.0, "month1": 0.5, "month3": 0.25, "graduation": 0.0},
|
||||||
"local_artifacts": {"week1": 6, "month1": 30, "month3": 100, "graduation": 500},
|
"local_artifacts": {"week1": 6.0, "month1": 30.0, "month3": 100.0, "graduation": 500.0},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -46,12 +46,13 @@ class VisitorRegistry:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
_instance: "VisitorRegistry | None" = None
|
_instance: "VisitorRegistry | None" = None
|
||||||
|
_visitors: dict[str, VisitorState]
|
||||||
|
|
||||||
def __new__(cls) -> "VisitorRegistry":
|
def __new__(cls) -> "VisitorRegistry":
|
||||||
"""Singleton constructor."""
|
"""Singleton constructor."""
|
||||||
if cls._instance is None:
|
if cls._instance is None:
|
||||||
cls._instance = super().__new__(cls)
|
cls._instance = super().__new__(cls)
|
||||||
cls._instance._visitors: dict[str, VisitorState] = {}
|
cls._instance._visitors = {}
|
||||||
return cls._instance
|
return cls._instance
|
||||||
|
|
||||||
def add(
|
def add(
|
||||||
|
|||||||
@@ -123,10 +123,8 @@ class RecoveryManager:
|
|||||||
if snapshot_id is None:
|
if snapshot_id is None:
|
||||||
snap_data = history[0] # most recent
|
snap_data = history[0] # most recent
|
||||||
else:
|
else:
|
||||||
snap_data = next(
|
matches = [s for s in history if s["snapshot_id"] == snapshot_id]
|
||||||
(s for s in history if s["snapshot_id"] == snapshot_id),
|
snap_data = matches[0] if matches else None
|
||||||
None,
|
|
||||||
)
|
|
||||||
|
|
||||||
if snap_data is None:
|
if snap_data is None:
|
||||||
logger.warning("RecoveryManager: snapshot %s not found", snapshot_id)
|
logger.warning("RecoveryManager: snapshot %s not found", snapshot_id)
|
||||||
|
|||||||
@@ -177,35 +177,6 @@ def think(
|
|||||||
timmy.print_response(f"Think carefully about: {topic}", stream=True, session_id=_CLI_SESSION_ID)
|
timmy.print_response(f"Think carefully about: {topic}", stream=True, session_id=_CLI_SESSION_ID)
|
||||||
|
|
||||||
|
|
||||||
def _read_message_input(message: list[str]) -> str:
    """Join CLI arguments and read from stdin when appropriate.

    Stdin is consulted when the joined arguments are exactly ``"-"`` or the
    session is non-interactive (piped input). An explicit ``"-"`` with empty
    stdin is an error; otherwise the joined arguments are the fallback.
    """
    joined = " ".join(message)
    wants_stdin = joined == "-"

    if not wants_stdin and _is_interactive():
        return joined

    try:
        piped = sys.stdin.read().strip()
    except (KeyboardInterrupt, EOFError):
        piped = ""

    if piped:
        return piped
    if wants_stdin:
        # The user explicitly asked for stdin but provided nothing.
        typer.echo("No input provided via stdin.", err=True)
        raise typer.Exit(1)
    return joined
|
|
||||||
|
|
||||||
|
|
||||||
def _resolve_session_id(session_id: str | None, new_session: bool) -> str:
|
|
||||||
"""Return the effective session ID based on CLI flags."""
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
if session_id is not None:
|
|
||||||
return session_id
|
|
||||||
if new_session:
|
|
||||||
return str(uuid.uuid4())
|
|
||||||
return _CLI_SESSION_ID
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
|
@app.command()
|
||||||
def chat(
|
def chat(
|
||||||
message: list[str] = typer.Argument(
|
message: list[str] = typer.Argument(
|
||||||
|
|||||||
@@ -349,7 +349,7 @@ async def triage_research_report(
|
|||||||
logger.info("No action items extracted from research report")
|
logger.info("No action items extracted from research report")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
results = []
|
results: list[dict] = []
|
||||||
for item in items:
|
for item in items:
|
||||||
if dry_run:
|
if dry_run:
|
||||||
results.append({"action_item": item, "gitea_issue": None})
|
results.append({"action_item": item, "gitea_issue": None})
|
||||||
|
|||||||
@@ -249,8 +249,8 @@ class _ErrorRunOutput:
|
|||||||
def __init__(self, message: str):
|
def __init__(self, message: str):
|
||||||
self.content = message
|
self.content = message
|
||||||
self.status = "ERROR"
|
self.status = "ERROR"
|
||||||
self.requirements = []
|
self.requirements: list = []
|
||||||
self.tools = []
|
self.tools: list = []
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def active_requirements(self):
|
def active_requirements(self):
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ class SessionLogger:
|
|||||||
content: The message content
|
content: The message content
|
||||||
confidence: Optional confidence score (0.0 to 1.0)
|
confidence: Optional confidence score (0.0 to 1.0)
|
||||||
"""
|
"""
|
||||||
entry = {
|
entry: dict = {
|
||||||
"type": "message",
|
"type": "message",
|
||||||
"role": role,
|
"role": role,
|
||||||
"content": content,
|
"content": content,
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ except Exception: # ImportError or circular import during early startup
|
|||||||
try:
|
try:
|
||||||
from infrastructure.sovereignty_metrics import GRADUATION_TARGETS, get_sovereignty_store
|
from infrastructure.sovereignty_metrics import GRADUATION_TARGETS, get_sovereignty_store
|
||||||
except Exception:
|
except Exception:
|
||||||
GRADUATION_TARGETS: dict = {} # type: ignore[assignment]
|
GRADUATION_TARGETS: dict = {} # type: ignore[assignment,no-redef]
|
||||||
get_sovereignty_store = None # type: ignore[assignment]
|
get_sovereignty_store = None # type: ignore[assignment]
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|||||||
2
tox.ini
2
tox.ini
@@ -41,6 +41,8 @@ description = Static type checking with mypy
|
|||||||
commands_pre =
|
commands_pre =
|
||||||
deps =
|
deps =
|
||||||
mypy>=1.0.0
|
mypy>=1.0.0
|
||||||
|
types-PyYAML
|
||||||
|
types-requests
|
||||||
commands =
|
commands =
|
||||||
mypy src --ignore-missing-imports --no-error-summary
|
mypy src --ignore-missing-imports --no-error-summary
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user