diff --git a/src/config.py b/src/config.py index 052285a..3750e88 100644 --- a/src/config.py +++ b/src/config.py @@ -64,17 +64,10 @@ class Settings(BaseSettings): # Seconds to wait for user confirmation before auto-rejecting. discord_confirm_timeout: int = 120 - # ── AirLLM / backend selection ─────────────────────────────────────────── + # ── Backend selection ──────────────────────────────────────────────────── # "ollama" — always use Ollama (default, safe everywhere) - # "airllm" — always use AirLLM (requires pip install ".[bigbrain]") - # "auto" — use AirLLM on Apple Silicon if airllm is installed, - # fall back to Ollama otherwise - timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama" - - # AirLLM model size when backend is airllm or auto. - # Larger = smarter, but needs more RAM / disk. - # 8b ~16 GB | 70b ~140 GB | 405b ~810 GB - airllm_model_size: Literal["8b", "70b", "405b"] = "70b" + # "auto" — pick best available local backend, fall back to Ollama + timmy_model_backend: Literal["ollama", "grok", "claude", "auto"] = "ollama" # ── Grok (xAI) — opt-in premium cloud backend ──────────────────────── # Grok is a premium augmentation layer — local-first ethos preserved. diff --git a/src/infrastructure/router/cascade.py b/src/infrastructure/router/cascade.py index aacec03..83a4f4a 100644 --- a/src/infrastructure/router/cascade.py +++ b/src/infrastructure/router/cascade.py @@ -826,7 +826,9 @@ class CascadeRouter: Summary dict with added/removed/preserved counts. """ # Snapshot current runtime state keyed by provider name - old_state: dict[str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]] = {} + old_state: dict[ + str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus] + ] = {} for p in self.providers: old_state[p.name] = ( p.metrics, diff --git a/src/timmy/agent.py b/src/timmy/agent.py index 04d8690..6aa67bd 100644 --- a/src/timmy/agent.py +++ b/src/timmy/agent.py @@ -220,7 +220,7 @@ def create_timmy( print_response(message, stream). """ resolved = _resolve_backend(backend) - size = model_size or settings.airllm_model_size + size = model_size or "70b" if resolved == "claude": from timmy.backends import ClaudeBackend