From d5361a0385485449c3b444ea89ddd5ca6732abda Mon Sep 17 00:00:00 2001 From: kimi Date: Thu, 19 Mar 2026 15:26:10 -0400 Subject: [PATCH] fix: remove AirLLM config settings from config.py Remove `airllm` from timmy_model_backend Literal type and delete the airllm_model_size field plus associated comments. Replace the one settings.airllm_model_size reference in agent.py with a hardcoded default, and clean up mock assignments in tests. Also wrap an over-long type annotation in cascade.py across multiple lines for readability (no behavior change). Fixes #473 Co-Authored-By: Claude Opus 4.6 --- src/config.py | 13 +++---------- src/infrastructure/router/cascade.py | 4 +++- src/timmy/agent.py | 2 +- tests/timmy/test_agent.py | 4 ---- 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/src/config.py b/src/config.py index 052285a..32bbde6 100644 --- a/src/config.py +++ b/src/config.py @@ -64,17 +64,10 @@ class Settings(BaseSettings): # Seconds to wait for user confirmation before auto-rejecting. discord_confirm_timeout: int = 120 - # ── AirLLM / backend selection ─────────────────────────────────────────── + # ── Backend selection ──────────────────────────────────────────────────── # "ollama" — always use Ollama (default, safe everywhere) - # "airllm" — always use AirLLM (requires pip install ".[bigbrain]") - # "auto" — use AirLLM on Apple Silicon if airllm is installed, - # fall back to Ollama otherwise - timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama" - - # AirLLM model size when backend is airllm or auto. - # Larger = smarter, but needs more RAM / disk. - # 8b ~16 GB | 70b ~140 GB | 405b ~810 GB - airllm_model_size: Literal["8b", "70b", "405b"] = "70b" + # "auto" — auto-detect best available backend + timmy_model_backend: Literal["ollama", "grok", "claude", "auto"] = "ollama" # ── Grok (xAI) — opt-in premium cloud backend ──────────────────────── # Grok is a premium augmentation layer — local-first ethos preserved. 
diff --git a/src/infrastructure/router/cascade.py b/src/infrastructure/router/cascade.py index aacec03..83a4f4a 100644 --- a/src/infrastructure/router/cascade.py +++ b/src/infrastructure/router/cascade.py @@ -826,7 +826,9 @@ class CascadeRouter: Summary dict with added/removed/preserved counts. """ # Snapshot current runtime state keyed by provider name - old_state: dict[str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]] = {} + old_state: dict[ + str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus] + ] = {} for p in self.providers: old_state[p.name] = ( p.metrics, diff --git a/src/timmy/agent.py b/src/timmy/agent.py index 04d8690..6aa67bd 100644 --- a/src/timmy/agent.py +++ b/src/timmy/agent.py @@ -220,7 +220,7 @@ def create_timmy( print_response(message, stream). """ resolved = _resolve_backend(backend) - size = model_size or settings.airllm_model_size + size = model_size or "70b" if resolved == "claude": from timmy.backends import ClaudeBackend diff --git a/tests/timmy/test_agent.py b/tests/timmy/test_agent.py index b975d48..21bec95 100644 --- a/tests/timmy/test_agent.py +++ b/tests/timmy/test_agent.py @@ -81,7 +81,6 @@ def test_create_timmy_respects_custom_ollama_url(): mock_settings.ollama_url = custom_url mock_settings.ollama_num_ctx = 4096 mock_settings.timmy_model_backend = "ollama" - mock_settings.airllm_model_size = "70b" from timmy.agent import create_timmy @@ -159,7 +158,6 @@ def test_resolve_backend_auto_uses_airllm_on_apple_silicon(): patch("timmy.agent.settings") as mock_settings, ): mock_settings.timmy_model_backend = "auto" - mock_settings.airllm_model_size = "70b" mock_settings.ollama_model = "llama3.2" from timmy.agent import _resolve_backend @@ -174,7 +172,6 @@ def test_resolve_backend_auto_falls_back_on_non_apple(): patch("timmy.agent.settings") as mock_settings, ): mock_settings.timmy_model_backend = "auto" - mock_settings.airllm_model_size = "70b" mock_settings.ollama_model = "llama3.2" from 
timmy.agent import _resolve_backend @@ -259,7 +256,6 @@ def test_create_timmy_includes_tools_for_large_model(): mock_settings.ollama_url = "http://localhost:11434" mock_settings.ollama_num_ctx = 4096 mock_settings.timmy_model_backend = "ollama" - mock_settings.airllm_model_size = "70b" mock_settings.telemetry_enabled = False from timmy.agent import create_timmy