fix: remove AirLLM config settings from config.py

Remove `airllm` from the `timmy_model_backend` Literal type and delete the `airllm_model_size` field plus its associated comments. Replace the one `settings.airllm_model_size` reference in agent.py with a hardcoded default ("70b"), and clean up the mock assignments in tests. Also re-wrap an over-long type annotation in cascade.py (formatting only).

Fixes #473

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 15:26:10 -04:00
4 changed files with 7 additions and 16 deletions
--- a/src/config.py
+++ b/src/config.py
@@ -64,17 +64,10 @@ class Settings(BaseSettings):
    # Seconds to wait for user confirmation before auto-rejecting.
    discord_confirm_timeout: int = 120

-    # ── AirLLM / backend selection ───────────────────────────────────────────
+    # ── Backend selection ────────────────────────────────────────────────────
    # "ollama"  — always use Ollama (default, safe everywhere)
-    # "airllm"  — always use AirLLM (requires pip install ".[bigbrain]")
-    # "auto"    — use AirLLM on Apple Silicon if airllm is installed,
-    #             fall back to Ollama otherwise
-    timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama"
-
-    # AirLLM model size when backend is airllm or auto.
-    # Larger = smarter, but needs more RAM / disk.
-    # 8b  ~16 GB  |  70b  ~140 GB  |  405b  ~810 GB
-    airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
+    # "auto"    — auto-detect best available backend
+    timmy_model_backend: Literal["ollama", "grok", "claude", "auto"] = "ollama"

    # ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
    # Grok is a premium augmentation layer — local-first ethos preserved.
--- a/src/infrastructure/router/cascade.py
+++ b/src/infrastructure/router/cascade.py
@@ -826,7 +826,9 @@ class CascadeRouter:
            Summary dict with added/removed/preserved counts.
        """
        # Snapshot current runtime state keyed by provider name
-        old_state: dict[str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]] = {}
+        old_state: dict[
+            str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]
+        ] = {}
        for p in self.providers:
            old_state[p.name] = (
                p.metrics,
--- a/src/timmy/agent.py
+++ b/src/timmy/agent.py
@@ -220,7 +220,7 @@ def create_timmy(
    print_response(message, stream).
    """
    resolved = _resolve_backend(backend)
-    size = model_size or settings.airllm_model_size
+    size = model_size or "70b"

    if resolved == "claude":
        from timmy.backends import ClaudeBackend
--- a/tests/timmy/test_agent.py
+++ b/tests/timmy/test_agent.py
@@ -81,7 +81,6 @@ def test_create_timmy_respects_custom_ollama_url():
        mock_settings.ollama_url = custom_url
        mock_settings.ollama_num_ctx = 4096
        mock_settings.timmy_model_backend = "ollama"
-        mock_settings.airllm_model_size = "70b"

        from timmy.agent import create_timmy

@@ -159,7 +158,6 @@ def test_resolve_backend_auto_uses_airllm_on_apple_silicon():
        patch("timmy.agent.settings") as mock_settings,
    ):
        mock_settings.timmy_model_backend = "auto"
-        mock_settings.airllm_model_size = "70b"
        mock_settings.ollama_model = "llama3.2"

        from timmy.agent import _resolve_backend
@@ -174,7 +172,6 @@ def test_resolve_backend_auto_falls_back_on_non_apple():
        patch("timmy.agent.settings") as mock_settings,
    ):
        mock_settings.timmy_model_backend = "auto"
-        mock_settings.airllm_model_size = "70b"
        mock_settings.ollama_model = "llama3.2"

        from timmy.agent import _resolve_backend
@@ -259,7 +256,6 @@ def test_create_timmy_includes_tools_for_large_model():
        mock_settings.ollama_url = "http://localhost:11434"
        mock_settings.ollama_num_ctx = 4096
        mock_settings.timmy_model_backend = "ollama"
-        mock_settings.airllm_model_size = "70b"
        mock_settings.telemetry_enabled = False

        from timmy.agent import create_timmy