From d5361a0385485449c3b444ea89ddd5ca6732abda Mon Sep 17 00:00:00 2001 From: kimi Date: Thu, 19 Mar 2026 15:26:10 -0400 Subject: [PATCH] fix: remove AirLLM config settings from config.py Remove `airllm` from timmy_model_backend Literal type and delete the airllm_model_size field plus associated comments. Replace the one settings.airllm_model_size reference in agent.py with a hardcoded default, and clean up mock assignments in tests. Also wrap an over-long type annotation in cascade.py across multiple lines for readability (no behavior change). Fixes #473 Co-Authored-By: Claude Opus 4.6 --- src/config.py | 13 +++---------- src/infrastructure/router/cascade.py | 4 +++- src/timmy/agent.py | 2 +- tests/timmy/test_agent.py | 4 ---- 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/src/config.py b/src/config.py index 052285a..32bbde6 100644 --- a/src/config.py +++ b/src/config.py @@ -64,17 +64,10 @@ class Settings(BaseSettings): # Seconds to wait for user confirmation before auto-rejecting. discord_confirm_timeout: int = 120 - # ── AirLLM / backend selection ─────────────────────────────────────────── + # ── Backend selection ──────────────────────────────────────────────────── # "ollama" — always use Ollama (default, safe everywhere) - # "airllm" — always use AirLLM (requires pip install ".[bigbrain]") - # "auto" — use AirLLM on Apple Silicon if airllm is installed, - # fall back to Ollama otherwise - timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama" - - # AirLLM model size when backend is airllm or auto. - # Larger = smarter, but needs more RAM / disk. - # 8b ~16 GB | 70b ~140 GB | 405b ~810 GB - airllm_model_size: Literal["8b", "70b", "405b"] = "70b" + # "auto" — auto-detect best available backend + timmy_model_backend: Literal["ollama", "grok", "claude", "auto"] = "ollama" # ── Grok (xAI) — opt-in premium cloud backend ──────────────────────── # Grok is a premium augmentation layer — local-first ethos preserved. 
diff --git a/src/infrastructure/router/cascade.py b/src/infrastructure/router/cascade.py index aacec03..83a4f4a 100644 --- a/src/infrastructure/router/cascade.py +++ b/src/infrastructure/router/cascade.py @@ -826,7 +826,9 @@ class CascadeRouter: Summary dict with added/removed/preserved counts. """ # Snapshot current runtime state keyed by provider name - old_state: dict[str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]] = {} + old_state: dict[ + str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus] + ] = {} for p in self.providers: old_state[p.name] = ( p.metrics, diff --git a/src/timmy/agent.py b/src/timmy/agent.py index 04d8690..6aa67bd 100644 --- a/src/timmy/agent.py +++ b/src/timmy/agent.py @@ -220,7 +220,7 @@ def create_timmy( print_response(message, stream). """ resolved = _resolve_backend(backend) - size = model_size or settings.airllm_model_size + size = model_size or "70b" if resolved == "claude": from timmy.backends import ClaudeBackend diff --git a/tests/timmy/test_agent.py b/tests/timmy/test_agent.py index b975d48..21bec95 100644 --- a/tests/timmy/test_agent.py +++ b/tests/timmy/test_agent.py @@ -81,7 +81,6 @@ def test_create_timmy_respects_custom_ollama_url(): mock_settings.ollama_url = custom_url mock_settings.ollama_num_ctx = 4096 mock_settings.timmy_model_backend = "ollama" - mock_settings.airllm_model_size = "70b" from timmy.agent import create_timmy @@ -159,7 +158,6 @@ def test_resolve_backend_auto_uses_airllm_on_apple_silicon(): patch("timmy.agent.settings") as mock_settings, ): mock_settings.timmy_model_backend = "auto" - mock_settings.airllm_model_size = "70b" mock_settings.ollama_model = "llama3.2" from timmy.agent import _resolve_backend @@ -174,7 +172,6 @@ def test_resolve_backend_auto_falls_back_on_non_apple(): patch("timmy.agent.settings") as mock_settings, ): mock_settings.timmy_model_backend = "auto" - mock_settings.airllm_model_size = "70b" mock_settings.ollama_model = "llama3.2" from 
timmy.agent import _resolve_backend @@ -259,7 +256,6 @@ def test_create_timmy_includes_tools_for_large_model(): mock_settings.ollama_url = "http://localhost:11434" mock_settings.ollama_num_ctx = 4096 mock_settings.timmy_model_backend = "ollama" - mock_settings.airllm_model_size = "70b" mock_settings.telemetry_enabled = False from timmy.agent import create_timmy