diff --git a/src/config.py b/src/config.py index 052285a..32bbde6 100644 --- a/src/config.py +++ b/src/config.py @@ -64,17 +64,10 @@ class Settings(BaseSettings): # Seconds to wait for user confirmation before auto-rejecting. discord_confirm_timeout: int = 120 - # ── AirLLM / backend selection ─────────────────────────────────────────── + # ── Backend selection ──────────────────────────────────────────────────── # "ollama" — always use Ollama (default, safe everywhere) - # "airllm" — always use AirLLM (requires pip install ".[bigbrain]") - # "auto" — use AirLLM on Apple Silicon if airllm is installed, - # fall back to Ollama otherwise - timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama" - - # AirLLM model size when backend is airllm or auto. - # Larger = smarter, but needs more RAM / disk. - # 8b ~16 GB | 70b ~140 GB | 405b ~810 GB - airllm_model_size: Literal["8b", "70b", "405b"] = "70b" + # "auto" — auto-detect best available backend + timmy_model_backend: Literal["ollama", "grok", "claude", "auto"] = "ollama" # ── Grok (xAI) — opt-in premium cloud backend ──────────────────────── # Grok is a premium augmentation layer — local-first ethos preserved. diff --git a/src/infrastructure/router/cascade.py b/src/infrastructure/router/cascade.py index aacec03..83a4f4a 100644 --- a/src/infrastructure/router/cascade.py +++ b/src/infrastructure/router/cascade.py @@ -826,7 +826,9 @@ class CascadeRouter: Summary dict with added/removed/preserved counts. """ # Snapshot current runtime state keyed by provider name - old_state: dict[str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]] = {} + old_state: dict[ + str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus] + ] = {} for p in self.providers: old_state[p.name] = ( p.metrics, diff --git a/src/timmy/agent.py b/src/timmy/agent.py index 04d8690..6aa67bd 100644 --- a/src/timmy/agent.py +++ b/src/timmy/agent.py @@ -220,7 +220,7 @@ def create_timmy( print_response(message, stream). """ resolved = _resolve_backend(backend) - size = model_size or settings.airllm_model_size + size = model_size or "70b" if resolved == "claude": from timmy.backends import ClaudeBackend diff --git a/tests/timmy/test_agent.py b/tests/timmy/test_agent.py index b975d48..21bec95 100644 --- a/tests/timmy/test_agent.py +++ b/tests/timmy/test_agent.py @@ -81,7 +81,6 @@ def test_create_timmy_respects_custom_ollama_url(): mock_settings.ollama_url = custom_url mock_settings.ollama_num_ctx = 4096 mock_settings.timmy_model_backend = "ollama" - mock_settings.airllm_model_size = "70b" from timmy.agent import create_timmy @@ -159,7 +158,6 @@ def test_resolve_backend_auto_uses_airllm_on_apple_silicon(): patch("timmy.agent.settings") as mock_settings, ): mock_settings.timmy_model_backend = "auto" - mock_settings.airllm_model_size = "70b" mock_settings.ollama_model = "llama3.2" from timmy.agent import _resolve_backend @@ -174,7 +172,6 @@ def test_resolve_backend_auto_falls_back_on_non_apple(): patch("timmy.agent.settings") as mock_settings, ): mock_settings.timmy_model_backend = "auto" - mock_settings.airllm_model_size = "70b" mock_settings.ollama_model = "llama3.2" from timmy.agent import _resolve_backend @@ -259,7 +256,6 @@ def test_create_timmy_includes_tools_for_large_model(): mock_settings.ollama_url = "http://localhost:11434" mock_settings.ollama_num_ctx = 4096 mock_settings.timmy_model_backend = "ollama" - mock_settings.airllm_model_size = "70b" mock_settings.telemetry_enabled = False from timmy.agent import create_timmy