From 94cd1a984042d6e94fedbcd80ca0b705c9ad457f Mon Sep 17 00:00:00 2001 From: Kimi Agent Date: Sat, 14 Mar 2026 17:26:47 -0400 Subject: [PATCH] fix: make model fallback chains configurable (#53) Move hardcoded model fallback lists from module-level constants into settings.fallback_models and settings.vision_fallback_models (pydantic Settings fields). Can now be overridden via env vars FALLBACK_MODELS / VISION_FALLBACK_MODELS or config/providers.yaml. Removed: - OLLAMA_MODEL_PRIMARY / OLLAMA_MODEL_FALLBACK from config.py - DEFAULT_MODEL_FALLBACKS / VISION_MODEL_FALLBACKS from agent.py get_effective_ollama_model() and _resolve_model_with_fallback() now walk the configurable chains instead of hardcoded constants. 5 new tests guard the configurable behavior and prevent regression to hardcoded constants. --- src/config.py | 56 +++++++++++-------- src/timmy/agent.py | 22 +------- tests/e2e/test_ollama_integration.py | 5 +- tests/timmy/test_agent.py | 80 ++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+), 43 deletions(-) diff --git a/src/config.py b/src/config.py index 02f1a96..f4f139a 100644 --- a/src/config.py +++ b/src/config.py @@ -22,6 +22,24 @@ class Settings(BaseSettings): # llama3.2 (3B) hallucinated tool output consistently in testing. ollama_model: str = "qwen3.5:latest" + # Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS + # as JSON lists (pydantic-settings parses list-typed env values as JSON), e.g. FALLBACK_MODELS='["qwen3.5:latest","llama3.1"]' + # Or edit config/providers.yaml → fallback_chains for the canonical source. 
+ fallback_models: list[str] = [ + "llama3.1:8b-instruct", + "llama3.1", + "qwen3.5:latest", + "qwen2.5:14b", + "qwen2.5:7b", + "llama3.2:3b", + ] + vision_fallback_models: list[str] = [ + "llama3.2:3b", + "llava:7b", + "qwen2.5-vl:3b", + "moondream:1.8b", + ] + # Set DEBUG=true to enable /docs and /redoc (disabled by default) debug: bool = False @@ -346,10 +364,9 @@ if not settings.repo_root: settings.repo_root = settings._compute_repo_root() # ── Model fallback configuration ──────────────────────────────────────────── -# Primary model for reliable tool calling (llama3.1:8b-instruct) -# Fallback if primary not available: qwen3.5:latest -OLLAMA_MODEL_PRIMARY: str = "qwen3.5:latest" -OLLAMA_MODEL_FALLBACK: str = "llama3.1:8b-instruct" +# Fallback chains are now in settings.fallback_models / settings.vision_fallback_models. +# Override via env vars (FALLBACK_MODELS, VISION_FALLBACK_MODELS) or +# edit config/providers.yaml → fallback_chains. def check_ollama_model_available(model_name: str) -> bool: @@ -376,28 +393,25 @@ def check_ollama_model_available(model_name: str) -> bool: def get_effective_ollama_model() -> str: - """Get the effective Ollama model, with fallback logic.""" - # If user has overridden, use their setting + """Get the effective Ollama model, with fallback logic. + + Walks the configurable ``settings.fallback_models`` chain when the + user's preferred model is not available locally. + """ user_model = settings.ollama_model - # Check if user's model is available if check_ollama_model_available(user_model): return user_model - # Try primary - if check_ollama_model_available(OLLAMA_MODEL_PRIMARY): - _startup_logger.warning( - f"Requested model '{user_model}' not available. Using primary: {OLLAMA_MODEL_PRIMARY}" - ) - return OLLAMA_MODEL_PRIMARY - - # Try fallback - if check_ollama_model_available(OLLAMA_MODEL_FALLBACK): - _startup_logger.warning( - f"Primary model '{OLLAMA_MODEL_PRIMARY}' not available. 
" - f"Using fallback: {OLLAMA_MODEL_FALLBACK}" - ) - return OLLAMA_MODEL_FALLBACK + # Walk the configurable fallback chain + for fallback in settings.fallback_models: + if check_ollama_model_available(fallback): + _startup_logger.warning( + "Requested model '%s' not available. Using fallback: %s", + user_model, + fallback, + ) + return fallback # Last resort - return user's setting and hope for the best return user_model diff --git a/src/timmy/agent.py b/src/timmy/agent.py index b42018b..091f235 100644 --- a/src/timmy/agent.py +++ b/src/timmy/agent.py @@ -29,24 +29,6 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) -# Fallback chain for text/tool models (in order of preference) -DEFAULT_MODEL_FALLBACKS = [ - "llama3.1:8b-instruct", - "llama3.1", - "qwen3.5:latest", - "qwen2.5:14b", - "qwen2.5:7b", - "llama3.2:3b", -] - -# Fallback chain for vision models -VISION_MODEL_FALLBACKS = [ - "llama3.2:3b", - "llava:7b", - "qwen2.5-vl:3b", - "moondream:1.8b", -] - # Union type for callers that want to hint the return type. 
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend", "ClaudeBackend"] @@ -130,8 +112,8 @@ def _resolve_model_with_fallback( return model, False logger.warning("Failed to pull %s, checking fallbacks...", model) - # Use appropriate fallback chain - fallback_chain = VISION_MODEL_FALLBACKS if require_vision else DEFAULT_MODEL_FALLBACKS + # Use appropriate configurable fallback chain (from settings / env vars) + fallback_chain = settings.vision_fallback_models if require_vision else settings.fallback_models for fallback_model in fallback_chain: if _check_model_available(fallback_model): diff --git a/tests/e2e/test_ollama_integration.py b/tests/e2e/test_ollama_integration.py index 6e8120d..e193e0b 100644 --- a/tests/e2e/test_ollama_integration.py +++ b/tests/e2e/test_ollama_integration.py @@ -33,7 +33,8 @@ async def test_ollama_connection(): @pytest.mark.asyncio async def test_model_fallback_chain(): """Test that the model fallback chain works correctly.""" - from timmy.agent import DEFAULT_MODEL_FALLBACKS, _resolve_model_with_fallback + from config import settings + from timmy.agent import _resolve_model_with_fallback # Test with a non-existent model model, is_fallback = _resolve_model_with_fallback( @@ -46,7 +47,7 @@ async def test_model_fallback_chain(): # or the last resort (the requested model itself if nothing else is available). # In tests, if no models are available in the mock environment, it might return the requested model. 
if is_fallback: - assert model in DEFAULT_MODEL_FALLBACKS + assert model in settings.fallback_models else: # If no fallbacks were available, it returns the requested model as last resort assert model == "nonexistent-model" diff --git a/tests/timmy/test_agent.py b/tests/timmy/test_agent.py index 25097ea..948105f 100644 --- a/tests/timmy/test_agent.py +++ b/tests/timmy/test_agent.py @@ -341,3 +341,83 @@ def test_create_timmy_default_includes_mcp_tools(): # MCP factories should be called when skip_mcp is False mock_gitea_mcp.assert_called_once() mock_fs_mcp.assert_called_once() + + +# ── Configurable fallback chain tests ──────────────────────────────────────── + + +def test_settings_has_fallback_model_lists(): + """settings.fallback_models and vision_fallback_models exist and are lists.""" + from config import settings + + assert isinstance(settings.fallback_models, list) + assert isinstance(settings.vision_fallback_models, list) + assert len(settings.fallback_models) > 0 + assert len(settings.vision_fallback_models) > 0 + + +def test_resolve_model_uses_configurable_text_fallback(): + """_resolve_model_with_fallback walks settings.fallback_models for text models.""" + with patch("timmy.agent.settings") as mock_settings: + mock_settings.ollama_model = "nonexistent-model" + mock_settings.fallback_models = ["custom-a", "custom-b"] + mock_settings.vision_fallback_models = ["vision-a"] + + # First model in chain is available + with patch("timmy.agent._check_model_available", side_effect=lambda m: m == "custom-a"): + from timmy.agent import _resolve_model_with_fallback + + model, is_fallback = _resolve_model_with_fallback( + requested_model="nonexistent-model", + require_vision=False, + auto_pull=False, + ) + assert model == "custom-a" + assert is_fallback is True + + +def test_resolve_model_uses_configurable_vision_fallback(): + """_resolve_model_with_fallback walks settings.vision_fallback_models for vision.""" + with patch("timmy.agent.settings") as mock_settings: + 
mock_settings.ollama_model = "nonexistent-model" + mock_settings.fallback_models = ["text-a"] + mock_settings.vision_fallback_models = ["vision-x", "vision-y"] + + with patch("timmy.agent._check_model_available", side_effect=lambda m: m == "vision-y"): + from timmy.agent import _resolve_model_with_fallback + + model, is_fallback = _resolve_model_with_fallback( + requested_model="nonexistent-model", + require_vision=True, + auto_pull=False, + ) + assert model == "vision-y" + assert is_fallback is True + + +def test_get_effective_ollama_model_walks_fallback_chain(): + """get_effective_ollama_model uses settings.fallback_models.""" + with ( + patch("config.settings") as mock_settings, + patch("config.check_ollama_model_available", side_effect=lambda m: m == "fb-2") as _, + ): + mock_settings.ollama_model = "gone-model" + mock_settings.ollama_url = "http://localhost:11434" + mock_settings.fallback_models = ["fb-1", "fb-2", "fb-3"] + + from config import get_effective_ollama_model + + result = get_effective_ollama_model() + assert result == "fb-2" + + +def test_no_hardcoded_fallback_constants_in_agent(): + """agent.py must not define module-level DEFAULT_MODEL_FALLBACKS.""" + import timmy.agent as agent_mod + + assert not hasattr(agent_mod, "DEFAULT_MODEL_FALLBACKS"), ( + "Hardcoded DEFAULT_MODEL_FALLBACKS still exists — use settings.fallback_models" + ) + assert not hasattr(agent_mod, "VISION_MODEL_FALLBACKS"), ( + "Hardcoded VISION_MODEL_FALLBACKS still exists — use settings.vision_fallback_models" + )