fix: make model fallback chains configurable (#53)

Move the hardcoded model fallback lists from module-level constants into settings.fallback_models and settings.vision_fallback_models (pydantic Settings fields). They can now be overridden via the env vars FALLBACK_MODELS / VISION_FALLBACK_MODELS or via config/providers.yaml.

Removed:
- OLLAMA_MODEL_PRIMARY / OLLAMA_MODEL_FALLBACK from config.py
- DEFAULT_MODEL_FALLBACKS / VISION_MODEL_FALLBACKS from agent.py

get_effective_ollama_model() and _resolve_model_with_fallback() now walk the configurable chains instead of hardcoded constants.

5 new tests guard the configurable behavior and prevent regression to hardcoded constants.
2026-03-14 17:26:47 -04:00
parent f097784de8
commit 94cd1a9840
4 changed files with 120 additions and 43 deletions
--- a/src/config.py
+++ b/src/config.py
@@ -22,6 +22,24 @@ class Settings(BaseSettings):
    # llama3.2 (3B) hallucinated tool output consistently in testing.
    ollama_model: str = "qwen3.5:latest"

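+    # Fallback model chains — override with the FALLBACK_MODELS / VISION_FALLBACK_MODELS env vars.
+    # NOTE(review): pydantic-settings decodes list fields from env vars as JSON by default, e.g. FALLBACK_MODELS='["qwen3.5:latest","llama3.1"]'; a comma-separated string presumably needs a custom validator — confirm.
+    # Or edit config/providers.yaml → fallback_chains for the canonical source.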
+    fallback_models: list[str] = [
+        "llama3.1:8b-instruct",
+        "llama3.1",
+        "qwen3.5:latest",
+        "qwen2.5:14b",
+        "qwen2.5:7b",
+        "llama3.2:3b",
+    ]
+    vision_fallback_models: list[str] = [
+        "llama3.2:3b",
+        "llava:7b",
+        "qwen2.5-vl:3b",
+        "moondream:1.8b",
+    ]
+
    # Set DEBUG=true to enable /docs and /redoc (disabled by default)
    debug: bool = False

@@ -346,10 +364,9 @@ if not settings.repo_root:
    settings.repo_root = settings._compute_repo_root()

 # ── Model fallback configuration ────────────────────────────────────────────
-# Primary model for reliable tool calling (llama3.1:8b-instruct)
-# Fallback if primary not available: qwen3.5:latest
-OLLAMA_MODEL_PRIMARY: str = "qwen3.5:latest"
-OLLAMA_MODEL_FALLBACK: str = "llama3.1:8b-instruct"
+# Fallback chains are now in settings.fallback_models / settings.vision_fallback_models.
+# Override via env vars (FALLBACK_MODELS, VISION_FALLBACK_MODELS) or
+# edit config/providers.yaml → fallback_chains.


 def check_ollama_model_available(model_name: str) -> bool:
@@ -376,28 +393,25 @@ def check_ollama_model_available(model_name: str) -> bool:


 def get_effective_ollama_model() -> str:
-    """Get the effective Ollama model, with fallback logic."""
-    # If user has overridden, use their setting
+    """Get the effective Ollama model, with fallback logic.
+
+    Walks the configurable ``settings.fallback_models`` chain when the
+    user's preferred model is not available locally.
+    """
    user_model = settings.ollama_model

-    # Check if user's model is available
    if check_ollama_model_available(user_model):
        return user_model

-    # Try primary
-    if check_ollama_model_available(OLLAMA_MODEL_PRIMARY):
-        _startup_logger.warning(
-            f"Requested model '{user_model}' not available. Using primary: {OLLAMA_MODEL_PRIMARY}"
-        )
-        return OLLAMA_MODEL_PRIMARY
-
-    # Try fallback
-    if check_ollama_model_available(OLLAMA_MODEL_FALLBACK):
-        _startup_logger.warning(
-            f"Primary model '{OLLAMA_MODEL_PRIMARY}' not available. "
-            f"Using fallback: {OLLAMA_MODEL_FALLBACK}"
-        )
-        return OLLAMA_MODEL_FALLBACK
+    # Walk the configurable fallback chain
+    for fallback in settings.fallback_models:
+        if check_ollama_model_available(fallback):
+            _startup_logger.warning(
+                "Requested model '%s' not available. Using fallback: %s",
+                user_model,
+                fallback,
+            )
+            return fallback

    # Last resort - return user's setting and hope for the best
    return user_model