fix: model introspection uses exact match, queries /api/ps first

_get_ollama_model() used a prefix match (startswith) on the /api/tags results, causing a configured qwen3:30b to match an installed qwen3.5:latest.

Now:
1. Queries /api/ps (loaded models) first — most accurate
2. Falls back to /api/tags with an exact name match
3. Reports the actual running model, not just the configured one

Updated test_get_system_info_contains_model to not assume model == config.

Fixes #77. 5 regression tests added.
2026-03-14 18:02:14 -04:00
parent 1f09323e09
commit 7132b42ff3
2 changed files with 133 additions and 15 deletions
--- a/src/timmy/tools_intro/__init__.py
+++ b/src/timmy/tools_intro/__init__.py
@@ -55,26 +55,45 @@ def get_system_info() -> dict[str, Any]:


 def _get_ollama_model() -> str:
-    """Query Ollama API to get the current model."""
+    """Query Ollama API to get the actual running model.
+
+    Strategy:
+    1. /api/ps — models currently loaded in memory (most accurate)
+    2. /api/tags — all installed models (fallback)
+    Both use exact name match to avoid prefix collisions
+    (e.g. 'qwen3:30b' vs 'qwen3.5:latest').
+    """
    from config import settings

+    configured = settings.ollama_model
+
    try:
-        # First try to get tags to see available models
+        # First: check actually loaded models via /api/ps
+        response = httpx.get(f"{settings.ollama_url}/api/ps", timeout=5)
+        if response.status_code == 200:
+            running = response.json().get("models", [])
+            for model in running:
+                name = model.get("name", "")
+                if name == configured or name == f"{configured}:latest":
+                    return name
+            # Configured model not loaded — return first running model
+            # so Timmy reports what's *actually* serving his requests
+            if running:
+                return running[0].get("name", configured)
+
+        # Second: check installed models via /api/tags (exact match)
        response = httpx.get(f"{settings.ollama_url}/api/tags", timeout=5)
        if response.status_code == 200:
-            models = response.json().get("models", [])
-            # Check if configured model is available
-            for model in models:
-                if model.get("name", "").startswith(settings.ollama_model.split(":")[0]):
-                    return settings.ollama_model
-
-            # Fallback: return configured model
-            return settings.ollama_model
+            installed = response.json().get("models", [])
+            for model in installed:
+                name = model.get("name", "")
+                if name == configured or name == f"{configured}:latest":
+                    return configured
    except Exception:
        pass

    # Fallback to configured model
-    return settings.ollama_model
+    return configured


 def check_ollama_health() -> dict[str, Any]: