feat: upgrade primary model from llama3.1:8b to qwen2.5:14b (#140)

- Swap OLLAMA_MODEL_PRIMARY to qwen2.5:14b for better reasoning
- llama3.1:8b-instruct becomes fallback
- Update .env default and README quick start
- Fix hardcoded model assertions in tests

qwen2.5:14b provides significantly better multi-step reasoning
and tool calling reliability while still running locally on
modest hardware. The 8B model remains as automatic fallback.

Co-authored-by: Trip T <trip@local>
commit cdd3e1a90b (parent 39f2eb418a)
Author: Alexander Whitestone
Committed via GitHub on 2026-03-07 18:20:34 -05:00
4 changed files with 10 additions and 10 deletions
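The automatic fallback described in the commit message can be sketched as follows. This is a minimal illustration: only the two model names come from this commit, and `select_model` with its injected `is_available` probe is a hypothetical stand-in for the repo's actual wiring (which uses `check_ollama_model_available`, per the config diff below).

```python
OLLAMA_MODEL_PRIMARY = "qwen2.5:14b"
OLLAMA_MODEL_FALLBACK = "llama3.1:8b-instruct"


def select_model(is_available) -> str:
    """Prefer the 14B primary; drop to the 8B instruct model otherwise.

    `is_available` stands in for the repo's availability probe so the
    sketch stays self-contained and testable.
    """
    if is_available(OLLAMA_MODEL_PRIMARY):
        return OLLAMA_MODEL_PRIMARY
    return OLLAMA_MODEL_FALLBACK


# Only the 8B model is pulled locally, so selection falls back:
print(select_model(lambda m: m == "llama3.1:8b-instruct"))  # prints llama3.1:8b-instruct
```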


@@ -18,16 +18,16 @@ make install # create venv + install deps
 cp .env.example .env # configure environment
 ollama serve # separate terminal
-ollama pull llama3.1:8b-instruct # Required for reliable tool calling
+ollama pull qwen2.5:14b # Required for reliable tool calling
 make dev # http://localhost:8000
 make test # no Ollama needed
 ```
-**Note:** llama3.1:8b-instruct is used instead of llama3.2 because it is
-specifically fine-tuned for reliable tool/function calling.
+**Note:** qwen2.5:14b is the primary model — better reasoning and tool calling
+than llama3.1:8b-instruct while still running locally on modest hardware.
+Fallback: llama3.1:8b-instruct if qwen2.5:14b is not available.
 llama3.2 (3B) was found to hallucinate tool output consistently in testing.
-Fallback: qwen2.5:14b if llama3.1:8b-instruct is not available.
---


@@ -15,7 +15,7 @@ class Settings(BaseSettings):
     # specifically fine-tuned for reliable tool/function calling.
     # llama3.2 (3B) hallucinated tool output consistently in testing.
     # Fallback: qwen2.5:14b if llama3.1:8b-instruct not available.
-    ollama_model: str = "llama3.1:8b-instruct"
+    ollama_model: str = "qwen2.5:14b"
     # Set DEBUG=true to enable /docs and /redoc (disabled by default)
     debug: bool = False
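The commit message also mentions updating the `.env` default. Since pydantic's `BaseSettings` matches the `ollama_model` field to its environment variable case-insensitively, the corresponding entry would presumably be a single line like the following (the actual `.env.example` is not shown in this diff, so the variable name is inferred from the field name):

```ini
OLLAMA_MODEL=qwen2.5:14b
```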
@@ -236,8 +236,8 @@ if not settings.repo_root:
 # ── Model fallback configuration ────────────────────────────────────────────
 # Primary model for reliable tool calling (llama3.1:8b-instruct)
 # Fallback if primary not available: qwen2.5:14b
-OLLAMA_MODEL_PRIMARY: str = "llama3.1:8b-instruct"
-OLLAMA_MODEL_FALLBACK: str = "qwen2.5:14b"
+OLLAMA_MODEL_PRIMARY: str = "qwen2.5:14b"
+OLLAMA_MODEL_FALLBACK: str = "llama3.1:8b-instruct"
 def check_ollama_model_available(model_name: str) -> bool:


@@ -86,7 +86,7 @@ def test_agents_list_metadata(client):
     response = client.get("/agents")
     agent = next(a for a in response.json()["agents"] if a["id"] == "default")
     assert agent["name"] == "Agent"
-    assert agent["model"] == "llama3.1:8b-instruct"
+    assert agent["model"] == "qwen2.5:14b"
     assert agent["type"] == "local"


@@ -298,10 +298,10 @@ def test_M605_health_status_passes_model_to_template(client):
         return_value=True,
     ):
         response = client.get("/health/status")
-        # The default model is llama3.1:8b-instruct — it should appear from settings
+        # The default model is qwen2.5:14b — it should appear from settings
        assert response.status_code == 200
        assert (
-            "llama3.1" in response.text
+            "qwen2.5" in response.text
        )  # rendered via template variable, not hardcoded literal
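The `return_value=True` in the hunk above stubs out the availability probe so the handler runs without a live Ollama server. A self-contained sketch of that patching pattern follows; the class and handler here are hypothetical stand-ins, not the repo's code:

```python
from unittest import mock


class FakeHealth:
    """Stand-in for the app's health-check wiring (hypothetical names)."""

    @staticmethod
    def check_ollama_model_available(model_name: str) -> bool:
        raise RuntimeError("would need a running Ollama server")

    @classmethod
    def status_text(cls) -> str:
        model = "qwen2.5:14b"
        ok = cls.check_ollama_model_available(model)
        return model if ok else "model unavailable"


# Stub the probe, mirroring return_value=True in the test above, so the
# handler logic runs with no network dependency.
with mock.patch.object(
    FakeHealth, "check_ollama_model_available", return_value=True
):
    assert FakeHealth.status_text() == "qwen2.5:14b"
```

Because the test asserts on the substring `"qwen2.5"` rather than the full tag, it keeps passing if the rendered template abbreviates or decorates the model name.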