diff --git a/README.md b/README.md
index bebdaea..911b84c 100644
--- a/README.md
+++ b/README.md
@@ -18,16 +18,16 @@
 make install # create venv + install deps
 cp .env.example .env # configure environment
 ollama serve # separate terminal
-ollama pull llama3.1:8b-instruct # Required for reliable tool calling
+ollama pull qwen2.5:14b # Required for reliable tool calling
 make dev # http://localhost:8000
 make test # no Ollama needed
 ```
 
-**Note:** llama3.1:8b-instruct is used instead of llama3.2 because it is
-specifically fine-tuned for reliable tool/function calling.
+**Note:** qwen2.5:14b is the primary model — better reasoning and tool calling
+than llama3.1:8b-instruct while still running locally on modest hardware.
+Fallback: llama3.1:8b-instruct if qwen2.5:14b is not available.
 llama3.2 (3B) was found to hallucinate tool output consistently in testing.
-Fallback: qwen2.5:14b if llama3.1:8b-instruct is not available.
 
 ---
 
 
diff --git a/src/config.py b/src/config.py
index b4dfda7..65d65a8 100644
--- a/src/config.py
+++ b/src/config.py
@@ -15,7 +15,7 @@ class Settings(BaseSettings):
     # specifically fine-tuned for reliable tool/function calling.
     # llama3.2 (3B) hallucinated tool output consistently in testing.
-    # Fallback: qwen2.5:14b if llama3.1:8b-instruct not available.
-    ollama_model: str = "llama3.1:8b-instruct"
+    # Fallback: llama3.1:8b-instruct if qwen2.5:14b not available.
+    ollama_model: str = "qwen2.5:14b"
 
     # Set DEBUG=true to enable /docs and /redoc (disabled by default)
     debug: bool = False
@@ -236,8 +236,8 @@ if not settings.repo_root:
 # ── Model fallback configuration ────────────────────────────────────────────
-# Primary model for reliable tool calling (llama3.1:8b-instruct)
-# Fallback if primary not available: qwen2.5:14b
-OLLAMA_MODEL_PRIMARY: str = "llama3.1:8b-instruct"
-OLLAMA_MODEL_FALLBACK: str = "qwen2.5:14b"
+# Primary model for reasoning and tool calling (qwen2.5:14b)
+# Fallback if primary not available: llama3.1:8b-instruct
+OLLAMA_MODEL_PRIMARY: str = "qwen2.5:14b"
+OLLAMA_MODEL_FALLBACK: str = "llama3.1:8b-instruct"
 
 
 def check_ollama_model_available(model_name: str) -> bool:
diff --git a/tests/dashboard/test_dashboard.py b/tests/dashboard/test_dashboard.py
index c2ff1e3..ad2152c 100644
--- a/tests/dashboard/test_dashboard.py
+++ b/tests/dashboard/test_dashboard.py
@@ -86,7 +86,7 @@ def test_agents_list_metadata(client):
     response = client.get("/agents")
     agent = next(a for a in response.json()["agents"] if a["id"] == "default")
     assert agent["name"] == "Agent"
-    assert agent["model"] == "llama3.1:8b-instruct"
+    assert agent["model"] == "qwen2.5:14b"
     assert agent["type"] == "local"
 
 
diff --git a/tests/dashboard/test_mobile_scenarios.py b/tests/dashboard/test_mobile_scenarios.py
index e2606f3..b023cc1 100644
--- a/tests/dashboard/test_mobile_scenarios.py
+++ b/tests/dashboard/test_mobile_scenarios.py
@@ -298,10 +298,10 @@ def test_M605_health_status_passes_model_to_template(client):
         return_value=True,
     ):
         response = client.get("/health/status")
-        # The default model is llama3.1:8b-instruct — it should appear from settings
+        # The default model is qwen2.5:14b — it should appear from settings
         assert response.status_code == 200
         assert (
-            "llama3.1" in response.text
+            "qwen2.5" in response.text
         )  # rendered via template variable, not hardcoded literal
 
 