forked from Rockachopa/Timmy-time-dashboard
feat: upgrade primary model from llama3.1:8b to qwen2.5:14b (#140)
- Swap OLLAMA_MODEL_PRIMARY to qwen2.5:14b for better reasoning - llama3.1:8b-instruct becomes fallback - Update .env default and README quick start - Fix hardcoded model assertions in tests. qwen2.5:14b provides significantly better multi-step reasoning and tool calling reliability while still running locally on modest hardware. The 8B model remains as automatic fallback. Co-authored-by: Trip T <trip@local>
This commit is contained in:
committed by
GitHub
parent
39f2eb418a
commit
cdd3e1a90b
@@ -18,16 +18,16 @@ make install # create venv + install deps
|
||||
cp .env.example .env # configure environment
|
||||
|
||||
ollama serve # separate terminal
|
||||
ollama pull llama3.1:8b-instruct # Required for reliable tool calling
|
||||
ollama pull qwen2.5:14b # Required for reliable tool calling
|
||||
|
||||
make dev # http://localhost:8000
|
||||
make test # no Ollama needed
|
||||
```
|
||||
|
||||
**Note:** llama3.1:8b-instruct is used instead of llama3.2 because it is
|
||||
specifically fine-tuned for reliable tool/function calling.
|
||||
**Note:** qwen2.5:14b is the primary model — better reasoning and tool calling
|
||||
than llama3.1:8b-instruct while still running locally on modest hardware.
|
||||
Fallback: llama3.1:8b-instruct if qwen2.5:14b is not available.
|
||||
llama3.2 (3B) was found to hallucinate tool output consistently in testing.
|
||||
Fallback: qwen2.5:14b if llama3.1:8b-instruct is not available.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ class Settings(BaseSettings):
|
||||
# specifically fine-tuned for reliable tool/function calling.
|
||||
# llama3.2 (3B) hallucinated tool output consistently in testing.
|
||||
# Fallback: llama3.1:8b-instruct if qwen2.5:14b not available.
|
||||
ollama_model: str = "llama3.1:8b-instruct"
|
||||
ollama_model: str = "qwen2.5:14b"
|
||||
|
||||
# Set DEBUG=true to enable /docs and /redoc (disabled by default)
|
||||
debug: bool = False
|
||||
@@ -236,8 +236,8 @@ if not settings.repo_root:
|
||||
# ── Model fallback configuration ────────────────────────────────────────────
|
||||
# Primary model for reliable tool calling (qwen2.5:14b)
|
||||
# Fallback if primary not available: llama3.1:8b-instruct
|
||||
OLLAMA_MODEL_PRIMARY: str = "llama3.1:8b-instruct"
|
||||
OLLAMA_MODEL_FALLBACK: str = "qwen2.5:14b"
|
||||
OLLAMA_MODEL_PRIMARY: str = "qwen2.5:14b"
|
||||
OLLAMA_MODEL_FALLBACK: str = "llama3.1:8b-instruct"
|
||||
|
||||
|
||||
def check_ollama_model_available(model_name: str) -> bool:
|
||||
|
||||
@@ -86,7 +86,7 @@ def test_agents_list_metadata(client):
|
||||
response = client.get("/agents")
|
||||
agent = next(a for a in response.json()["agents"] if a["id"] == "default")
|
||||
assert agent["name"] == "Agent"
|
||||
assert agent["model"] == "llama3.1:8b-instruct"
|
||||
assert agent["model"] == "qwen2.5:14b"
|
||||
assert agent["type"] == "local"
|
||||
|
||||
|
||||
|
||||
@@ -298,10 +298,10 @@ def test_M605_health_status_passes_model_to_template(client):
|
||||
return_value=True,
|
||||
):
|
||||
response = client.get("/health/status")
|
||||
# The default model is llama3.1:8b-instruct — it should appear from settings
|
||||
# The default model is qwen2.5:14b — it should appear from settings
|
||||
assert response.status_code == 200
|
||||
assert (
|
||||
"llama3.1" in response.text
|
||||
"qwen2.5" in response.text
|
||||
) # rendered via template variable, not hardcoded literal
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user