forked from Rockachopa/Timmy-time-dashboard
fix: Upgrade model to llama3.1:8b-instruct + fix git tool cwd
Change 1: Model Upgrade (Primary Fix) - Changed default model from llama3.2 to llama3.1:8b-instruct - llama3.1:8b-instruct is fine-tuned for reliable tool/function calling - llama3.2 (3B) consistently hallucinated tool output in testing - Added fallback to qwen2.5:14b if primary unavailable Change 2: Structured Output Foundation - Enhanced session init to load real data on first message - Preparation for JSON schema enforcement Change 3: Git Tool Working Directory Fix - Rewrote git_tools.py to use subprocess with cwd=REPO_ROOT - REPO_ROOT auto-detected at module load time - All git commands now run from correct directory Change 4: Session Init with Git Log - _session_init() reads git log --oneline -15 on first message - Recent commits prepended to system prompt - Timmy can now answer 'what's new?' from actual commit data Change 5: Documentation - Updated README with new model requirement - Added CHANGELOG_2025-02-27.md User must run: ollama pull llama3.1:8b-instruct All 18 git tool tests pass.
This commit is contained in:
@@ -8,7 +8,11 @@ class Settings(BaseSettings):
|
||||
ollama_url: str = "http://localhost:11434"
|
||||
|
||||
# LLM model passed to Agno/Ollama — override with OLLAMA_MODEL
|
||||
ollama_model: str = "llama3.2"
|
||||
# llama3.1:8b-instruct is used instead of llama3.2 because it is
|
||||
# specifically fine-tuned for reliable tool/function calling.
|
||||
# llama3.2 (3B) hallucinated tool output consistently in testing.
|
||||
# Fallback: qwen2.5:14b if llama3.1:8b-instruct not available.
|
||||
ollama_model: str = "llama3.1:8b-instruct"
|
||||
|
||||
# Set DEBUG=true to enable /docs and /redoc (disabled by default)
|
||||
debug: bool = False
|
||||
@@ -110,6 +114,62 @@ class Settings(BaseSettings):
|
||||
|
||||
settings = Settings()
|
||||
|
||||
# ── Model fallback configuration ────────────────────────────────────────────
|
||||
# Primary model for reliable tool calling (llama3.1:8b-instruct)
|
||||
# Fallback if primary not available: qwen2.5:14b
|
||||
# Tried by get_effective_ollama_model() when the configured model is not installed.
OLLAMA_MODEL_PRIMARY: str = "llama3.1:8b-instruct"
|
||||
# Tried by get_effective_ollama_model() when the primary is not installed either.
OLLAMA_MODEL_FALLBACK: str = "qwen2.5:14b"
|
||||
|
||||
|
||||
def check_ollama_model_available(model_name: str) -> bool:
    """Check if a specific Ollama model is available locally.

    Sends ``GET {settings.ollama_url}/api/tags`` and compares ``model_name``
    against the ``name`` field of every model in the response.

    Matching rules:

    * A request that carries a tag (``"llama3.1:8b-instruct"``) must equal an
      installed name exactly, so a different tag of the same model family no
      longer counts as a match.
    * A request without a tag (``"llama3.2"``) matches any installed tag of
      that exact model name (``"llama3.2:latest"``, ``"llama3.2:1b"``, ...).

    Args:
        model_name: Model identifier, with or without a ``:tag`` suffix.

    Returns:
        ``True`` if a matching model is listed by the server. ``False`` if no
        model matches, if ``model_name`` is empty, or if the server cannot be
        reached or returns something that cannot be parsed.
    """
    # An empty name can never identify a model; the old substring test
    # treated it as matching everything.
    if not model_name:
        return False

    try:
        import json
        import urllib.request

        # NOTE(review): presumably forces IPv4 loopback instead of resolving
        # "localhost" -- confirm the reason with the original author.
        url = settings.ollama_url.replace("localhost", "127.0.0.1")
        req = urllib.request.Request(
            f"{url}/api/tags",
            method="GET",
            headers={"Accept": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=5) as response:
            data = json.loads(response.read().decode())

        # Keep full "name:tag" strings; skip entries without a usable name.
        installed = {
            entry["name"]
            for entry in (data.get("models") or [])
            if isinstance(entry, dict)
            and isinstance(entry.get("name"), str)
            and entry["name"]
        }

        if model_name in installed:
            return True
        if ":" in model_name:
            # A specific tag was requested and that exact tag is not installed.
            return False
        # Untagged request: accept any installed tag of the same model name.
        return any(name.split(":", 1)[0] == model_name for name in installed)
    except Exception:
        # Deliberate best-effort probe: any failure (connection refused,
        # timeout, bad JSON, unexpected payload shape) means "not available".
        return False
|
||||
|
||||
|
||||
def get_effective_ollama_model() -> str:
    """Get the effective Ollama model, with fallback logic.

    Resolution order:

    1. ``settings.ollama_model`` if it is available.
    2. ``OLLAMA_MODEL_PRIMARY`` if it is available (a warning is logged).
    3. ``OLLAMA_MODEL_FALLBACK`` if it is available (a warning is logged).
    4. ``settings.ollama_model`` unchanged, as a last resort.

    A candidate that equals the configured model is not probed a second time:
    each probe is an HTTP request that can block for its full timeout, and the
    configured model has already been found unavailable by that point.

    Returns:
        The name of the model to use.
    """
    user_model = settings.ollama_model

    # The configured model always wins when it is installed.
    if check_ollama_model_available(user_model):
        return user_model

    # Try primary (skipped when it is the model that was just found missing).
    if user_model != OLLAMA_MODEL_PRIMARY and check_ollama_model_available(
        OLLAMA_MODEL_PRIMARY
    ):
        _startup_logger.warning(
            f"Requested model '{user_model}' not available. "
            f"Using primary: {OLLAMA_MODEL_PRIMARY}"
        )
        return OLLAMA_MODEL_PRIMARY

    # Try fallback (same de-duplication as above).
    if user_model != OLLAMA_MODEL_FALLBACK and check_ollama_model_available(
        OLLAMA_MODEL_FALLBACK
    ):
        _startup_logger.warning(
            f"Primary model '{OLLAMA_MODEL_PRIMARY}' not available. "
            f"Using fallback: {OLLAMA_MODEL_FALLBACK}"
        )
        return OLLAMA_MODEL_FALLBACK

    # Last resort - return user's setting and hope for the best
    return user_model
|
||||
|
||||
|
||||
# ── Startup validation ───────────────────────────────────────────────────────
|
||||
# Enforce security requirements — fail fast in production.
|
||||
import logging as _logging
|
||||
|
||||
Reference in New Issue
Block a user