forked from Rockachopa/Timmy-time-dashboard
@@ -14,8 +14,8 @@
|
|||||||
# In production (docker-compose.prod.yml), this is set to http://ollama:11434 automatically.
|
# In production (docker-compose.prod.yml), this is set to http://ollama:11434 automatically.
|
||||||
# OLLAMA_URL=http://localhost:11434
|
# OLLAMA_URL=http://localhost:11434
|
||||||
|
|
||||||
# LLM model to use via Ollama (default: qwen3.5:latest)
|
# LLM model to use via Ollama (default: qwen3:30b)
|
||||||
# OLLAMA_MODEL=qwen3.5:latest
|
# OLLAMA_MODEL=qwen3:30b
|
||||||
|
|
||||||
# Ollama context window size (default: 4096 tokens)
|
# Ollama context window size (default: 4096 tokens)
|
||||||
# Set higher for more context, lower to save RAM. 0 = model default.
|
# Set higher for more context, lower to save RAM. 0 = model default.
|
||||||
|
|||||||
@@ -18,15 +18,15 @@ make install # create venv + install deps
|
|||||||
cp .env.example .env # configure environment
|
cp .env.example .env # configure environment
|
||||||
|
|
||||||
ollama serve # separate terminal
|
ollama serve # separate terminal
|
||||||
ollama pull qwen3.5:latest # Required for reliable tool calling
|
ollama pull qwen3:30b # Required for reliable tool calling
|
||||||
|
|
||||||
make dev # http://localhost:8000
|
make dev # http://localhost:8000
|
||||||
make test # no Ollama needed
|
make test # no Ollama needed
|
||||||
```
|
```
|
||||||
|
|
||||||
**Note:** qwen3.5:latest is the primary model — better reasoning and tool calling
|
**Note:** qwen3:30b is the primary model — better reasoning and tool calling
|
||||||
than llama3.1:8b-instruct while still running locally on modest hardware.
|
than llama3.1:8b-instruct while still running locally on modest hardware.
|
||||||
Fallback: llama3.1:8b-instruct if qwen3.5:latest is not available.
|
Fallback: llama3.1:8b-instruct if qwen3:30b is not available.
|
||||||
llama3.2 (3B) was found to hallucinate tool output consistently in testing.
|
llama3.2 (3B) was found to hallucinate tool output consistently in testing.
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -79,7 +79,7 @@ cp .env.example .env
|
|||||||
| Variable | Default | Purpose |
|
| Variable | Default | Purpose |
|
||||||
|----------|---------|---------|
|
|----------|---------|---------|
|
||||||
| `OLLAMA_URL` | `http://localhost:11434` | Ollama host |
|
| `OLLAMA_URL` | `http://localhost:11434` | Ollama host |
|
||||||
| `OLLAMA_MODEL` | `qwen3.5:latest` | Primary model for reasoning and tool calling. Fallback: `llama3.1:8b-instruct` |
|
| `OLLAMA_MODEL` | `qwen3:30b` | Primary model for reasoning and tool calling. Fallback: `llama3.1:8b-instruct` |
|
||||||
| `DEBUG` | `false` | Enable `/docs` and `/redoc` |
|
| `DEBUG` | `false` | Enable `/docs` and `/redoc` |
|
||||||
| `TIMMY_MODEL_BACKEND` | `ollama` | `ollama` \| `airllm` \| `auto` |
|
| `TIMMY_MODEL_BACKEND` | `ollama` | `ollama` \| `airllm` \| `auto` |
|
||||||
| `AIRLLM_MODEL_SIZE` | `70b` | `8b` \| `70b` \| `405b` |
|
| `AIRLLM_MODEL_SIZE` | `70b` | `8b` \| `70b` \| `405b` |
|
||||||
|
|||||||
@@ -20,7 +20,7 @@
|
|||||||
# ── Defaults ────────────────────────────────────────────────────────────────
|
# ── Defaults ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
defaults:
|
defaults:
|
||||||
model: qwen3.5:latest
|
model: qwen3:30b
|
||||||
prompt_tier: lite
|
prompt_tier: lite
|
||||||
max_history: 10
|
max_history: 10
|
||||||
tools: []
|
tools: []
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ providers:
|
|||||||
url: "http://localhost:11434"
|
url: "http://localhost:11434"
|
||||||
models:
|
models:
|
||||||
# Text + Tools models
|
# Text + Tools models
|
||||||
- name: qwen3.5:latest
|
- name: qwen3:30b
|
||||||
default: true
|
default: true
|
||||||
context_window: 128000
|
context_window: 128000
|
||||||
# Note: actual context is capped by OLLAMA_NUM_CTX (default 4096) to save RAM
|
# Note: actual context is capped by OLLAMA_NUM_CTX (default 4096) to save RAM
|
||||||
@@ -114,13 +114,12 @@ fallback_chains:
|
|||||||
# Tool-calling models (for function calling)
|
# Tool-calling models (for function calling)
|
||||||
tools:
|
tools:
|
||||||
- llama3.1:8b-instruct # Best tool use
|
- llama3.1:8b-instruct # Best tool use
|
||||||
- qwen3.5:latest # Qwen 3.5 — strong tool use
|
|
||||||
- qwen2.5:7b # Reliable tools
|
- qwen2.5:7b # Reliable tools
|
||||||
- llama3.2:3b # Small but capable
|
- llama3.2:3b # Small but capable
|
||||||
|
|
||||||
# General text generation (any model)
|
# General text generation (any model)
|
||||||
text:
|
text:
|
||||||
- qwen3.5:latest
|
- qwen3:30b
|
||||||
- llama3.1:8b-instruct
|
- llama3.1:8b-instruct
|
||||||
- qwen2.5:14b
|
- qwen2.5:14b
|
||||||
- deepseek-r1:1.5b
|
- deepseek-r1:1.5b
|
||||||
|
|||||||
@@ -172,7 +172,7 @@ support:
|
|||||||
```python
|
```python
|
||||||
class LLMConfig(BaseModel):
|
class LLMConfig(BaseModel):
|
||||||
ollama_url: str = "http://localhost:11434"
|
ollama_url: str = "http://localhost:11434"
|
||||||
ollama_model: str = "qwen3.5:latest"
|
ollama_model: str = "qwen3:30b"
|
||||||
# ... all LLM settings
|
# ... all LLM settings
|
||||||
|
|
||||||
class MemoryConfig(BaseModel):
|
class MemoryConfig(BaseModel):
|
||||||
|
|||||||
@@ -16,11 +16,11 @@ class Settings(BaseSettings):
|
|||||||
ollama_url: str = "http://localhost:11434"
|
ollama_url: str = "http://localhost:11434"
|
||||||
|
|
||||||
# LLM model passed to Agno/Ollama — override with OLLAMA_MODEL
|
# LLM model passed to Agno/Ollama — override with OLLAMA_MODEL
|
||||||
# qwen3.5:latest is the primary model — better reasoning and tool calling
|
# qwen3:30b is the primary model — better reasoning and tool calling
|
||||||
# than llama3.1:8b-instruct while still running locally on modest hardware.
|
# than llama3.1:8b-instruct while still running locally on modest hardware.
|
||||||
# Fallback: llama3.1:8b-instruct if qwen3.5:latest is not available.
|
# Fallback: llama3.1:8b-instruct if qwen3:30b is not available.
|
||||||
# llama3.2 (3B) hallucinated tool output consistently in testing.
|
# llama3.2 (3B) hallucinated tool output consistently in testing.
|
||||||
ollama_model: str = "qwen3.5:latest"
|
ollama_model: str = "qwen3:30b"
|
||||||
|
|
||||||
# Context window size for Ollama inference — override with OLLAMA_NUM_CTX
|
# Context window size for Ollama inference — override with OLLAMA_NUM_CTX
|
||||||
# qwen3:30b with default context eats 45GB on a 39GB Mac.
|
# qwen3:30b with default context eats 45GB on a 39GB Mac.
|
||||||
@@ -28,12 +28,11 @@ class Settings(BaseSettings):
|
|||||||
ollama_num_ctx: int = 4096
|
ollama_num_ctx: int = 4096
|
||||||
|
|
||||||
# Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS
|
# Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS
|
||||||
# as comma-separated strings, e.g. FALLBACK_MODELS="qwen3.5:latest,llama3.1"
|
# as comma-separated strings, e.g. FALLBACK_MODELS="qwen3:30b,llama3.1"
|
||||||
# Or edit config/providers.yaml → fallback_chains for the canonical source.
|
# Or edit config/providers.yaml → fallback_chains for the canonical source.
|
||||||
fallback_models: list[str] = [
|
fallback_models: list[str] = [
|
||||||
"llama3.1:8b-instruct",
|
"llama3.1:8b-instruct",
|
||||||
"llama3.1",
|
"llama3.1",
|
||||||
"qwen3.5:latest",
|
|
||||||
"qwen2.5:14b",
|
"qwen2.5:14b",
|
||||||
"qwen2.5:7b",
|
"qwen2.5:7b",
|
||||||
"llama3.2:3b",
|
"llama3.2:3b",
|
||||||
|
|||||||
@@ -93,18 +93,6 @@ KNOWN_MODEL_CAPABILITIES: dict[str, set[ModelCapability]] = {
|
|||||||
ModelCapability.VISION,
|
ModelCapability.VISION,
|
||||||
},
|
},
|
||||||
# Qwen series
|
# Qwen series
|
||||||
"qwen3.5": {
|
|
||||||
ModelCapability.TEXT,
|
|
||||||
ModelCapability.TOOLS,
|
|
||||||
ModelCapability.JSON,
|
|
||||||
ModelCapability.STREAMING,
|
|
||||||
},
|
|
||||||
"qwen3.5:latest": {
|
|
||||||
ModelCapability.TEXT,
|
|
||||||
ModelCapability.TOOLS,
|
|
||||||
ModelCapability.JSON,
|
|
||||||
ModelCapability.STREAMING,
|
|
||||||
},
|
|
||||||
"qwen2.5": {
|
"qwen2.5": {
|
||||||
ModelCapability.TEXT,
|
ModelCapability.TEXT,
|
||||||
ModelCapability.TOOLS,
|
ModelCapability.TOOLS,
|
||||||
@@ -271,9 +259,8 @@ DEFAULT_FALLBACK_CHAINS: dict[ModelCapability, list[str]] = {
|
|||||||
],
|
],
|
||||||
ModelCapability.TOOLS: [
|
ModelCapability.TOOLS: [
|
||||||
"llama3.1:8b-instruct", # Best tool use
|
"llama3.1:8b-instruct", # Best tool use
|
||||||
"qwen3.5:latest", # Qwen 3.5 — strong tool use
|
|
||||||
"llama3.2:3b", # Smaller but capable
|
|
||||||
"qwen2.5:7b", # Reliable fallback
|
"qwen2.5:7b", # Reliable fallback
|
||||||
|
"llama3.2:3b", # Smaller but capable
|
||||||
],
|
],
|
||||||
ModelCapability.AUDIO: [
|
ModelCapability.AUDIO: [
|
||||||
# Audio models are less common in Ollama
|
# Audio models are less common in Ollama
|
||||||
|
|||||||
@@ -283,12 +283,12 @@ def create_aider_tool(base_path: Path):
|
|||||||
def __init__(self, base_dir: Path):
|
def __init__(self, base_dir: Path):
|
||||||
self.base_dir = base_dir
|
self.base_dir = base_dir
|
||||||
|
|
||||||
def run_aider(self, prompt: str, model: str = "qwen3.5:latest") -> str:
|
def run_aider(self, prompt: str, model: str = "qwen3:30b") -> str:
|
||||||
"""Run Aider to generate code changes.
|
"""Run Aider to generate code changes.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
prompt: What you want Aider to do (e.g., "add a fibonacci function")
|
prompt: What you want Aider to do (e.g., "add a fibonacci function")
|
||||||
model: Ollama model to use (default: qwen3.5:latest)
|
model: Ollama model to use (default: qwen3:30b)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Aider's response with the code changes made
|
Aider's response with the code changes made
|
||||||
@@ -788,7 +788,7 @@ def _ai_tool_catalog() -> dict:
|
|||||||
},
|
},
|
||||||
"aider": {
|
"aider": {
|
||||||
"name": "Aider AI Assistant",
|
"name": "Aider AI Assistant",
|
||||||
"description": "Local AI coding assistant using Ollama (qwen3.5:latest or deepseek-coder)",
|
"description": "Local AI coding assistant using Ollama (qwen3:30b or deepseek-coder)",
|
||||||
"available_in": ["forge", "orchestrator"],
|
"available_in": ["forge", "orchestrator"],
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ def _get_ollama_model() -> str:
|
|||||||
1. /api/ps — models currently loaded in memory (most accurate)
|
1. /api/ps — models currently loaded in memory (most accurate)
|
||||||
2. /api/tags — all installed models (fallback)
|
2. /api/tags — all installed models (fallback)
|
||||||
Both use exact name match to avoid prefix collisions
|
Both use exact name match to avoid prefix collisions
|
||||||
(e.g. 'qwen3:30b' vs 'qwen3.5:latest').
|
(e.g. 'qwen3:8b' vs 'qwen3:30b').
|
||||||
"""
|
"""
|
||||||
from config import settings
|
from config import settings
|
||||||
|
|
||||||
|
|||||||
@@ -98,35 +98,35 @@ class TestGetOllamaModelExactMatch:
|
|||||||
|
|
||||||
@patch("timmy.tools_intro.httpx.get")
|
@patch("timmy.tools_intro.httpx.get")
|
||||||
def test_prefix_collision_returns_correct_model(self, mock_get):
|
def test_prefix_collision_returns_correct_model(self, mock_get):
|
||||||
"""qwen3:30b configured — must NOT match qwen3.5:latest (prefix bug)."""
|
"""qwen3:8b configured — must NOT match qwen3:30b (prefix bug)."""
|
||||||
from timmy.tools_intro import _get_ollama_model
|
from timmy.tools_intro import _get_ollama_model
|
||||||
|
|
||||||
# /api/ps has both models loaded; configured is qwen3:30b
|
# /api/ps has both models loaded; configured is qwen3:8b
|
||||||
ps_resp = _mock_response({"models": [{"name": "qwen3.5:latest"}, {"name": "qwen3:30b"}]})
|
ps_resp = _mock_response({"models": [{"name": "qwen3:30b"}, {"name": "qwen3:8b"}]})
|
||||||
mock_get.return_value = ps_resp
|
mock_get.return_value = ps_resp
|
||||||
|
|
||||||
with patch("config.settings") as mock_settings:
|
with patch("config.settings") as mock_settings:
|
||||||
mock_settings.ollama_model = "qwen3:30b"
|
mock_settings.ollama_model = "qwen3:8b"
|
||||||
mock_settings.ollama_url = "http://localhost:11434"
|
mock_settings.ollama_url = "http://localhost:11434"
|
||||||
result = _get_ollama_model()
|
result = _get_ollama_model()
|
||||||
|
|
||||||
assert result == "qwen3:30b", f"Got '{result}' — prefix collision bug!"
|
assert result == "qwen3:8b", f"Got '{result}' — prefix collision bug!"
|
||||||
|
|
||||||
@patch("timmy.tools_intro.httpx.get")
|
@patch("timmy.tools_intro.httpx.get")
|
||||||
def test_configured_model_not_running_returns_actual(self, mock_get):
|
def test_configured_model_not_running_returns_actual(self, mock_get):
|
||||||
"""If configured model isn't loaded, report what IS running."""
|
"""If configured model isn't loaded, report what IS running."""
|
||||||
from timmy.tools_intro import _get_ollama_model
|
from timmy.tools_intro import _get_ollama_model
|
||||||
|
|
||||||
ps_resp = _mock_response({"models": [{"name": "qwen3.5:latest"}]})
|
ps_resp = _mock_response({"models": [{"name": "qwen3:30b"}]})
|
||||||
mock_get.return_value = ps_resp
|
mock_get.return_value = ps_resp
|
||||||
|
|
||||||
with patch("config.settings") as mock_settings:
|
with patch("config.settings") as mock_settings:
|
||||||
mock_settings.ollama_model = "qwen3:30b"
|
mock_settings.ollama_model = "qwen3:8b"
|
||||||
mock_settings.ollama_url = "http://localhost:11434"
|
mock_settings.ollama_url = "http://localhost:11434"
|
||||||
result = _get_ollama_model()
|
result = _get_ollama_model()
|
||||||
|
|
||||||
# Should report actual running model, not configured one
|
# Should report actual running model, not configured one
|
||||||
assert result == "qwen3.5:latest"
|
assert result == "qwen3:30b"
|
||||||
|
|
||||||
@patch("timmy.tools_intro.httpx.get")
|
@patch("timmy.tools_intro.httpx.get")
|
||||||
def test_latest_suffix_match(self, mock_get):
|
def test_latest_suffix_match(self, mock_get):
|
||||||
|
|||||||
Reference in New Issue
Block a user