diff --git a/.env.example b/.env.example index f0078a9..07adbee 100644 --- a/.env.example +++ b/.env.example @@ -14,8 +14,8 @@ # In production (docker-compose.prod.yml), this is set to http://ollama:11434 automatically. # OLLAMA_URL=http://localhost:11434 -# LLM model to use via Ollama (default: qwen3.5:latest) -# OLLAMA_MODEL=qwen3.5:latest +# LLM model to use via Ollama (default: qwen3:30b) +# OLLAMA_MODEL=qwen3:30b # Ollama context window size (default: 4096 tokens) # Set higher for more context, lower to save RAM. 0 = model default. diff --git a/README.md b/README.md index 27bdadb..909ef86 100644 --- a/README.md +++ b/README.md @@ -18,15 +18,15 @@ make install # create venv + install deps cp .env.example .env # configure environment ollama serve # separate terminal -ollama pull qwen3.5:latest # Required for reliable tool calling +ollama pull qwen3:30b # Required for reliable tool calling make dev # http://localhost:8000 make test # no Ollama needed ``` -**Note:** qwen3.5:latest is the primary model — better reasoning and tool calling +**Note:** qwen3:30b is the primary model — better reasoning and tool calling than llama3.1:8b-instruct while still running locally on modest hardware. -Fallback: llama3.1:8b-instruct if qwen3.5:latest is not available. +Fallback: llama3.1:8b-instruct if qwen3:30b is not available. llama3.2 (3B) was found to hallucinate tool output consistently in testing. --- @@ -79,7 +79,7 @@ cp .env.example .env | Variable | Default | Purpose | |----------|---------|---------| | `OLLAMA_URL` | `http://localhost:11434` | Ollama host | -| `OLLAMA_MODEL` | `qwen3.5:latest` | Primary model for reasoning and tool calling. Fallback: `llama3.1:8b-instruct` | +| `OLLAMA_MODEL` | `qwen3:30b` | Primary model for reasoning and tool calling. Fallback: `llama3.1:8b-instruct` | | `DEBUG` | `false` | Enable `/docs` and `/redoc` | | `TIMMY_MODEL_BACKEND` | `ollama` | `ollama` \| `airllm` \| `auto` | | `AIRLLM_MODEL_SIZE` | `70b` | `8b` \| `70b` \| `405b` | diff --git a/config/agents.yaml b/config/agents.yaml index 2c265ad..f1e5200 100644 --- a/config/agents.yaml +++ b/config/agents.yaml @@ -20,7 +20,7 @@ # ── Defaults ──────────────────────────────────────────────────────────────── defaults: - model: qwen3.5:latest + model: qwen3:30b prompt_tier: lite max_history: 10 tools: [] diff --git a/config/providers.yaml b/config/providers.yaml index 2b67dd0..0e60c3e 100644 --- a/config/providers.yaml +++ b/config/providers.yaml @@ -25,7 +25,7 @@ providers: url: "http://localhost:11434" models: # Text + Tools models - - name: qwen3.5:latest + - name: qwen3:30b default: true context_window: 128000 # Note: actual context is capped by OLLAMA_NUM_CTX (default 4096) to save RAM @@ -114,13 +114,12 @@ fallback_chains: # Tool-calling models (for function calling) tools: - llama3.1:8b-instruct # Best tool use - - qwen3.5:latest # Qwen 3.5 — strong tool use - qwen2.5:7b # Reliable tools - llama3.2:3b # Small but capable # General text generation (any model) text: - - qwen3.5:latest + - qwen3:30b - llama3.1:8b-instruct - qwen2.5:14b - deepseek-r1:1.5b diff --git a/docs/SOVEREIGN_AGI_RESEARCH.md b/docs/SOVEREIGN_AGI_RESEARCH.md index a05c105..939a4b6 100644 --- a/docs/SOVEREIGN_AGI_RESEARCH.md +++ b/docs/SOVEREIGN_AGI_RESEARCH.md @@ -172,7 +172,7 @@ support: ```python class LLMConfig(BaseModel): ollama_url: str = "http://localhost:11434" - ollama_model: str = "qwen3.5:latest" + ollama_model: str = "qwen3:30b" # ... all LLM settings class MemoryConfig(BaseModel): diff --git a/src/config.py b/src/config.py index 7ceac6e..cc23369 100644 --- a/src/config.py +++ b/src/config.py @@ -16,11 +16,11 @@ class Settings(BaseSettings): ollama_url: str = "http://localhost:11434" # LLM model passed to Agno/Ollama — override with OLLAMA_MODEL - # qwen3.5:latest is the primary model — better reasoning and tool calling + # qwen3:30b is the primary model — better reasoning and tool calling # than llama3.1:8b-instruct while still running locally on modest hardware. - # Fallback: llama3.1:8b-instruct if qwen3.5:latest not available. + # Fallback: llama3.1:8b-instruct if qwen3:30b not available. # llama3.2 (3B) hallucinated tool output consistently in testing. - ollama_model: str = "qwen3.5:latest" + ollama_model: str = "qwen3:30b" # Context window size for Ollama inference — override with OLLAMA_NUM_CTX # qwen3:30b with default context eats 45GB on a 39GB Mac. @@ -28,12 +28,11 @@ class Settings(BaseSettings): ollama_num_ctx: int = 4096 # Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS - # as comma-separated strings, e.g. FALLBACK_MODELS="qwen3.5:latest,llama3.1" + # as comma-separated strings, e.g. FALLBACK_MODELS="qwen3:30b,llama3.1" # Or edit config/providers.yaml → fallback_chains for the canonical source. fallback_models: list[str] = [ "llama3.1:8b-instruct", "llama3.1", - "qwen3.5:latest", "qwen2.5:14b", "qwen2.5:7b", "llama3.2:3b", diff --git a/src/infrastructure/models/multimodal.py b/src/infrastructure/models/multimodal.py index b26b77e..5c0181a 100644 --- a/src/infrastructure/models/multimodal.py +++ b/src/infrastructure/models/multimodal.py @@ -93,18 +93,6 @@ KNOWN_MODEL_CAPABILITIES: dict[str, set[ModelCapability]] = { ModelCapability.VISION, }, # Qwen series - "qwen3.5": { - ModelCapability.TEXT, - ModelCapability.TOOLS, - ModelCapability.JSON, - ModelCapability.STREAMING, - }, - "qwen3.5:latest": { - ModelCapability.TEXT, - ModelCapability.TOOLS, - ModelCapability.JSON, - ModelCapability.STREAMING, - }, "qwen2.5": { ModelCapability.TEXT, ModelCapability.TOOLS, @@ -271,9 +259,8 @@ DEFAULT_FALLBACK_CHAINS: dict[ModelCapability, list[str]] = { ], ModelCapability.TOOLS: [ "llama3.1:8b-instruct", # Best tool use - "qwen3.5:latest", # Qwen 3.5 — strong tool use - "llama3.2:3b", # Smaller but capable "qwen2.5:7b", # Reliable fallback + "llama3.2:3b", # Smaller but capable ], ModelCapability.AUDIO: [ # Audio models are less common in Ollama diff --git a/src/timmy/tools.py b/src/timmy/tools.py index bf992e1..3647982 100644 --- a/src/timmy/tools.py +++ b/src/timmy/tools.py @@ -283,12 +283,12 @@ def create_aider_tool(base_path: Path): def __init__(self, base_dir: Path): self.base_dir = base_dir - def run_aider(self, prompt: str, model: str = "qwen3.5:latest") -> str: + def run_aider(self, prompt: str, model: str = "qwen3:30b") -> str: """Run Aider to generate code changes. Args: prompt: What you want Aider to do (e.g., "add a fibonacci function") - model: Ollama model to use (default: qwen3.5:latest) + model: Ollama model to use (default: qwen3:30b) Returns: Aider's response with the code changes made @@ -788,7 +788,7 @@ def _ai_tool_catalog() -> dict: }, "aider": { "name": "Aider AI Assistant", - "description": "Local AI coding assistant using Ollama (qwen3.5:latest or deepseek-coder)", + "description": "Local AI coding assistant using Ollama (qwen3:30b or deepseek-coder)", "available_in": ["forge", "orchestrator"], }, } diff --git a/src/timmy/tools_intro/__init__.py b/src/timmy/tools_intro/__init__.py index 0efe628..b52f8f9 100644 --- a/src/timmy/tools_intro/__init__.py +++ b/src/timmy/tools_intro/__init__.py @@ -63,7 +63,7 @@ def _get_ollama_model() -> str: 1. /api/ps — models currently loaded in memory (most accurate) 2. /api/tags — all installed models (fallback) Both use exact name match to avoid prefix collisions - (e.g. 'qwen3:30b' vs 'qwen3.5:latest'). + (e.g. 'qwen3:8b' vs 'qwen3:30b'). """ from config import settings diff --git a/tests/timmy/test_introspection.py b/tests/timmy/test_introspection.py index 4c2211e..758bf4b 100644 --- a/tests/timmy/test_introspection.py +++ b/tests/timmy/test_introspection.py @@ -98,35 +98,35 @@ class TestGetOllamaModelExactMatch: @patch("timmy.tools_intro.httpx.get") def test_prefix_collision_returns_correct_model(self, mock_get): - """qwen3:30b configured — must NOT match qwen3.5:latest (prefix bug).""" + """qwen3:8b configured — must NOT match qwen3:30b (prefix bug).""" from timmy.tools_intro import _get_ollama_model - # /api/ps has both models loaded; configured is qwen3:30b - ps_resp = _mock_response({"models": [{"name": "qwen3.5:latest"}, {"name": "qwen3:30b"}]}) + # /api/ps has both models loaded; configured is qwen3:8b + ps_resp = _mock_response({"models": [{"name": "qwen3:30b"}, {"name": "qwen3:8b"}]}) mock_get.return_value = ps_resp with patch("config.settings") as mock_settings: - mock_settings.ollama_model = "qwen3:30b" + mock_settings.ollama_model = "qwen3:8b" mock_settings.ollama_url = "http://localhost:11434" result = _get_ollama_model() - assert result == "qwen3:30b", f"Got '{result}' — prefix collision bug!" + assert result == "qwen3:8b", f"Got '{result}' — prefix collision bug!" @patch("timmy.tools_intro.httpx.get") def test_configured_model_not_running_returns_actual(self, mock_get): """If configured model isn't loaded, report what IS running.""" from timmy.tools_intro import _get_ollama_model - ps_resp = _mock_response({"models": [{"name": "qwen3.5:latest"}]}) + ps_resp = _mock_response({"models": [{"name": "qwen3:30b"}]}) mock_get.return_value = ps_resp with patch("config.settings") as mock_settings: - mock_settings.ollama_model = "qwen3:30b" + mock_settings.ollama_model = "qwen3:8b" mock_settings.ollama_url = "http://localhost:11434" result = _get_ollama_model() # Should report actual running model, not configured one - assert result == "qwen3.5:latest" + assert result == "qwen3:30b" @patch("timmy.tools_intro.httpx.get") def test_latest_suffix_match(self, mock_get):