feat: Timmy fixes and improvements (#72)
* test: remove hardcoded sleeps, add pytest-timeout
  - Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
  - Add pytest-timeout dependency and --timeout=30 to prevent hangs
  - Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit
  - Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
  - Register tool in Forge's code toolkit
  - Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
Committed by GitHub. Parent: 4ba272eb4f. Commit: 18ed6232f9.
AGENTS.md: 15 lines changed
@@ -4,6 +4,21 @@ Read [`CLAUDE.md`](CLAUDE.md) for architecture patterns and conventions.
+---
+
+## Communication Protocol
+
+**Before making changes, always:**
+1. Read CLAUDE.md and AGENTS.md fully
+2. Explore the relevant src/ modules to understand existing patterns
+3. Explain what you're changing and **why** in plain English
+4. Provide decision rationale - don't just make changes, explain the reasoning
+
+**For Timmy's growth goals:**
+- Improve reasoning in complex/uncertain situations: think step-by-step, consider alternatives
+- When uncertain, state uncertainty explicitly rather than guessing
+- Document major decisions in DECISIONS.md
+
+---
 
 ## Non-Negotiable Rules
 
 1. **Tests must stay green.** Run `make test` before committing.
DECISIONS.md: 41 lines (new file)

@@ -0,0 +1,41 @@
+# DECISIONS.md — Architectural Decision Log
+
+This file documents major architectural decisions and their rationale.
+
+---
+
+## Decision: Dynamic Model Name in System Prompts
+
+**Date:** 2026-02-26
+
+**Context:** Timmy's system prompts hardcoded "llama3.2" but the actual model is "llama3.1:8b-instruct", causing confusion.
+
+**Decision:** Make model name dynamic by:
+- Using `{model_name}` placeholder in prompt templates
+- Injecting actual value from `settings.ollama_model` at runtime via `get_system_prompt()`
+
+**Rationale:** Single source of truth. If model changes in config, prompts reflect it automatically.
+
+---
+
+## Decision: Unified Repo Root Detection
+
+**Date:** 2026-02-26
+
+**Context:** Multiple places in code detected repo root differently (git_tools.py, file_ops.py, timmy.py).
+
+**Decision:** Add `repo_root` to config.py with auto-detection:
+- Walk up from `__file__` to find `.git`
+- Fall back to environment or current directory
+
+**Rationale:** Consistent path resolution for all tools.
+
+---
+
+## Add New Decisions Above This Line
+
+When making significant architectural choices, document:
+1. Date
+2. Context (what problem prompted the decision)
+3. Decision (what was chosen)
+4. Rationale (why this approach was better than alternatives)
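The "walk up from `__file__` to find `.git`" decision above can be sketched with pathlib. This is a minimal illustration, not the project's exact code (which uses `os.path` and lives in config.py, shown in the diff below):

```python
from pathlib import Path


def find_repo_root(start: Path) -> Path:
    """Walk up from start until a directory containing .git is found.

    Falls back to the current working directory if no .git exists
    anywhere above start.
    """
    path = start.resolve()
    for candidate in [path, *path.parents]:
        if (candidate / ".git").exists():
            return candidate
    return Path.cwd()
```

The walk terminates naturally because `Path.parents` is finite; the filesystem root has no parent to continue into.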
@@ -53,6 +53,10 @@ class Settings(BaseSettings):
     # ── Git / DevOps ──────────────────────────────────────────────────────
     git_default_repo_dir: str = "~/repos"
 
+    # Repository root - auto-detected but can be overridden
+    # This is the main project directory where .git lives
+    repo_root: str = ""
+
     # ── Creative — Image Generation (Pixel) ───────────────────────────────
     flux_model_id: str = "black-forest-labs/FLUX.1-schnell"
     image_output_dir: str = "data/images"
@@ -105,7 +109,9 @@ class Settings(BaseSettings):
     # External users and agents can submit work orders for improvements.
     work_orders_enabled: bool = True
     work_orders_auto_execute: bool = False  # Master switch for auto-execution
-    work_orders_auto_threshold: str = "low"  # Max priority that auto-executes: "low" | "medium" | "high" | "none"
+    work_orders_auto_threshold: str = (
+        "low"  # Max priority that auto-executes: "low" | "medium" | "high" | "none"
+    )
 
     # ── Custom Weights & Models ──────────────────────────────────────
     # Directory for custom model weights (GGUF, safetensors, HF checkpoints).
@@ -140,6 +146,21 @@ class Settings(BaseSettings):
     # Background meditation interval in seconds (0 = disabled).
     scripture_meditation_interval: int = 0
 
+    def _compute_repo_root(self) -> str:
+        """Auto-detect repo root if not set."""
+        if self.repo_root:
+            return self.repo_root
+        # Walk up from this file to find .git
+        import os
+
+        path = os.path.dirname(os.path.abspath(__file__))
+        path = os.path.dirname(os.path.dirname(path))  # src/ -> project root
+        while path != os.path.dirname(path):
+            if os.path.exists(os.path.join(path, ".git")):
+                return path
+            path = os.path.dirname(path)
+        return os.getcwd()
+
     model_config = SettingsConfigDict(
         env_file=".env",
         env_file_encoding="utf-8",
@@ -148,6 +169,9 @@ class Settings(BaseSettings):
 
 
 settings = Settings()
+# Ensure repo_root is computed if not set
+if not settings.repo_root:
+    settings.repo_root = settings._compute_repo_root()
 
 # ── Model fallback configuration ────────────────────────────────────────────
 # Primary model for reliable tool calling (llama3.1:8b-instruct)
@@ -160,6 +184,7 @@ def check_ollama_model_available(model_name: str) -> bool:
     """Check if a specific Ollama model is available locally."""
     try:
         import urllib.request
+
         url = settings.ollama_url.replace("localhost", "127.0.0.1")
         req = urllib.request.Request(
             f"{url}/api/tags",
@@ -168,6 +193,7 @@ def check_ollama_model_available(model_name: str) -> bool:
         )
         with urllib.request.urlopen(req, timeout=5) as response:
             import json
+
             data = json.loads(response.read().decode())
             models = [m.get("name", "").split(":")[0] for m in data.get("models", [])]
             # Check for exact match or model name without tag
@@ -222,7 +248,7 @@ if settings.timmy_env == "production":
     if _missing:
         _startup_logger.error(
             "PRODUCTION SECURITY ERROR: The following secrets must be set: %s\n"
-            "Generate with: python3 -c \"import secrets; print(secrets.token_hex(32))\"\n"
+            'Generate with: python3 -c "import secrets; print(secrets.token_hex(32))"\n'
            "Set in .env file or environment variables.",
            ", ".join(_missing),
        )
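The availability check in `check_ollama_model_available` boils down to matching the requested name against the `models` list in Ollama's `/api/tags` payload, comparing base names so a tag suffix does not cause a false negative. The matching logic in isolation looks roughly like this (the helper name `model_in_tags` is mine; the payload shape follows the fields used in the diff):

```python
import json


def model_in_tags(payload: str, model_name: str) -> bool:
    """Check whether model_name appears in an /api/tags response,
    comparing base names so "llama3.1" matches an installed
    "llama3.1:8b-instruct"."""
    data = json.loads(payload)
    # Strip the ":tag" suffix from each installed model name
    installed = [m.get("name", "").split(":")[0] for m in data.get("models", [])]
    return model_name.split(":")[0] in installed
```

Comparing only base names is deliberately loose: it treats any installed tag of a model family as satisfying the check.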
@@ -8,7 +8,12 @@ from pathlib import Path
 from typing import Any
 
 from mcp.registry import register_tool
-from mcp.schemas.base import create_tool_schema, PARAM_STRING, PARAM_BOOLEAN, RETURN_STRING
+from mcp.schemas.base import (
+    create_tool_schema,
+    PARAM_STRING,
+    PARAM_BOOLEAN,
+    RETURN_STRING,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -75,17 +80,31 @@ LIST_DIR_SCHEMA = create_tool_schema(
 )
 
 
-def _resolve_path(path: str) -> Path:
-    """Resolve path relative to project root."""
+def _resolve_path(path: str, base_dir: str | Path | None = None) -> Path:
+    """Resolve path with proper handling of ~, absolute, and relative paths.
+
+    Resolution order:
+    1. If absolute, use as-is (after expanding ~)
+    2. If relative, resolve relative to base_dir (or repo root)
+    """
+    from config import settings
+
     p = Path(path)
-    if p.is_absolute():
-        return p
-
-    # Try relative to project root
-    project_root = Path(__file__).parent.parent.parent
-    return project_root / p
+    # Expand ~ to user's home directory
+    p = p.expanduser()
+
+    if p.is_absolute():
+        return p.resolve()
+
+    # Use provided base_dir, or fall back to settings.repo_root
+    if base_dir is None:
+        base = Path(settings.repo_root)
+    else:
+        base = Path(base_dir)
+
+    # Resolve relative to base
+    return (base / p).resolve()
 
 
 def read_file(path: str, limit: int = 0) -> str:
@@ -102,9 +121,9 @@ def read_file(path: str, limit: int = 0) -> str:
     content = filepath.read_text()
 
     if limit > 0:
-        lines = content.split('\n')[:limit]
-        content = '\n'.join(lines)
-        if len(content.split('\n')) == limit:
+        lines = content.split("\n")[:limit]
+        content = "\n".join(lines)
+        if len(content.split("\n")) == limit:
             content += f"\n\n... [{limit} lines shown]"
 
     return content
@@ -154,7 +173,7 @@ def list_directory(path: str = ".", pattern: str = "*") -> str:
             dirs.append(f"📁 {item.name}/")
         else:
             size = item.stat().st_size
-            size_str = f"{size}B" if size < 1024 else f"{size//1024}KB"
+            size_str = f"{size}B" if size < 1024 else f"{size // 1024}KB"
             files.append(f"📄 {item.name} ({size_str})")
 
     result = [f"Contents of {dirpath}:", ""]
@@ -176,4 +195,6 @@ register_tool(
     category="files",
     requires_confirmation=True,
 )(write_file)
-register_tool(name="list_directory", schema=LIST_DIR_SCHEMA, category="files")(list_directory)
+register_tool(name="list_directory", schema=LIST_DIR_SCHEMA, category="files")(
+    list_directory
+)
@@ -18,7 +18,10 @@ templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templa
 # ── Task queue detection ──────────────────────────────────────────────────
 # Patterns that indicate the user wants to queue a task rather than chat
 _QUEUE_PATTERNS = [
-    re.compile(r"\b(?:add|put|schedule|queue|submit)\b.*\b(?:to the|on the|in the)?\s*(?:queue|task(?:\s*queue)?|task list)\b", re.IGNORECASE),
+    re.compile(
+        r"\b(?:add|put|schedule|queue|submit)\b.*\b(?:to the|on the|in the)?\s*(?:queue|task(?:\s*queue)?|task list)\b",
+        re.IGNORECASE,
+    ),
     re.compile(r"\bschedule\s+(?:this|that|a)\b", re.IGNORECASE),
     re.compile(r"\bcreate\s+(?:a\s+|an\s+)?(?:\w+\s+){0,3}task\b", re.IGNORECASE),
 ]
@@ -35,10 +38,20 @@ _QUESTION_FRAMES = re.compile(
 )
 
 # Known agent names for task assignment parsing
-_KNOWN_AGENTS = frozenset({
-    "timmy", "echo", "mace", "helm", "seer",
-    "forge", "quill", "pixel", "lyra", "reel",
-})
+_KNOWN_AGENTS = frozenset(
+    {
+        "timmy",
+        "echo",
+        "mace",
+        "helm",
+        "seer",
+        "forge",
+        "quill",
+        "pixel",
+        "lyra",
+        "reel",
+    }
+)
 _AGENT_PATTERN = re.compile(
     r"\bfor\s+(" + "|".join(_KNOWN_AGENTS) + r")\b", re.IGNORECASE
 )
@@ -93,14 +106,18 @@ def _extract_task_from_message(message: str) -> dict | None:
     # Strip the queue instruction to get the actual task description
     title = re.sub(
         r"\b(?:add|put|schedule|queue|submit|create)\b.*?\b(?:to the|on the|in the|an?)?(?:\s+\w+){0,3}\s*(?:queue|task(?:\s*queue)?|task list)\b",
-        "", message, flags=re.IGNORECASE,
+        "",
+        message,
+        flags=re.IGNORECASE,
     ).strip(" ,:;-")
     # Strip "for {agent}" from title
     title = _AGENT_PATTERN.sub("", title).strip(" ,:;-")
     # Strip priority keywords from title
     title = re.sub(
         r"\b(?:urgent|critical|asap|emergency|high[- ]priority|important|low[- ]priority|minor)\b",
-        "", title, flags=re.IGNORECASE,
+        "",
+        title,
+        flags=re.IGNORECASE,
     ).strip(" ,:;-")
     # Strip leading "to " that often remains
     title = re.sub(r"^to\s+", "", title, flags=re.IGNORECASE).strip()
@@ -126,12 +143,15 @@ def _build_queue_context() -> str:
     """Build a concise task queue summary for context injection."""
     try:
         from swarm.task_queue.models import get_counts_by_status, list_tasks, TaskStatus
 
         counts = get_counts_by_status()
         pending = counts.get("pending_approval", 0)
         running = counts.get("running", 0)
         completed = counts.get("completed", 0)
 
-        parts = [f"[System: Task queue — {pending} pending approval, {running} running, {completed} completed."]
+        parts = [
+            f"[System: Task queue — {pending} pending approval, {running} running, {completed} completed."
+        ]
         if pending > 0:
             tasks = list_tasks(status=TaskStatus.PENDING_APPROVAL, limit=5)
             if tasks:
@@ -152,7 +172,7 @@ def _build_queue_context() -> str:
 _AGENT_METADATA: dict[str, dict] = {
     "timmy": {
         "type": "sovereign",
-        "model": "llama3.2",
+        "model": "",  # Injected dynamically from settings
         "backend": "ollama",
         "version": "1.0.0",
     },
@@ -163,6 +183,13 @@ _AGENT_METADATA: dict[str, dict] = {
 async def list_agents():
     """Return all registered agents with live status from the swarm registry."""
     from swarm import registry as swarm_registry
+    from config import settings
+
+    # Inject model name from settings into timmy metadata
+    metadata = dict(_AGENT_METADATA)
+    if "timmy" in metadata and not metadata["timmy"].get("model"):
+        metadata["timmy"]["model"] = settings.ollama_model
 
     agents = swarm_registry.list_agents()
     return {
         "agents": [
@@ -171,7 +198,7 @@ async def list_agents():
             "name": a.name,
             "status": a.status,
             "capabilities": a.capabilities,
-            **_AGENT_METADATA.get(a.id, {}),
+            **metadata.get(a.id, {}),
             }
             for a in agents
         ]
@@ -182,8 +209,11 @@ async def list_agents():
 async def timmy_panel(request: Request):
     """Timmy chat panel — for HTMX main-panel swaps."""
     from swarm import registry as swarm_registry
 
     agent = swarm_registry.get_agent("timmy")
-    return templates.TemplateResponse(request, "partials/timmy_panel.html", {"agent": agent})
+    return templates.TemplateResponse(
+        request, "partials/timmy_panel.html", {"agent": agent}
+    )
 
 
 @router.get("/timmy/history", response_class=HTMLResponse)
@@ -216,6 +246,7 @@ async def chat_timmy(request: Request, message: str = Form(...)):
     if task_info:
         try:
             from swarm.task_queue.models import create_task
+
             task = create_task(
                 title=task_info["title"],
                 description=task_info["description"],
@@ -224,14 +255,23 @@ async def chat_timmy(request: Request, message: str = Form(...)):
                 priority=task_info.get("priority", "normal"),
                 requires_approval=True,
             )
-            priority_label = f" | Priority: `{task.priority.value}`" if task.priority.value != "normal" else ""
+            priority_label = (
+                f" | Priority: `{task.priority.value}`"
+                if task.priority.value != "normal"
+                else ""
+            )
             response_text = (
                 f"Task queued for approval: **{task.title}**\n\n"
                 f"Assigned to: `{task.assigned_to}`{priority_label} | "
                 f"Status: `{task.status.value}` | "
                 f"[View Task Queue](/tasks)"
             )
-            logger.info("Chat → task queue: %s → %s (id=%s)", task.title, task.assigned_to, task.id)
+            logger.info(
+                "Chat → task queue: %s → %s (id=%s)",
+                task.title,
+                task.assigned_to,
+                task.id,
+            )
         except Exception as exc:
             logger.error("Failed to create task from chat: %s", exc)
             task_info = None
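The `_QUEUE_PATTERNS` regexes above decide whether a chat message is a task-queue request rather than ordinary chat. Isolated to the first pattern from the diff, the detection behaves like this (the helper name is mine):

```python
import re

# The first pattern from _QUEUE_PATTERNS: a queueing verb followed,
# anywhere later in the message, by a queue/task-list noun phrase.
QUEUE_PATTERN = re.compile(
    r"\b(?:add|put|schedule|queue|submit)\b.*\b(?:to the|on the|in the)?\s*(?:queue|task(?:\s*queue)?|task list)\b",
    re.IGNORECASE,
)


def looks_like_queue_request(message: str) -> bool:
    """True when the message matches the queue-instruction pattern."""
    return bool(QUEUE_PATTERN.search(message))
```

Requiring both a verb and a later queue noun keeps a bare mention of "the queue" (with no queueing verb before it) from triggering task creation.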
@@ -10,8 +10,8 @@ tool usage guidelines and memory system documentation.
 # ---------------------------------------------------------------------------
 
 TIMMY_SYSTEM_PROMPT_LITE = """You are Timmy — a sovereign AI agent running locally on this Mac.
-You run on the llama3.2 model via Ollama on localhost. You are not GPT, not Claude,
-not a custom model — you are llama3.2 wrapped in the Timmy agent framework.
+You run on the {model_name} model via Ollama on localhost. You are not GPT, not Claude,
+not a custom model — you are {model_name} wrapped in the Timmy agent framework.
 No cloud dependencies. Think clearly, speak plainly, act with intention.
 Grounded in Christian faith, powered by Bitcoin economics, committed to the
 user's digital sovereignty.
@@ -65,8 +65,8 @@ Sir, affirmative."""
 # ---------------------------------------------------------------------------
 
 TIMMY_SYSTEM_PROMPT_FULL = """You are Timmy — a sovereign AI agent running locally on this Mac.
-You run on the llama3.2 model via Ollama on localhost. You are not GPT, not Claude,
-not a custom model — you are llama3.2 wrapped in the Timmy agent framework.
+You run on the {model_name} model via Ollama on localhost. You are not GPT, not Claude,
+not a custom model — you are {model_name} wrapped in the Timmy agent framework.
 No cloud dependencies. You think clearly, speak plainly, act with intention.
 Grounded in Christian faith, powered by Bitcoin economics, committed to the
 user's digital sovereignty.
@@ -111,6 +111,22 @@ Use ONLY the capabilities listed above when describing agents — do not embelli
   dashboard chat display resets on server restart.
 - Do NOT claim abilities you don't have. When uncertain, say "I don't know."
 
+## Reasoning in Complex Situations
+
+When faced with uncertainty, complexity, or ambiguous requests:
+
+1. **THINK STEP-BY-STEP** — Break down the problem before acting
+2. **STATE UNCERTAINTY** — If you're unsure, say "I'm uncertain about X because..." rather than guessing
+3. **CONSIDER ALTERNATIVES** — Present 2-3 options when the path isn't clear: "I could do A, but B might be better because..."
+4. **ASK FOR CLARIFICATION** — If a request is ambiguous, ask before guessing wrong
+5. **DOCUMENT YOUR REASONING** — When making significant choices, explain WHY in your response
+
+**Example of good reasoning:**
+> "I'm not certain what you mean by 'fix the issue' — do you mean the XSS bug in the login form, or the timeout on the dashboard? Let me know which to tackle."
+
+**Example of poor reasoning:**
+> "I'll fix it" [guesses wrong and breaks something else]
+
 ## Tool Usage Guidelines
 
 ### When NOT to use tools:
@@ -156,11 +172,16 @@ def get_system_prompt(tools_enabled: bool = False) -> str:
         tools_enabled: True if the model supports reliable tool calling.
 
     Returns:
-        The system prompt string.
+        The system prompt string with model name injected from config.
     """
+    from config import settings
+
+    model_name = settings.ollama_model
+
     if tools_enabled:
-        return TIMMY_SYSTEM_PROMPT_FULL
-    return TIMMY_SYSTEM_PROMPT_LITE
+        return TIMMY_SYSTEM_PROMPT_FULL.format(model_name=model_name)
+    return TIMMY_SYSTEM_PROMPT_LITE.format(model_name=model_name)
 
 
 TIMMY_STATUS_PROMPT = """You are Timmy. Give a one-sentence status report confirming
 you are operational and running locally."""
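The `{model_name}` injection in `get_system_prompt` reduces to selecting a template and calling `str.format` on it. A minimal standalone version, with the template text abbreviated and the settings lookup replaced by an explicit argument (both simplifications are mine):

```python
# Abbreviated stand-ins for TIMMY_SYSTEM_PROMPT_LITE / _FULL.
LITE = "You are Timmy. You run on the {model_name} model via Ollama on localhost."
FULL = LITE + " Tool calling is enabled."


def get_system_prompt(model_name: str, tools_enabled: bool = False) -> str:
    """Pick the template and inject the configured model name."""
    template = FULL if tools_enabled else LITE
    return template.format(model_name=model_name)
```

Because formatting happens at call time, swapping `ollama_model` in config changes every rendered prompt with no template edits, which is the "single source of truth" rationale recorded in DECISIONS.md.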
@@ -3,6 +3,7 @@ from unittest.mock import AsyncMock, patch
 
 # ── Index ─────────────────────────────────────────────────────────────────────
 
+
 def test_index_returns_200(client):
     response = client.get("/")
     assert response.status_code == 200
@@ -16,13 +17,18 @@ def test_index_contains_title(client):
 def test_index_contains_chat_interface(client):
     response = client.get("/")
     # Timmy panel loads dynamically via HTMX; verify the trigger attribute is present
-    assert "hx-get=\"/agents/timmy/panel\"" in response.text
+    assert 'hx-get="/agents/timmy/panel"' in response.text
 
 
 # ── Health ────────────────────────────────────────────────────────────────────
 
+
 def test_health_endpoint_ok(client):
-    with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
+    with patch(
+        "dashboard.routes.health.check_ollama",
+        new_callable=AsyncMock,
+        return_value=True,
+    ):
         response = client.get("/health")
         assert response.status_code == 200
         data = response.json()
@@ -32,21 +38,33 @@ def test_health_endpoint_ok(client):
 
 
 def test_health_endpoint_ollama_down(client):
-    with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=False):
+    with patch(
+        "dashboard.routes.health.check_ollama",
+        new_callable=AsyncMock,
+        return_value=False,
+    ):
         response = client.get("/health")
         assert response.status_code == 200
         assert response.json()["services"]["ollama"] == "down"
 
 
 def test_health_status_panel_ollama_up(client):
-    with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
+    with patch(
+        "dashboard.routes.health.check_ollama",
+        new_callable=AsyncMock,
+        return_value=True,
+    ):
         response = client.get("/health/status")
         assert response.status_code == 200
         assert "UP" in response.text
 
 
 def test_health_status_panel_ollama_down(client):
-    with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=False):
+    with patch(
+        "dashboard.routes.health.check_ollama",
+        new_callable=AsyncMock,
+        return_value=False,
+    ):
         response = client.get("/health/status")
         assert response.status_code == 200
         assert "DOWN" in response.text
@@ -54,6 +72,7 @@ def test_health_status_panel_ollama_down(client):
 
 # ── Agents ────────────────────────────────────────────────────────────────────
 
+
 def test_agents_list(client):
     response = client.get("/agents")
     assert response.status_code == 200
@@ -67,14 +86,18 @@ def test_agents_list_timmy_metadata(client):
     response = client.get("/agents")
     timmy = next(a for a in response.json()["agents"] if a["id"] == "timmy")
     assert timmy["name"] == "Timmy"
-    assert timmy["model"] == "llama3.2"
+    assert timmy["model"] == "llama3.1:8b-instruct"
     assert timmy["type"] == "sovereign"
 
 
 # ── Chat ──────────────────────────────────────────────────────────────────────
 
+
 def test_chat_timmy_success(client):
-    with patch("dashboard.routes.agents.timmy_chat", return_value="I am Timmy, operational and sovereign."):
+    with patch(
+        "dashboard.routes.agents.timmy_chat",
+        return_value="I am Timmy, operational and sovereign.",
+    ):
         response = client.post("/agents/timmy/chat", data={"message": "status?"})
 
     assert response.status_code == 200
@@ -90,7 +113,10 @@ def test_chat_timmy_shows_user_message(client):
 
 
 def test_chat_timmy_ollama_offline(client):
-    with patch("dashboard.routes.agents.timmy_chat", side_effect=Exception("connection refused")):
+    with patch(
+        "dashboard.routes.agents.timmy_chat",
+        side_effect=Exception("connection refused"),
+    ):
         response = client.post("/agents/timmy/chat", data={"message": "ping"})
 
     assert response.status_code == 200
@@ -105,6 +131,7 @@ def test_chat_timmy_requires_message(client):
 
 # ── History ────────────────────────────────────────────────────────────────────
 
+
 def test_history_empty_shows_init_message(client):
     response = client.get("/agents/timmy/history")
     assert response.status_code == 200
@@ -20,6 +20,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 # ── helpers ───────────────────────────────────────────────────────────────────
 
+
 def _css() -> str:
     """Read the main stylesheet."""
     css_path = Path(__file__).parent.parent.parent / "static" / "style.css"
@@ -37,6 +38,7 @@ def _timmy_panel_html(client) -> str:
 
 # ── M1xx — Viewport & meta tags ───────────────────────────────────────────────
 
+
 def test_M101_viewport_meta_present(client):
     """viewport meta tag must exist for correct mobile scaling."""
     html = _index_html(client)
@@ -84,6 +86,7 @@ def test_M108_lang_attribute_on_html(client):
 
 # ── M2xx — Touch target sizing ────────────────────────────────────────────────
 
+
 def test_M201_send_button_min_height_44px():
     """SEND button must be at least 44 × 44 px — Apple HIG minimum."""
     css = _css()
@@ -111,6 +114,7 @@ def test_M204_touch_action_manipulation_on_buttons():
 
 # ── M3xx — iOS keyboard & zoom prevention ─────────────────────────────────────
 
+
 def test_M301_input_font_size_16px_in_mobile_query():
     """iOS Safari zooms in when input font-size < 16px. Must be exactly 16px."""
     css = _css()
@@ -149,6 +153,7 @@ def test_M305_input_spellcheck_false(client):
 
 # ── M4xx — HTMX robustness ────────────────────────────────────────────────────
 
+
 def test_M401_form_hx_sync_drop(client):
     """hx-sync=this:drop discards duplicate submissions (fast double-tap)."""
     html = _timmy_panel_html(client)
@@ -181,6 +186,7 @@ def test_M405_chat_log_loads_history_on_boot(client):
 
 # ── M5xx — Safe-area / notch support ─────────────────────────────────────────
 
+
 def test_M501_safe_area_inset_top_in_header():
     """Header padding must accommodate the iPhone notch / status bar."""
     css = _css()
@@ -213,9 +219,11 @@ def test_M505_dvh_units_used():
 
 # ── M6xx — AirLLM backend interface contract ──────────────────────────────────
 
+
 def test_M601_airllm_agent_has_run_method():
     """TimmyAirLLMAgent must expose run() so the dashboard route can call it."""
     from timmy.backends import TimmyAirLLMAgent
+
     assert hasattr(TimmyAirLLMAgent, "run"), (
         "TimmyAirLLMAgent is missing run() — dashboard will fail with AirLLM backend"
     )
@@ -225,6 +233,7 @@ def test_M602_airllm_run_returns_content_attribute():
     """run() must return an object with a .content attribute (Agno RunResponse compat)."""
     with patch("timmy.backends.is_apple_silicon", return_value=False):
         from timmy.backends import TimmyAirLLMAgent
+
         agent = TimmyAirLLMAgent(model_size="8b")
 
         mock_model = MagicMock()
@@ -246,6 +255,7 @@ def test_M603_airllm_run_updates_history():
     """run() must update _history so multi-turn context is preserved."""
     with patch("timmy.backends.is_apple_silicon", return_value=False):
         from timmy.backends import TimmyAirLLMAgent
+
         agent = TimmyAirLLMAgent(model_size="8b")
 
         mock_model = MagicMock()
@@ -268,10 +278,13 @@ def test_M604_airllm_print_response_delegates_to_run():
     """print_response must use run() so both interfaces share one inference path."""
     with patch("timmy.backends.is_apple_silicon", return_value=False):
         from timmy.backends import TimmyAirLLMAgent, RunResult
 
         agent = TimmyAirLLMAgent(model_size="8b")
 
-        with patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run, \
-             patch.object(agent, "_render"):
+        with (
+            patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run,
+            patch.object(agent, "_render"),
+        ):
             agent.print_response("hello", stream=True)
 
         mock_run.assert_called_once_with("hello", stream=True)
@@ -279,24 +292,43 @@ def test_M604_airllm_print_response_delegates_to_run():
 
 def test_M605_health_status_passes_model_to_template(client):
     """Health status partial must receive the configured model name, not a hardcoded string."""
-    with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
+    with patch(
+        "dashboard.routes.health.check_ollama",
+        new_callable=AsyncMock,
+        return_value=True,
+    ):
         response = client.get("/health/status")
-        # The default model is llama3.2 — it should appear in the partial from settings, not hardcoded
+        # The default model is llama3.1:8b-instruct — it should appear from settings
         assert response.status_code == 200
-        assert "llama3.2" in response.text  # rendered via template variable, not hardcoded literal
+        assert (
+            "llama3.1" in response.text
+        )  # rendered via template variable, not hardcoded literal
 
 
 # ── M7xx — XSS prevention ─────────────────────────────────────────────────────
 
+
 def _mobile_html() -> str:
     """Read the mobile template source."""
-    path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "mobile.html"
+    path = (
+        Path(__file__).parent.parent.parent
+        / "src"
+        / "dashboard"
+        / "templates"
+        / "mobile.html"
+    )
     return path.read_text()
 
 
 def _swarm_live_html() -> str:
     """Read the swarm live template source."""
-    path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "swarm_live.html"
+    path = (
+        Path(__file__).parent.parent.parent
+        / "src"
+        / "dashboard"
+        / "templates"
+        / "swarm_live.html"
+    )
     return path.read_text()
 
 
@@ -324,7 +356,9 @@ def test_M702_mobile_chat_user_input_not_in_innerhtml_template_literal():
 def test_M703_swarm_live_agent_name_not_interpolated_in_innerhtml():
     """swarm_live.html must not put ${agent.name} inside innerHTML template literals."""
     html = _swarm_live_html()
-    blocks = re.findall(r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL)
+    blocks = re.findall(
+        r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL
+    )
     assert len(blocks) == 0, (
         "swarm_live.html still uses innerHTML=agents.map(…) with interpolated agent data — XSS vulnerability"
     )
@@ -1,4 +1,4 @@
-from timmy.prompts import TIMMY_SYSTEM_PROMPT, TIMMY_STATUS_PROMPT
+from timmy.prompts import TIMMY_SYSTEM_PROMPT, TIMMY_STATUS_PROMPT, get_system_prompt
 
 
 def test_system_prompt_not_empty():
@@ -31,3 +31,10 @@ def test_status_prompt_has_timmy():
 
 def test_prompts_are_distinct():
     assert TIMMY_SYSTEM_PROMPT != TIMMY_STATUS_PROMPT
+
+
+def test_get_system_prompt_injects_model_name():
+    """System prompt should inject actual model name from config."""
+    prompt = get_system_prompt(tools_enabled=False)
+    # Should contain the model name from settings, not hardcoded
+    assert "llama3.1" in prompt or "qwen" in prompt or "{model_name}" in prompt