feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 model name — now read dynamically from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
This commit is contained in:
Alexander Whitestone
2026-02-26 23:39:13 -05:00
committed by GitHub
parent 4ba272eb4f
commit 18ed6232f9
9 changed files with 307 additions and 75 deletions

View File

@@ -3,6 +3,7 @@ from unittest.mock import AsyncMock, patch
# ── Index ─────────────────────────────────────────────────────────────────────
def test_index_returns_200(client):
response = client.get("/")
assert response.status_code == 200
@@ -16,13 +17,18 @@ def test_index_contains_title(client):
def test_index_contains_chat_interface(client):
response = client.get("/")
# Timmy panel loads dynamically via HTMX; verify the trigger attribute is present
assert "hx-get=\"/agents/timmy/panel\"" in response.text
assert 'hx-get="/agents/timmy/panel"' in response.text
# ── Health ────────────────────────────────────────────────────────────────────
def test_health_endpoint_ok(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
with patch(
"dashboard.routes.health.check_ollama",
new_callable=AsyncMock,
return_value=True,
):
response = client.get("/health")
assert response.status_code == 200
data = response.json()
@@ -32,21 +38,33 @@ def test_health_endpoint_ok(client):
def test_health_endpoint_ollama_down(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=False):
with patch(
"dashboard.routes.health.check_ollama",
new_callable=AsyncMock,
return_value=False,
):
response = client.get("/health")
assert response.status_code == 200
assert response.json()["services"]["ollama"] == "down"
def test_health_status_panel_ollama_up(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
with patch(
"dashboard.routes.health.check_ollama",
new_callable=AsyncMock,
return_value=True,
):
response = client.get("/health/status")
assert response.status_code == 200
assert "UP" in response.text
def test_health_status_panel_ollama_down(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=False):
with patch(
"dashboard.routes.health.check_ollama",
new_callable=AsyncMock,
return_value=False,
):
response = client.get("/health/status")
assert response.status_code == 200
assert "DOWN" in response.text
@@ -54,6 +72,7 @@ def test_health_status_panel_ollama_down(client):
# ── Agents ────────────────────────────────────────────────────────────────────
def test_agents_list(client):
response = client.get("/agents")
assert response.status_code == 200
@@ -67,14 +86,18 @@ def test_agents_list_timmy_metadata(client):
response = client.get("/agents")
timmy = next(a for a in response.json()["agents"] if a["id"] == "timmy")
assert timmy["name"] == "Timmy"
assert timmy["model"] == "llama3.2"
assert timmy["model"] == "llama3.1:8b-instruct"
assert timmy["type"] == "sovereign"
# ── Chat ──────────────────────────────────────────────────────────────────────
def test_chat_timmy_success(client):
with patch("dashboard.routes.agents.timmy_chat", return_value="I am Timmy, operational and sovereign."):
with patch(
"dashboard.routes.agents.timmy_chat",
return_value="I am Timmy, operational and sovereign.",
):
response = client.post("/agents/timmy/chat", data={"message": "status?"})
assert response.status_code == 200
@@ -90,7 +113,10 @@ def test_chat_timmy_shows_user_message(client):
def test_chat_timmy_ollama_offline(client):
with patch("dashboard.routes.agents.timmy_chat", side_effect=Exception("connection refused")):
with patch(
"dashboard.routes.agents.timmy_chat",
side_effect=Exception("connection refused"),
):
response = client.post("/agents/timmy/chat", data={"message": "ping"})
assert response.status_code == 200
@@ -105,6 +131,7 @@ def test_chat_timmy_requires_message(client):
# ── History ────────────────────────────────────────────────────────────────────
def test_history_empty_shows_init_message(client):
response = client.get("/agents/timmy/history")
assert response.status_code == 200

View File

@@ -20,6 +20,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
# ── helpers ───────────────────────────────────────────────────────────────────
def _css() -> str:
"""Read the main stylesheet."""
css_path = Path(__file__).parent.parent.parent / "static" / "style.css"
@@ -37,6 +38,7 @@ def _timmy_panel_html(client) -> str:
# ── M1xx — Viewport & meta tags ───────────────────────────────────────────────
def test_M101_viewport_meta_present(client):
"""viewport meta tag must exist for correct mobile scaling."""
html = _index_html(client)
@@ -84,6 +86,7 @@ def test_M108_lang_attribute_on_html(client):
# ── M2xx — Touch target sizing ────────────────────────────────────────────────
def test_M201_send_button_min_height_44px():
"""SEND button must be at least 44 × 44 px — Apple HIG minimum."""
css = _css()
@@ -111,6 +114,7 @@ def test_M204_touch_action_manipulation_on_buttons():
# ── M3xx — iOS keyboard & zoom prevention ─────────────────────────────────────
def test_M301_input_font_size_16px_in_mobile_query():
"""iOS Safari zooms in when input font-size < 16px. Must be exactly 16px."""
css = _css()
@@ -149,6 +153,7 @@ def test_M305_input_spellcheck_false(client):
# ── M4xx — HTMX robustness ────────────────────────────────────────────────────
def test_M401_form_hx_sync_drop(client):
"""hx-sync=this:drop discards duplicate submissions (fast double-tap)."""
html = _timmy_panel_html(client)
@@ -181,6 +186,7 @@ def test_M405_chat_log_loads_history_on_boot(client):
# ── M5xx — Safe-area / notch support ─────────────────────────────────────────
def test_M501_safe_area_inset_top_in_header():
"""Header padding must accommodate the iPhone notch / status bar."""
css = _css()
@@ -213,9 +219,11 @@ def test_M505_dvh_units_used():
# ── M6xx — AirLLM backend interface contract ──────────────────────────────────
def test_M601_airllm_agent_has_run_method():
"""TimmyAirLLMAgent must expose run() so the dashboard route can call it."""
from timmy.backends import TimmyAirLLMAgent
assert hasattr(TimmyAirLLMAgent, "run"), (
"TimmyAirLLMAgent is missing run() — dashboard will fail with AirLLM backend"
)
@@ -225,6 +233,7 @@ def test_M602_airllm_run_returns_content_attribute():
"""run() must return an object with a .content attribute (Agno RunResponse compat)."""
with patch("timmy.backends.is_apple_silicon", return_value=False):
from timmy.backends import TimmyAirLLMAgent
agent = TimmyAirLLMAgent(model_size="8b")
mock_model = MagicMock()
@@ -246,6 +255,7 @@ def test_M603_airllm_run_updates_history():
"""run() must update _history so multi-turn context is preserved."""
with patch("timmy.backends.is_apple_silicon", return_value=False):
from timmy.backends import TimmyAirLLMAgent
agent = TimmyAirLLMAgent(model_size="8b")
mock_model = MagicMock()
@@ -268,10 +278,13 @@ def test_M604_airllm_print_response_delegates_to_run():
"""print_response must use run() so both interfaces share one inference path."""
with patch("timmy.backends.is_apple_silicon", return_value=False):
from timmy.backends import TimmyAirLLMAgent, RunResult
agent = TimmyAirLLMAgent(model_size="8b")
with patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run, \
patch.object(agent, "_render"):
with (
patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run,
patch.object(agent, "_render"),
):
agent.print_response("hello", stream=True)
mock_run.assert_called_once_with("hello", stream=True)
@@ -279,24 +292,43 @@ def test_M604_airllm_print_response_delegates_to_run():
def test_M605_health_status_passes_model_to_template(client):
"""Health status partial must receive the configured model name, not a hardcoded string."""
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
with patch(
"dashboard.routes.health.check_ollama",
new_callable=AsyncMock,
return_value=True,
):
response = client.get("/health/status")
# The default model is llama3.2 — it should appear in the partial from settings, not hardcoded
# The default model is llama3.1:8b-instruct — it should appear from settings
assert response.status_code == 200
assert "llama3.2" in response.text # rendered via template variable, not hardcoded literal
assert (
"llama3.1" in response.text
) # rendered via template variable, not hardcoded literal
# ── M7xx — XSS prevention ─────────────────────────────────────────────────────
def _mobile_html() -> str:
"""Read the mobile template source."""
path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "mobile.html"
path = (
Path(__file__).parent.parent.parent
/ "src"
/ "dashboard"
/ "templates"
/ "mobile.html"
)
return path.read_text()
def _swarm_live_html() -> str:
"""Read the swarm live template source."""
path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "swarm_live.html"
path = (
Path(__file__).parent.parent.parent
/ "src"
/ "dashboard"
/ "templates"
/ "swarm_live.html"
)
return path.read_text()
@@ -324,7 +356,9 @@ def test_M702_mobile_chat_user_input_not_in_innerhtml_template_literal():
def test_M703_swarm_live_agent_name_not_interpolated_in_innerhtml():
"""swarm_live.html must not put ${agent.name} inside innerHTML template literals."""
html = _swarm_live_html()
blocks = re.findall(r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL)
blocks = re.findall(
r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL
)
assert len(blocks) == 0, (
"swarm_live.html still uses innerHTML=agents.map(…) with interpolated agent data — XSS vulnerability"
)

View File

@@ -1,4 +1,4 @@
from timmy.prompts import TIMMY_SYSTEM_PROMPT, TIMMY_STATUS_PROMPT
from timmy.prompts import TIMMY_SYSTEM_PROMPT, TIMMY_STATUS_PROMPT, get_system_prompt
def test_system_prompt_not_empty():
@@ -31,3 +31,10 @@ def test_status_prompt_has_timmy():
def test_prompts_are_distinct():
assert TIMMY_SYSTEM_PROMPT != TIMMY_STATUS_PROMPT
def test_get_system_prompt_injects_model_name():
"""System prompt should inject actual model name from config."""
prompt = get_system_prompt(tools_enabled=False)
# Should contain the model name from settings, not hardcoded
assert "llama3.1" in prompt or "qwen" in prompt or "{model_name}" in prompt