feat: Timmy fixes and improvements (#72)

* test: remove hardcoded sleeps, add pytest-timeout

- Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
- Add pytest-timeout dependency and --timeout=30 to prevent hangs
- Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

- Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
- Register tool in Forge's code toolkit
- Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

## Bug Fixes
- Fix read_file path resolution: add ~ expansion, proper relative path handling
- Add repo_root to config.py with auto-detection from .git location
- Fix hardcoded llama3.2 model name — now read dynamically from settings.ollama_model

## Timmy's Requests
- Add communication protocol to AGENTS.md (read context first, explain changes)
- Create DECISIONS.md for architectural decision documentation
- Add reasoning guidance to system prompts (step-by-step, state uncertainty)
- Update tests to reflect correct model name (llama3.1:8b-instruct)

## Testing
- All 177 dashboard tests pass
- All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
This commit is contained in:
Alexander Whitestone
2026-02-26 23:39:13 -05:00
committed by GitHub
parent 4ba272eb4f
commit 18ed6232f9
9 changed files with 307 additions and 75 deletions

View File

@@ -3,6 +3,7 @@ from unittest.mock import AsyncMock, patch
# ── Index ─────────────────────────────────────────────────────────────────────
def test_index_returns_200(client):
response = client.get("/")
assert response.status_code == 200
@@ -16,13 +17,18 @@ def test_index_contains_title(client):
def test_index_contains_chat_interface(client):
response = client.get("/")
# Timmy panel loads dynamically via HTMX; verify the trigger attribute is present
assert "hx-get=\"/agents/timmy/panel\"" in response.text
assert 'hx-get="/agents/timmy/panel"' in response.text
# ── Health ────────────────────────────────────────────────────────────────────
def test_health_endpoint_ok(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
with patch(
"dashboard.routes.health.check_ollama",
new_callable=AsyncMock,
return_value=True,
):
response = client.get("/health")
assert response.status_code == 200
data = response.json()
@@ -32,21 +38,33 @@ def test_health_endpoint_ok(client):
def test_health_endpoint_ollama_down(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=False):
with patch(
"dashboard.routes.health.check_ollama",
new_callable=AsyncMock,
return_value=False,
):
response = client.get("/health")
assert response.status_code == 200
assert response.json()["services"]["ollama"] == "down"
def test_health_status_panel_ollama_up(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
with patch(
"dashboard.routes.health.check_ollama",
new_callable=AsyncMock,
return_value=True,
):
response = client.get("/health/status")
assert response.status_code == 200
assert "UP" in response.text
def test_health_status_panel_ollama_down(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=False):
with patch(
"dashboard.routes.health.check_ollama",
new_callable=AsyncMock,
return_value=False,
):
response = client.get("/health/status")
assert response.status_code == 200
assert "DOWN" in response.text
@@ -54,6 +72,7 @@ def test_health_status_panel_ollama_down(client):
# ── Agents ────────────────────────────────────────────────────────────────────
def test_agents_list(client):
response = client.get("/agents")
assert response.status_code == 200
@@ -67,14 +86,18 @@ def test_agents_list_timmy_metadata(client):
response = client.get("/agents")
timmy = next(a for a in response.json()["agents"] if a["id"] == "timmy")
assert timmy["name"] == "Timmy"
assert timmy["model"] == "llama3.2"
assert timmy["model"] == "llama3.1:8b-instruct"
assert timmy["type"] == "sovereign"
# ── Chat ──────────────────────────────────────────────────────────────────────
def test_chat_timmy_success(client):
with patch("dashboard.routes.agents.timmy_chat", return_value="I am Timmy, operational and sovereign."):
with patch(
"dashboard.routes.agents.timmy_chat",
return_value="I am Timmy, operational and sovereign.",
):
response = client.post("/agents/timmy/chat", data={"message": "status?"})
assert response.status_code == 200
@@ -90,7 +113,10 @@ def test_chat_timmy_shows_user_message(client):
def test_chat_timmy_ollama_offline(client):
with patch("dashboard.routes.agents.timmy_chat", side_effect=Exception("connection refused")):
with patch(
"dashboard.routes.agents.timmy_chat",
side_effect=Exception("connection refused"),
):
response = client.post("/agents/timmy/chat", data={"message": "ping"})
assert response.status_code == 200
@@ -105,6 +131,7 @@ def test_chat_timmy_requires_message(client):
# ── History ────────────────────────────────────────────────────────────────────
def test_history_empty_shows_init_message(client):
response = client.get("/agents/timmy/history")
assert response.status_code == 200

View File

@@ -20,6 +20,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
# ── helpers ───────────────────────────────────────────────────────────────────
def _css() -> str:
"""Read the main stylesheet."""
css_path = Path(__file__).parent.parent.parent / "static" / "style.css"
@@ -37,6 +38,7 @@ def _timmy_panel_html(client) -> str:
# ── M1xx — Viewport & meta tags ───────────────────────────────────────────────
def test_M101_viewport_meta_present(client):
"""viewport meta tag must exist for correct mobile scaling."""
html = _index_html(client)
@@ -84,6 +86,7 @@ def test_M108_lang_attribute_on_html(client):
# ── M2xx — Touch target sizing ────────────────────────────────────────────────
def test_M201_send_button_min_height_44px():
"""SEND button must be at least 44 × 44 px — Apple HIG minimum."""
css = _css()
@@ -111,6 +114,7 @@ def test_M204_touch_action_manipulation_on_buttons():
# ── M3xx — iOS keyboard & zoom prevention ─────────────────────────────────────
def test_M301_input_font_size_16px_in_mobile_query():
"""iOS Safari zooms in when input font-size < 16px. Must be exactly 16px."""
css = _css()
@@ -149,6 +153,7 @@ def test_M305_input_spellcheck_false(client):
# ── M4xx — HTMX robustness ────────────────────────────────────────────────────
def test_M401_form_hx_sync_drop(client):
"""hx-sync=this:drop discards duplicate submissions (fast double-tap)."""
html = _timmy_panel_html(client)
@@ -181,6 +186,7 @@ def test_M405_chat_log_loads_history_on_boot(client):
# ── M5xx — Safe-area / notch support ─────────────────────────────────────────
def test_M501_safe_area_inset_top_in_header():
"""Header padding must accommodate the iPhone notch / status bar."""
css = _css()
@@ -213,9 +219,11 @@ def test_M505_dvh_units_used():
# ── M6xx — AirLLM backend interface contract ──────────────────────────────────
def test_M601_airllm_agent_has_run_method():
"""TimmyAirLLMAgent must expose run() so the dashboard route can call it."""
from timmy.backends import TimmyAirLLMAgent
assert hasattr(TimmyAirLLMAgent, "run"), (
"TimmyAirLLMAgent is missing run() — dashboard will fail with AirLLM backend"
)
@@ -225,6 +233,7 @@ def test_M602_airllm_run_returns_content_attribute():
"""run() must return an object with a .content attribute (Agno RunResponse compat)."""
with patch("timmy.backends.is_apple_silicon", return_value=False):
from timmy.backends import TimmyAirLLMAgent
agent = TimmyAirLLMAgent(model_size="8b")
mock_model = MagicMock()
@@ -246,6 +255,7 @@ def test_M603_airllm_run_updates_history():
"""run() must update _history so multi-turn context is preserved."""
with patch("timmy.backends.is_apple_silicon", return_value=False):
from timmy.backends import TimmyAirLLMAgent
agent = TimmyAirLLMAgent(model_size="8b")
mock_model = MagicMock()
@@ -268,10 +278,13 @@ def test_M604_airllm_print_response_delegates_to_run():
"""print_response must use run() so both interfaces share one inference path."""
with patch("timmy.backends.is_apple_silicon", return_value=False):
from timmy.backends import TimmyAirLLMAgent, RunResult
agent = TimmyAirLLMAgent(model_size="8b")
with patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run, \
patch.object(agent, "_render"):
with (
patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run,
patch.object(agent, "_render"),
):
agent.print_response("hello", stream=True)
mock_run.assert_called_once_with("hello", stream=True)
@@ -279,24 +292,43 @@ def test_M604_airllm_print_response_delegates_to_run():
def test_M605_health_status_passes_model_to_template(client):
"""Health status partial must receive the configured model name, not a hardcoded string."""
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
with patch(
"dashboard.routes.health.check_ollama",
new_callable=AsyncMock,
return_value=True,
):
response = client.get("/health/status")
# The default model is llama3.2 — it should appear in the partial from settings, not hardcoded
# The default model is llama3.1:8b-instruct — it should appear from settings
assert response.status_code == 200
assert "llama3.2" in response.text # rendered via template variable, not hardcoded literal
assert (
"llama3.1" in response.text
) # rendered via template variable, not hardcoded literal
# ── M7xx — XSS prevention ─────────────────────────────────────────────────────
def _mobile_html() -> str:
"""Read the mobile template source."""
path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "mobile.html"
path = (
Path(__file__).parent.parent.parent
/ "src"
/ "dashboard"
/ "templates"
/ "mobile.html"
)
return path.read_text()
def _swarm_live_html() -> str:
"""Read the swarm live template source."""
path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "swarm_live.html"
path = (
Path(__file__).parent.parent.parent
/ "src"
/ "dashboard"
/ "templates"
/ "swarm_live.html"
)
return path.read_text()
@@ -324,7 +356,9 @@ def test_M702_mobile_chat_user_input_not_in_innerhtml_template_literal():
def test_M703_swarm_live_agent_name_not_interpolated_in_innerhtml():
"""swarm_live.html must not put ${agent.name} inside innerHTML template literals."""
html = _swarm_live_html()
blocks = re.findall(r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL)
blocks = re.findall(
r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL
)
assert len(blocks) == 0, (
"swarm_live.html still uses innerHTML=agents.map(…) with interpolated agent data — XSS vulnerability"
)

View File

@@ -1,4 +1,4 @@
from timmy.prompts import TIMMY_SYSTEM_PROMPT, TIMMY_STATUS_PROMPT
from timmy.prompts import TIMMY_SYSTEM_PROMPT, TIMMY_STATUS_PROMPT, get_system_prompt
def test_system_prompt_not_empty():
@@ -31,3 +31,10 @@ def test_status_prompt_has_timmy():
def test_prompts_are_distinct():
assert TIMMY_SYSTEM_PROMPT != TIMMY_STATUS_PROMPT
def test_get_system_prompt_injects_model_name():
"""System prompt should inject actual model name from config."""
prompt = get_system_prompt(tools_enabled=False)
# Should contain the model name from settings, not hardcoded
assert "llama3.1" in prompt or "qwen" in prompt or "{model_name}" in prompt