forked from Rockachopa/Timmy-time-dashboard
feat: Timmy fixes and improvements (#72)
* test: remove hardcoded sleeps, add pytest-timeout

  - Replace fixed time.sleep() calls with intelligent polling or WebDriverWait
  - Add pytest-timeout dependency and --timeout=30 to prevent hangs
  - Fixes test flakiness and improves test suite speed

* feat: add Aider AI tool to Forge's toolkit

  - Add Aider tool that calls local Ollama (qwen2.5:14b) for AI coding assist
  - Register tool in Forge's code toolkit
  - Add functional tests for the Aider tool

* config: add opencode.json with local Ollama provider for sovereign AI

* feat: Timmy fixes and improvements

  ## Bug Fixes

  - Fix read_file path resolution: add ~ expansion, proper relative path handling
  - Add repo_root to config.py with auto-detection from .git location
  - Fix hardcoded llama3.2 - now dynamic from settings.ollama_model

  ## Timmy's Requests

  - Add communication protocol to AGENTS.md (read context first, explain changes)
  - Create DECISIONS.md for architectural decision documentation
  - Add reasoning guidance to system prompts (step-by-step, state uncertainty)
  - Update tests to reflect correct model name (llama3.1:8b-instruct)

  ## Testing

  - All 177 dashboard tests pass
  - All 32 prompt/tool tests pass

---------

Co-authored-by: Alexander Payne <apayne@MM.local>
This commit is contained in:
committed by
GitHub
parent
4ba272eb4f
commit
18ed6232f9
@@ -3,6 +3,7 @@ from unittest.mock import AsyncMock, patch
|
||||
|
||||
# ── Index ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_index_returns_200(client):
|
||||
response = client.get("/")
|
||||
assert response.status_code == 200
|
||||
@@ -16,13 +17,18 @@ def test_index_contains_title(client):
|
||||
def test_index_contains_chat_interface(client):
|
||||
response = client.get("/")
|
||||
# Timmy panel loads dynamically via HTMX; verify the trigger attribute is present
|
||||
assert "hx-get=\"/agents/timmy/panel\"" in response.text
|
||||
assert 'hx-get="/agents/timmy/panel"' in response.text
|
||||
|
||||
|
||||
# ── Health ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_health_endpoint_ok(client):
|
||||
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
|
||||
with patch(
|
||||
"dashboard.routes.health.check_ollama",
|
||||
new_callable=AsyncMock,
|
||||
return_value=True,
|
||||
):
|
||||
response = client.get("/health")
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
@@ -32,21 +38,33 @@ def test_health_endpoint_ok(client):
|
||||
|
||||
|
||||
def test_health_endpoint_ollama_down(client):
|
||||
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=False):
|
||||
with patch(
|
||||
"dashboard.routes.health.check_ollama",
|
||||
new_callable=AsyncMock,
|
||||
return_value=False,
|
||||
):
|
||||
response = client.get("/health")
|
||||
assert response.status_code == 200
|
||||
assert response.json()["services"]["ollama"] == "down"
|
||||
|
||||
|
||||
def test_health_status_panel_ollama_up(client):
|
||||
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
|
||||
with patch(
|
||||
"dashboard.routes.health.check_ollama",
|
||||
new_callable=AsyncMock,
|
||||
return_value=True,
|
||||
):
|
||||
response = client.get("/health/status")
|
||||
assert response.status_code == 200
|
||||
assert "UP" in response.text
|
||||
|
||||
|
||||
def test_health_status_panel_ollama_down(client):
|
||||
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=False):
|
||||
with patch(
|
||||
"dashboard.routes.health.check_ollama",
|
||||
new_callable=AsyncMock,
|
||||
return_value=False,
|
||||
):
|
||||
response = client.get("/health/status")
|
||||
assert response.status_code == 200
|
||||
assert "DOWN" in response.text
|
||||
@@ -54,6 +72,7 @@ def test_health_status_panel_ollama_down(client):
|
||||
|
||||
# ── Agents ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_agents_list(client):
|
||||
response = client.get("/agents")
|
||||
assert response.status_code == 200
|
||||
@@ -67,14 +86,18 @@ def test_agents_list_timmy_metadata(client):
|
||||
response = client.get("/agents")
|
||||
timmy = next(a for a in response.json()["agents"] if a["id"] == "timmy")
|
||||
assert timmy["name"] == "Timmy"
|
||||
assert timmy["model"] == "llama3.2"
|
||||
assert timmy["model"] == "llama3.1:8b-instruct"
|
||||
assert timmy["type"] == "sovereign"
|
||||
|
||||
|
||||
# ── Chat ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_chat_timmy_success(client):
|
||||
with patch("dashboard.routes.agents.timmy_chat", return_value="I am Timmy, operational and sovereign."):
|
||||
with patch(
|
||||
"dashboard.routes.agents.timmy_chat",
|
||||
return_value="I am Timmy, operational and sovereign.",
|
||||
):
|
||||
response = client.post("/agents/timmy/chat", data={"message": "status?"})
|
||||
|
||||
assert response.status_code == 200
|
||||
@@ -90,7 +113,10 @@ def test_chat_timmy_shows_user_message(client):
|
||||
|
||||
|
||||
def test_chat_timmy_ollama_offline(client):
|
||||
with patch("dashboard.routes.agents.timmy_chat", side_effect=Exception("connection refused")):
|
||||
with patch(
|
||||
"dashboard.routes.agents.timmy_chat",
|
||||
side_effect=Exception("connection refused"),
|
||||
):
|
||||
response = client.post("/agents/timmy/chat", data={"message": "ping"})
|
||||
|
||||
assert response.status_code == 200
|
||||
@@ -105,6 +131,7 @@ def test_chat_timmy_requires_message(client):
|
||||
|
||||
# ── History ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_history_empty_shows_init_message(client):
|
||||
response = client.get("/agents/timmy/history")
|
||||
assert response.status_code == 200
|
||||
|
||||
@@ -20,6 +20,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
# ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _css() -> str:
|
||||
"""Read the main stylesheet."""
|
||||
css_path = Path(__file__).parent.parent.parent / "static" / "style.css"
|
||||
@@ -37,6 +38,7 @@ def _timmy_panel_html(client) -> str:
|
||||
|
||||
# ── M1xx — Viewport & meta tags ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_M101_viewport_meta_present(client):
|
||||
"""viewport meta tag must exist for correct mobile scaling."""
|
||||
html = _index_html(client)
|
||||
@@ -84,6 +86,7 @@ def test_M108_lang_attribute_on_html(client):
|
||||
|
||||
# ── M2xx — Touch target sizing ────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_M201_send_button_min_height_44px():
|
||||
"""SEND button must be at least 44 × 44 px — Apple HIG minimum."""
|
||||
css = _css()
|
||||
@@ -111,6 +114,7 @@ def test_M204_touch_action_manipulation_on_buttons():
|
||||
|
||||
# ── M3xx — iOS keyboard & zoom prevention ─────────────────────────────────────
|
||||
|
||||
|
||||
def test_M301_input_font_size_16px_in_mobile_query():
|
||||
"""iOS Safari zooms in when input font-size < 16px. Must be exactly 16px."""
|
||||
css = _css()
|
||||
@@ -149,6 +153,7 @@ def test_M305_input_spellcheck_false(client):
|
||||
|
||||
# ── M4xx — HTMX robustness ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_M401_form_hx_sync_drop(client):
|
||||
"""hx-sync=this:drop discards duplicate submissions (fast double-tap)."""
|
||||
html = _timmy_panel_html(client)
|
||||
@@ -181,6 +186,7 @@ def test_M405_chat_log_loads_history_on_boot(client):
|
||||
|
||||
# ── M5xx — Safe-area / notch support ─────────────────────────────────────────
|
||||
|
||||
|
||||
def test_M501_safe_area_inset_top_in_header():
|
||||
"""Header padding must accommodate the iPhone notch / status bar."""
|
||||
css = _css()
|
||||
@@ -213,9 +219,11 @@ def test_M505_dvh_units_used():
|
||||
|
||||
# ── M6xx — AirLLM backend interface contract ──────────────────────────────────
|
||||
|
||||
|
||||
def test_M601_airllm_agent_has_run_method():
|
||||
"""TimmyAirLLMAgent must expose run() so the dashboard route can call it."""
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
|
||||
assert hasattr(TimmyAirLLMAgent, "run"), (
|
||||
"TimmyAirLLMAgent is missing run() — dashboard will fail with AirLLM backend"
|
||||
)
|
||||
@@ -225,6 +233,7 @@ def test_M602_airllm_run_returns_content_attribute():
|
||||
"""run() must return an object with a .content attribute (Agno RunResponse compat)."""
|
||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
|
||||
agent = TimmyAirLLMAgent(model_size="8b")
|
||||
|
||||
mock_model = MagicMock()
|
||||
@@ -246,6 +255,7 @@ def test_M603_airllm_run_updates_history():
|
||||
"""run() must update _history so multi-turn context is preserved."""
|
||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
||||
from timmy.backends import TimmyAirLLMAgent
|
||||
|
||||
agent = TimmyAirLLMAgent(model_size="8b")
|
||||
|
||||
mock_model = MagicMock()
|
||||
@@ -268,10 +278,13 @@ def test_M604_airllm_print_response_delegates_to_run():
|
||||
"""print_response must use run() so both interfaces share one inference path."""
|
||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
||||
from timmy.backends import TimmyAirLLMAgent, RunResult
|
||||
|
||||
agent = TimmyAirLLMAgent(model_size="8b")
|
||||
|
||||
with patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run, \
|
||||
patch.object(agent, "_render"):
|
||||
with (
|
||||
patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run,
|
||||
patch.object(agent, "_render"),
|
||||
):
|
||||
agent.print_response("hello", stream=True)
|
||||
|
||||
mock_run.assert_called_once_with("hello", stream=True)
|
||||
@@ -279,24 +292,43 @@ def test_M604_airllm_print_response_delegates_to_run():
|
||||
|
||||
def test_M605_health_status_passes_model_to_template(client):
|
||||
"""Health status partial must receive the configured model name, not a hardcoded string."""
|
||||
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
|
||||
with patch(
|
||||
"dashboard.routes.health.check_ollama",
|
||||
new_callable=AsyncMock,
|
||||
return_value=True,
|
||||
):
|
||||
response = client.get("/health/status")
|
||||
# The default model is llama3.2 — it should appear in the partial from settings, not hardcoded
|
||||
# The default model is llama3.1:8b-instruct — it should appear from settings
|
||||
assert response.status_code == 200
|
||||
assert "llama3.2" in response.text # rendered via template variable, not hardcoded literal
|
||||
assert (
|
||||
"llama3.1" in response.text
|
||||
) # rendered via template variable, not hardcoded literal
|
||||
|
||||
|
||||
# ── M7xx — XSS prevention ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _mobile_html() -> str:
|
||||
"""Read the mobile template source."""
|
||||
path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "mobile.html"
|
||||
path = (
|
||||
Path(__file__).parent.parent.parent
|
||||
/ "src"
|
||||
/ "dashboard"
|
||||
/ "templates"
|
||||
/ "mobile.html"
|
||||
)
|
||||
return path.read_text()
|
||||
|
||||
|
||||
def _swarm_live_html() -> str:
|
||||
"""Read the swarm live template source."""
|
||||
path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "swarm_live.html"
|
||||
path = (
|
||||
Path(__file__).parent.parent.parent
|
||||
/ "src"
|
||||
/ "dashboard"
|
||||
/ "templates"
|
||||
/ "swarm_live.html"
|
||||
)
|
||||
return path.read_text()
|
||||
|
||||
|
||||
@@ -324,7 +356,9 @@ def test_M702_mobile_chat_user_input_not_in_innerhtml_template_literal():
|
||||
def test_M703_swarm_live_agent_name_not_interpolated_in_innerhtml():
|
||||
"""swarm_live.html must not put ${agent.name} inside innerHTML template literals."""
|
||||
html = _swarm_live_html()
|
||||
blocks = re.findall(r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL)
|
||||
blocks = re.findall(
|
||||
r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL
|
||||
)
|
||||
assert len(blocks) == 0, (
|
||||
"swarm_live.html still uses innerHTML=agents.map(…) with interpolated agent data — XSS vulnerability"
|
||||
)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from timmy.prompts import TIMMY_SYSTEM_PROMPT, TIMMY_STATUS_PROMPT
|
||||
from timmy.prompts import TIMMY_SYSTEM_PROMPT, TIMMY_STATUS_PROMPT, get_system_prompt
|
||||
|
||||
|
||||
def test_system_prompt_not_empty():
|
||||
@@ -31,3 +31,10 @@ def test_status_prompt_has_timmy():
|
||||
|
||||
def test_prompts_are_distinct():
|
||||
assert TIMMY_SYSTEM_PROMPT != TIMMY_STATUS_PROMPT
|
||||
|
||||
|
||||
def test_get_system_prompt_injects_model_name():
|
||||
"""System prompt should inject actual model name from config."""
|
||||
prompt = get_system_prompt(tools_enabled=False)
|
||||
# Should contain the model name from settings, not hardcoded
|
||||
assert "llama3.1" in prompt or "qwen" in prompt or "{model_name}" in prompt
|
||||
|
||||
Reference in New Issue
Block a user