Files
Timmy-time-dashboard/tests/test_dashboard.py
Alexander Payne 26e1691099 Fix Timmy coherence: persistent session, model-aware tools, response sanitization
Timmy was exhibiting severe incoherence (no memory between messages, tool call
leakage, chain-of-thought narration, random tool invocations) due to creating
a brand new agent per HTTP request and giving a 3B model (llama3.2) a 73-line
system prompt with complex tool-calling instructions it couldn't follow.

Key changes:
- Add session.py singleton with stable session_id for conversation continuity
- Add _model_supports_tools() to strip tools from small models (< 7B)
- Add two-tier prompts: lite (12 lines) for small models, full for capable ones
- Add response sanitizer to strip leaked JSON tool calls and CoT narration
- Set show_tool_calls=False to prevent raw tool JSON in output
- Wire ConversationManager for user name extraction
- Deprecate orphaned memory_layers.py (unused 4-layer system)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 19:18:08 -05:00

149 lines
5.9 KiB
Python

from unittest.mock import AsyncMock, patch
# ── Index ─────────────────────────────────────────────────────────────────────
def test_index_returns_200(client):
response = client.get("/")
assert response.status_code == 200
def test_index_contains_title(client):
response = client.get("/")
assert "TIMMY TIME" in response.text
def test_index_contains_chat_interface(client):
response = client.get("/")
# Timmy panel loads dynamically via HTMX; verify the trigger attribute is present
assert "hx-get=\"/agents/timmy/panel\"" in response.text
# ── Health ────────────────────────────────────────────────────────────────────
def test_health_endpoint_ok(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
response = client.get("/health")
assert response.status_code == 200
data = response.json()
assert data["status"] == "ok"
assert data["services"]["ollama"] == "up"
assert "timmy" in data["agents"]
def test_health_endpoint_ollama_down(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=False):
response = client.get("/health")
assert response.status_code == 200
assert response.json()["services"]["ollama"] == "down"
def test_health_status_panel_ollama_up(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=True):
response = client.get("/health/status")
assert response.status_code == 200
assert "UP" in response.text
def test_health_status_panel_ollama_down(client):
with patch("dashboard.routes.health.check_ollama", new_callable=AsyncMock, return_value=False):
response = client.get("/health/status")
assert response.status_code == 200
assert "DOWN" in response.text
# ── Agents ────────────────────────────────────────────────────────────────────
def test_agents_list(client):
response = client.get("/agents")
assert response.status_code == 200
data = response.json()
assert "agents" in data
ids = [a["id"] for a in data["agents"]]
assert "timmy" in ids
def test_agents_list_timmy_metadata(client):
response = client.get("/agents")
timmy = next(a for a in response.json()["agents"] if a["id"] == "timmy")
assert timmy["name"] == "Timmy"
assert timmy["model"] == "llama3.2"
assert timmy["type"] == "sovereign"
# ── Chat ──────────────────────────────────────────────────────────────────────
def test_chat_timmy_success(client):
with patch("dashboard.routes.agents.timmy_chat", return_value="I am Timmy, operational and sovereign."):
response = client.post("/agents/timmy/chat", data={"message": "status?"})
assert response.status_code == 200
assert "status?" in response.text
assert "I am Timmy" in response.text
def test_chat_timmy_shows_user_message(client):
with patch("dashboard.routes.agents.timmy_chat", return_value="Acknowledged."):
response = client.post("/agents/timmy/chat", data={"message": "hello there"})
assert "hello there" in response.text
def test_chat_timmy_ollama_offline(client):
with patch("dashboard.routes.agents.timmy_chat", side_effect=Exception("connection refused")):
response = client.post("/agents/timmy/chat", data={"message": "ping"})
assert response.status_code == 200
assert "Timmy is offline" in response.text
assert "ping" in response.text
def test_chat_timmy_requires_message(client):
response = client.post("/agents/timmy/chat", data={})
assert response.status_code == 422
# ── History ────────────────────────────────────────────────────────────────────
def test_history_empty_shows_init_message(client):
response = client.get("/agents/timmy/history")
assert response.status_code == 200
assert "Mission Control initialized" in response.text
def test_history_records_user_and_agent_messages(client):
with patch("dashboard.routes.agents.timmy_chat", return_value="I am operational."):
client.post("/agents/timmy/chat", data={"message": "status check"})
response = client.get("/agents/timmy/history")
assert "status check" in response.text
assert "I am operational." in response.text
def test_history_records_error_when_offline(client):
with patch("dashboard.routes.agents.timmy_chat", side_effect=Exception("refused")):
client.post("/agents/timmy/chat", data={"message": "ping"})
response = client.get("/agents/timmy/history")
assert "ping" in response.text
assert "Timmy is offline" in response.text
def test_history_clear_resets_to_init_message(client):
with patch("dashboard.routes.agents.timmy_chat", return_value="Acknowledged."):
client.post("/agents/timmy/chat", data={"message": "hello"})
response = client.delete("/agents/timmy/history")
assert response.status_code == 200
assert "Mission Control initialized" in response.text
def test_history_empty_after_clear(client):
with patch("dashboard.routes.agents.timmy_chat", return_value="OK."):
client.post("/agents/timmy/chat", data={"message": "test"})
client.delete("/agents/timmy/history")
response = client.get("/agents/timmy/history")
assert "test" not in response.text
assert "Mission Control initialized" in response.text