forked from Rockachopa/Timmy-time-dashboard
fix: chat evaluation bugs — task pipeline, prompt grounding, markdown rendering
Addresses 14 bugs from 3 rounds of deep chat evaluation:
- Add chat-to-task pipeline in agents.py with regex-based intent detection,
agent extraction, priority extraction, and title cleaning
- Filter meta-questions ("how do I create a task?") from task creation
- Inject real-time date/time context into every chat message
- Inject live queue state when user asks about tasks
- Ground system prompts with agent roster, honesty guardrails, self-knowledge,
math delegation template, anti-filler rules, values-conflict guidance
- Add CSS for markdown code blocks, inline code, lists, blockquotes in chat
- Add highlight.js CDN for syntax highlighting in chat responses
- Reduce small-model memory context budget (4000→2000) for expanded prompt
- Add 27 comprehensive tests covering the full chat-to-task pipeline
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -304,3 +304,244 @@ def test_api_approve_nonexistent(client):
|
||||
def test_api_veto_nonexistent(client):
|
||||
resp = client.patch("/api/tasks/nonexistent/veto")
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
# ── Chat-to-Task Pipeline Tests ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestExtractTaskFromMessage:
|
||||
"""Tests for _extract_task_from_message — queue intent detection."""
|
||||
|
||||
def test_add_to_queue(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
result = _extract_task_from_message("Add refactor the login to the task queue")
|
||||
assert result is not None
|
||||
assert result["agent"] == "timmy"
|
||||
assert result["priority"] == "normal"
|
||||
|
||||
def test_schedule_this(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
result = _extract_task_from_message("Schedule this for later")
|
||||
assert result is not None
|
||||
|
||||
def test_create_a_task(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
result = _extract_task_from_message("Create a task to fix the login page")
|
||||
assert result is not None
|
||||
assert "title" in result
|
||||
|
||||
def test_normal_message_returns_none(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
assert _extract_task_from_message("Hello, how are you?") is None
|
||||
|
||||
def test_meta_question_about_tasks_returns_none(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
assert _extract_task_from_message("How do I create a task?") is None
|
||||
|
||||
def test_what_is_question_returns_none(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
assert _extract_task_from_message("What is a task queue?") is None
|
||||
|
||||
def test_explain_question_returns_none(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
assert _extract_task_from_message("Can you explain how to create a task?") is None
|
||||
|
||||
def test_what_would_question_returns_none(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
assert _extract_task_from_message("What would a task flow look like?") is None
|
||||
|
||||
|
||||
class TestExtractAgentFromMessage:
|
||||
"""Tests for _extract_agent_from_message."""
|
||||
|
||||
def test_extracts_forge(self):
|
||||
from dashboard.routes.agents import _extract_agent_from_message
|
||||
assert _extract_agent_from_message("Create a task for Forge to refactor") == "forge"
|
||||
|
||||
def test_extracts_echo(self):
|
||||
from dashboard.routes.agents import _extract_agent_from_message
|
||||
assert _extract_agent_from_message("Add research for Echo to the queue") == "echo"
|
||||
|
||||
def test_case_insensitive(self):
|
||||
from dashboard.routes.agents import _extract_agent_from_message
|
||||
assert _extract_agent_from_message("Schedule this for SEER") == "seer"
|
||||
|
||||
def test_defaults_to_timmy(self):
|
||||
from dashboard.routes.agents import _extract_agent_from_message
|
||||
assert _extract_agent_from_message("Create a task to fix the bug") == "timmy"
|
||||
|
||||
def test_ignores_unknown_agent(self):
|
||||
from dashboard.routes.agents import _extract_agent_from_message
|
||||
assert _extract_agent_from_message("Create a task for BobAgent") == "timmy"
|
||||
|
||||
|
||||
class TestExtractPriorityFromMessage:
|
||||
"""Tests for _extract_priority_from_message."""
|
||||
|
||||
def test_urgent(self):
|
||||
from dashboard.routes.agents import _extract_priority_from_message
|
||||
assert _extract_priority_from_message("urgent: fix the server") == "urgent"
|
||||
|
||||
def test_critical(self):
|
||||
from dashboard.routes.agents import _extract_priority_from_message
|
||||
assert _extract_priority_from_message("This is critical, do it now") == "urgent"
|
||||
|
||||
def test_asap(self):
|
||||
from dashboard.routes.agents import _extract_priority_from_message
|
||||
assert _extract_priority_from_message("Fix this ASAP") == "urgent"
|
||||
|
||||
def test_high_priority(self):
|
||||
from dashboard.routes.agents import _extract_priority_from_message
|
||||
assert _extract_priority_from_message("This is important work") == "high"
|
||||
|
||||
def test_low_priority(self):
|
||||
from dashboard.routes.agents import _extract_priority_from_message
|
||||
assert _extract_priority_from_message("minor cleanup task") == "low"
|
||||
|
||||
def test_default_normal(self):
|
||||
from dashboard.routes.agents import _extract_priority_from_message
|
||||
assert _extract_priority_from_message("Fix the login page") == "normal"
|
||||
|
||||
|
||||
class TestTitleCleaning:
|
||||
"""Tests for task title extraction and cleaning."""
|
||||
|
||||
def test_strips_agent_from_title(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
result = _extract_task_from_message("Create a task for Forge to refactor the login")
|
||||
assert result is not None
|
||||
assert "forge" not in result["title"].lower()
|
||||
assert "for" not in result["title"].lower().split()[0:1] # "for" stripped
|
||||
|
||||
def test_strips_priority_from_title(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
result = _extract_task_from_message("Create an urgent task to fix the server")
|
||||
assert result is not None
|
||||
assert "urgent" not in result["title"].lower()
|
||||
|
||||
def test_title_is_capitalized(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
result = _extract_task_from_message("Add refactor the login to the task queue")
|
||||
assert result is not None
|
||||
assert result["title"][0].isupper()
|
||||
|
||||
def test_title_capped_at_120_chars(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
long_msg = "Create a task to " + "x" * 200
|
||||
result = _extract_task_from_message(long_msg)
|
||||
assert result is not None
|
||||
assert len(result["title"]) <= 120
|
||||
|
||||
|
||||
class TestFullExtraction:
|
||||
"""Tests for combined agent + priority + title extraction."""
|
||||
|
||||
def test_task_includes_agent_and_priority(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
result = _extract_task_from_message("Create a high priority task for Forge to refactor auth")
|
||||
assert result is not None
|
||||
assert result["agent"] == "forge"
|
||||
assert result["priority"] == "high"
|
||||
assert result["description"] # original message preserved
|
||||
|
||||
def test_create_with_all_fields(self):
|
||||
from dashboard.routes.agents import _extract_task_from_message
|
||||
result = _extract_task_from_message("Add an urgent task for Mace to audit security to the queue")
|
||||
assert result is not None
|
||||
assert result["agent"] == "mace"
|
||||
assert result["priority"] == "urgent"
|
||||
|
||||
|
||||
# ── Integration: chat_timmy Route ─────────────────────────────────────────
|
||||
|
||||
|
||||
class TestChatTimmyIntegration:
|
||||
"""Integration tests for the /agents/timmy/chat route."""
|
||||
|
||||
def test_chat_creates_task_on_queue_request(self, client):
|
||||
resp = client.post(
|
||||
"/agents/timmy/chat",
|
||||
data={"message": "Create a task to refactor the login module"},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
assert "Task queued" in resp.text or "task" in resp.text.lower()
|
||||
|
||||
def test_chat_creates_task_with_agent(self, client):
|
||||
resp = client.post(
|
||||
"/agents/timmy/chat",
|
||||
data={"message": "Add deploy monitoring for Helm to the task queue"},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
assert "helm" in resp.text.lower() or "Task queued" in resp.text
|
||||
|
||||
def test_chat_creates_task_with_priority(self, client):
|
||||
resp = client.post(
|
||||
"/agents/timmy/chat",
|
||||
data={"message": "Create an urgent task to fix the production server"},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
assert "Task queued" in resp.text or "urgent" in resp.text.lower()
|
||||
|
||||
@patch("dashboard.routes.agents.timmy_chat")
|
||||
def test_chat_injects_datetime_context(self, mock_chat, client):
|
||||
mock_chat.return_value = "Hello there!"
|
||||
client.post(
|
||||
"/agents/timmy/chat",
|
||||
data={"message": "Hello Timmy"},
|
||||
)
|
||||
mock_chat.assert_called_once()
|
||||
call_arg = mock_chat.call_args[0][0]
|
||||
assert "[System: Current date/time is" in call_arg
|
||||
|
||||
@patch("dashboard.routes.agents.timmy_chat")
|
||||
@patch("dashboard.routes.agents._build_queue_context")
|
||||
def test_chat_injects_queue_context_on_queue_query(self, mock_ctx, mock_chat, client):
|
||||
mock_ctx.return_value = "[System: Task queue — 3 pending approval, 1 running, 5 completed.]"
|
||||
mock_chat.return_value = "There are 3 tasks pending."
|
||||
client.post(
|
||||
"/agents/timmy/chat",
|
||||
data={"message": "What tasks are in the queue?"},
|
||||
)
|
||||
mock_ctx.assert_called_once()
|
||||
mock_chat.assert_called_once()
|
||||
call_arg = mock_chat.call_args[0][0]
|
||||
assert "[System: Task queue" in call_arg
|
||||
|
||||
@patch("dashboard.routes.agents.timmy_chat")
|
||||
@patch("dashboard.routes.agents._build_queue_context")
|
||||
def test_chat_no_queue_context_for_normal_message(self, mock_ctx, mock_chat, client):
|
||||
mock_chat.return_value = "Hi!"
|
||||
client.post(
|
||||
"/agents/timmy/chat",
|
||||
data={"message": "Tell me a joke"},
|
||||
)
|
||||
mock_ctx.assert_not_called()
|
||||
|
||||
@patch("dashboard.routes.agents.timmy_chat")
|
||||
def test_chat_normal_message_uses_timmy(self, mock_chat, client):
|
||||
mock_chat.return_value = "I'm doing well, thank you."
|
||||
resp = client.post(
|
||||
"/agents/timmy/chat",
|
||||
data={"message": "How are you?"},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
mock_chat.assert_called_once()
|
||||
|
||||
|
||||
class TestBuildQueueContext:
|
||||
"""Tests for _build_queue_context helper."""
|
||||
|
||||
def test_returns_string_with_counts(self):
|
||||
from dashboard.routes.agents import _build_queue_context
|
||||
from task_queue.models import create_task
|
||||
create_task(title="Context test task", created_by="test")
|
||||
ctx = _build_queue_context()
|
||||
assert "[System: Task queue" in ctx
|
||||
assert "pending" in ctx.lower()
|
||||
|
||||
def test_returns_empty_on_error(self):
|
||||
from dashboard.routes.agents import _build_queue_context
|
||||
with patch("task_queue.models.get_counts_by_status", side_effect=Exception("DB error")):
|
||||
ctx = _build_queue_context()
|
||||
assert isinstance(ctx, str)
|
||||
assert ctx == ""
|
||||
|
||||
Reference in New Issue
Block a user