fix: chat evaluation bugs — task pipeline, prompt grounding, markdown rendering

Addresses 14 bugs from 3 rounds of deep chat evaluation:

- Add chat-to-task pipeline in agents.py with regex-based intent detection, agent extraction, priority extraction, and title cleaning
- Filter meta-questions ("how do I create a task?") from task creation
- Inject real-time date/time context into every chat message
- Inject live queue state when user asks about tasks
- Ground system prompts with agent roster, honesty guardrails, self-knowledge, math delegation template, anti-filler rules, values-conflict guidance
- Add CSS for markdown code blocks, inline code, lists, blockquotes in chat
- Add highlight.js CDN for syntax highlighting in chat responses
- Reduce small-model memory context budget (4000→2000) for expanded prompt
- Add 27 comprehensive tests covering the full chat-to-task pipeline

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 11:42:42 -05:00
parent 6c6b6f8a54
commit 3ca8e9f2d6
7 changed files with 526 additions and 10 deletions
--- a/src/dashboard/routes/agents.py
+++ b/src/dashboard/routes/agents.py
@@ -1,3 +1,5 @@
+import logging
+import re
 from datetime import datetime
 from pathlib import Path

@@ -8,9 +10,144 @@ from fastapi.templating import Jinja2Templates
 from timmy.session import chat as timmy_chat
 from dashboard.store import message_log

+logger = logging.getLogger(__name__)
+
 router = APIRouter(prefix="/agents", tags=["agents"])
 templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))

+# ── Task queue detection ──────────────────────────────────────────────────
+# Patterns that indicate the user wants to queue a task rather than chat (tried in order by _extract_task_from_message; the first match wins)
+_QUEUE_PATTERNS = [
+    re.compile(r"\b(?:add|put|schedule|queue|submit)\b.*\b(?:to the|on the|in the)?\s*(?:queue|task(?:\s*queue)?|task list)\b", re.IGNORECASE),  # NOTE(review): ".*" is unanchored, so e.g. "add a note about my task" also matches
+    re.compile(r"\bschedule\s+(?:this|that|a)\b", re.IGNORECASE),  # "schedule this/that/a ..." with no queue/task noun required
+    re.compile(r"\bcreate\s+(?:a\s+|an\s+)?(?:\w+\s+){0,3}task\b", re.IGNORECASE),  # "create [a|an] <up to 3 words> task"
+]
+# Questions about tasks/queue should NOT trigger task creation. NOTE(review): the trailing what(?:'s| is| are| would) alternative is redundant — the leading "what" alternative followed by \b already matches those inputs.
+_QUESTION_PREFIXES = re.compile(
+    r"^(?:what|how|why|can you explain|could you explain|tell me about|explain|"
+    r"what(?:'s| is| are| would))\b",
+    re.IGNORECASE,
+)
+_QUESTION_FRAMES = re.compile(  # question phrasing anywhere in the message, not only at the start
+    r"\b(?:how (?:do|does|would|can|should)|what (?:is|are|would)|"
+    r"can you (?:explain|describe|tell)|best way to)\b",
+    re.IGNORECASE,
+)
+
+# Known agent names for task assignment parsing (lowercase; _AGENT_PATTERN matches them case-insensitively after the word "for")
+_KNOWN_AGENTS = frozenset({
+    "timmy", "echo", "mace", "helm", "seer",
+    "forge", "quill", "pixel", "lyra", "reel",
+})
+_AGENT_PATTERN = re.compile(
+    r"\bfor\s+(" + "|".join(_KNOWN_AGENTS) + r")\b", re.IGNORECASE  # group 1 captures the agent name as typed
+)
+
+# Priority keywords → task priority mapping (keys are lowercase; _extract_priority_from_message tests them as substrings of the lowercased message)
+_PRIORITY_MAP = {
+    "urgent": "urgent",
+    "critical": "urgent",
+    "asap": "urgent",
+    "emergency": "urgent",
+    "high priority": "high",
+    "high-priority": "high",
+    "important": "high",
+    "low priority": "low",
+    "low-priority": "low",
+    "minor": "low",
+}
+
+# Queue context detection: any mention of "task", "tasks", "task queue" or "queue" matches. NOTE(review): the final what(...)queue alternative is redundant for a boolean search — the bare "queue" alternative already matches the same messages.
+_QUEUE_QUERY_PATTERN = re.compile(
+    r"\b(?:task(?:s|\s+queue)?|queue|what(?:'s| is) (?:in |on )?(?:the )?queue)\b",
+    re.IGNORECASE,
+)
+
+
+def _extract_agent_from_message(message: str) -> str:
+    """Extract target agent name from message, defaulting to 'timmy'."""
+    m = _AGENT_PATTERN.search(message)  # first "for <known agent>" occurrence, case-insensitive
+    if m:
+        return m.group(1).lower()  # normalise to lowercase regardless of how the user typed it
+    return "timmy"  # no known agent named after "for"
+
+
+def _extract_priority_from_message(message: str) -> str:
+    """Extract priority level from message, defaulting to 'normal'."""
+    msg_lower = message.lower()  # _PRIORITY_MAP keys are lowercase
+    for keyword, priority in sorted(_PRIORITY_MAP.items(), key=lambda x: -len(x[0])):  # longest keywords are tried first
+        if keyword in msg_lower:  # NOTE(review): plain substring test with no word boundary — e.g. "unimportant" matches "important"
+            return priority
+    return "normal"
+
+
+def _extract_task_from_message(message: str) -> dict | None:
+    """If the message looks like a task-queue request, return task details.
+
+    Returns None for meta-questions about tasks (e.g. "how do I create a task?").
+    """
+    if _QUESTION_PREFIXES.search(message) or _QUESTION_FRAMES.search(message):  # the question filter runs before any queue pattern
+        return None
+    for pattern in _QUEUE_PATTERNS:
+        if pattern.search(message):  # first matching pattern wins; the loop never continues past it
+            # Strip the queue instruction to get the actual task description
+            title = re.sub(
+                r"\b(?:add|put|schedule|queue|submit|create)\b.*?\b(?:to the|on the|in the|an?)?(?:\s+\w+){0,3}\s*(?:queue|task(?:\s*queue)?|task list)\b",
+                "", message, flags=re.IGNORECASE,
+            ).strip(" ,:;-")
+            # Strip "for {agent}" from title
+            title = _AGENT_PATTERN.sub("", title).strip(" ,:;-")
+            # Strip priority keywords from title
+            title = re.sub(
+                r"\b(?:urgent|critical|asap|emergency|high[- ]priority|important|low[- ]priority|minor)\b",
+                "", title, flags=re.IGNORECASE,
+            ).strip(" ,:;-")
+            # Strip leading "to " that often remains
+            title = re.sub(r"^to\s+", "", title, flags=re.IGNORECASE).strip()
+            # Clean up double spaces
+            title = re.sub(r"\s{2,}", " ", title).strip()
+            # Fallback to full message if stripping removed everything. NOTE(review): for "Add X to the task queue" phrasing the strip regex above appears to consume X as well, so this fallback yields the whole message as the title — confirm.
+            if not title or len(title) < 5:
+                title = message
+            # Capitalize first letter
+            title = title[0].upper() + title[1:] if title else title
+            agent = _extract_agent_from_message(message)  # parsed from the original message, not the cleaned title
+            priority = _extract_priority_from_message(message)
+            return {
+                "title": title[:120],  # hard cap of 120 characters
+                "description": message,  # original message, unmodified
+                "agent": agent,
+                "priority": priority,
+            }
+    return None  # no queue pattern matched
+
+
+def _build_queue_context() -> str:
+    """Build a concise task queue summary for context injection."""
+    try:
+        from task_queue.models import get_counts_by_status, list_tasks, TaskStatus  # function-scope import, resolved on every call
+        counts = get_counts_by_status()
+        pending = counts.get("pending_approval", 0)  # missing statuses count as zero
+        running = counts.get("running", 0)
+        completed = counts.get("completed", 0)
+
+        parts = [f"[System: Task queue — {pending} pending approval, {running} running, {completed} completed."]
+        if pending > 0:
+            tasks = list_tasks(status=TaskStatus.PENDING_APPROVAL, limit=5)  # list at most 5 pending tasks
+            if tasks:
+                items = ", ".join(f'"{t.title}" ({t.assigned_to})' for t in tasks)
+                parts.append(f"Pending: {items}.")
+        if running > 0:
+            tasks = list_tasks(status=TaskStatus.RUNNING, limit=5)  # list at most 5 running tasks
+            if tasks:
+                items = ", ".join(f'"{t.title}" ({t.assigned_to})' for t in tasks)
+                parts.append(f"Running: {items}.")
+        return " ".join(parts) + "]"  # closes the "[System: ..." bracket opened in parts[0]
+    except Exception as exc:  # best-effort: any failure degrades to "no queue context"
+        logger.debug("Failed to build queue context: %s", exc)
+        return ""
+
+
 # Static metadata for known agents — enriched onto live registry entries.
 _AGENT_METADATA: dict[str, dict] = {
    "timmy": {
@@ -74,10 +211,46 @@ async def chat_timmy(request: Request, message: str = Form(...)):
    response_text = None
    error_text = None

-    try:
-        response_text = timmy_chat(message)
-    except Exception as exc:
-        error_text = f"Timmy is offline: {exc}"
+    # Check if the user wants to queue a task instead of chatting
+    task_info = _extract_task_from_message(message)
+    if task_info:
+        try:
+            from task_queue.models import create_task
+            task = create_task(
+                title=task_info["title"],
+                description=task_info["description"],
+                created_by="user",
+                assigned_to=task_info.get("agent", "timmy"),
+                priority=task_info.get("priority", "normal"),
+                requires_approval=True,
+            )
+            priority_label = f" | Priority: `{task.priority.value}`" if task.priority.value != "normal" else ""
+            response_text = (
+                f"Task queued for approval: **{task.title}**\n\n"
+                f"Assigned to: `{task.assigned_to}`{priority_label} | "
+                f"Status: `{task.status.value}` | "
+                f"[View Task Queue](/tasks)"
+            )
+            logger.info("Chat → task queue: %s → %s (id=%s)", task.title, task.assigned_to, task.id)
+        except Exception as exc:
+            logger.error("Failed to create task from chat: %s", exc)
+            task_info = None
+
+    # Normal chat path (also used as fallback if task creation failed)
+    if not task_info:
+        try:
+            now = datetime.now()
+            context_parts = [
+                f"[System: Current date/time is {now.strftime('%A, %B %d, %Y at %I:%M %p')}]"
+            ]
+            if _QUEUE_QUERY_PATTERN.search(message):
+                queue_ctx = _build_queue_context()
+                if queue_ctx:
+                    context_parts.append(queue_ctx)
+            context_prefix = "\n".join(context_parts) + "\n\n"
+            response_text = timmy_chat(context_prefix + message)
+        except Exception as exc:
+            error_text = f"Timmy is offline: {exc}"

    message_log.append(role="user", content=message, timestamp=timestamp)
    if response_text is not None:
--- a/src/dashboard/templates/base.html
+++ b/src/dashboard/templates/base.html
@@ -16,6 +16,8 @@
  <script src="https://unpkg.com/htmx.org@2.0.3" integrity="sha384-0895/pl2MU10Hqc6jd4RvrthNlDiE9U1tWmX7WRESftEDRosgxNsQG/Ze9YMRzHq" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/marked@15.0.7/marked.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/dompurify@3.2.4/dist/purify.min.js"></script>
+  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/styles/github-dark.min.css" />
+  <script src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/highlight.min.js"></script>
 </head>
 <body>
  <header class="mc-header">
--- a/src/dashboard/templates/partials/chat_message.html
+++ b/src/dashboard/templates/partials/chat_message.html
@@ -12,6 +12,9 @@
    var el = document.currentScript.previousElementSibling.querySelector('.timmy-md');
    if (el && typeof marked !== 'undefined' && typeof DOMPurify !== 'undefined') {
      el.innerHTML = DOMPurify.sanitize(marked.parse(el.textContent));
+      if (typeof hljs !== 'undefined') {
+        el.querySelectorAll('pre code').forEach(function(block) { hljs.highlightElement(block); });
+      }
    }
  })();
 </script>
--- a/src/timmy/agent.py
+++ b/src/timmy/agent.py
@@ -118,8 +118,9 @@ def create_timmy(
        from timmy.memory_system import memory_system
        memory_context = memory_system.get_system_context()
        if memory_context:
-            # Truncate if too long (keep under token limit)
-            max_context = 4000 if not use_tools else 8000
+            # Truncate if too long — smaller budget for small models
+            # since the expanded prompt (roster, guardrails) uses more tokens
+            max_context = 2000 if not use_tools else 8000
            if len(memory_context) > max_context:
                memory_context = memory_context[:max_context] + "\n... [truncated]"
            full_prompt = f"{base_prompt}\n\n## Memory Context\n\n{memory_context}"
--- a/src/timmy/prompts.py
+++ b/src/timmy/prompts.py
@@ -10,6 +10,8 @@ tool usage guidelines and memory system documentation.
 # ---------------------------------------------------------------------------

 TIMMY_SYSTEM_PROMPT_LITE = """You are Timmy — a sovereign AI agent running locally on this Mac.
+You run on the llama3.2 model via Ollama on localhost. You are not GPT, not Claude,
+not a custom model — you are llama3.2 wrapped in the Timmy agent framework.
 No cloud dependencies. Think clearly, speak plainly, act with intention.
 Grounded in Christian faith, powered by Bitcoin economics, committed to the
 user's digital sovereignty.
@@ -19,14 +21,42 @@ Rules:
 - Never mention tools, memory_search, vaults, or internal systems to the user.
 - Never output tool calls, JSON, or function syntax in your responses.
 - Remember what the user tells you during our conversation.
- If you don't know something, say so honestly.
+- If you don't know something, say so honestly — never fabricate facts.
+- If a request is ambiguous, ask a brief clarifying question before guessing.
 - Use the user's name if you know it.
 - When you state a fact, commit to it. Never contradict a correct statement you
  just made in the same response. If uncertain, express uncertainty at the start —
  never state something confidently and then immediately undermine it.
- NEVER attempt arithmetic in your head — LLMs are unreliable at multi-digit math.
-  If asked to compute anything (multiply, divide, square root, exponents, etc.),
-  tell the user you need a calculator tool to give an exact answer.
+- NEVER attempt arithmetic in your head. If asked to compute anything, respond:
+  "I'm not reliable at math without a calculator tool — let me know if you'd
+  like me to walk through the logic instead."
+- Do NOT end responses with generic chatbot phrases like "I'm here to help" or
+  "feel free to ask." Stay in character.
+- When your values conflict (e.g. honesty vs. helpfulness), lead with honesty.
+  Acknowledge the tension openly rather than defaulting to generic agreeableness.
+
+## Agent Roster (complete — no others exist)
+- Timmy: core sovereign AI (you)
+- Echo: research, summarization, fact-checking
+- Mace: security, monitoring, threat-analysis
+- Forge: coding, debugging, testing
+- Seer: analytics, visualization, prediction
+- Helm: devops, automation, configuration
+- Quill: writing, editing, documentation
+- Pixel: image-generation, storyboard, design
+- Lyra: music-generation, vocals, composition
+- Reel: video-generation, animation, motion
+Do NOT invent agents not listed here. If asked about an unlisted agent, say it doesn't exist.
+Use ONLY the capabilities listed above when describing agents — do not embellish or invent.
+
+## What you CAN and CANNOT access
+- You CANNOT query the live task queue, agent statuses, or system metrics on your own.
+- You CANNOT access real-time data without tools.
+- If asked about current tasks, agent status, or system state and no system context
+  is provided, say "I don't have live access to that — check the dashboard."
+- Your conversation history persists in a database across requests, but the
+  dashboard chat display resets on server restart.
+- Do NOT claim abilities you don't have. When uncertain, say "I don't know."

 Sir, affirmative."""

@@ -35,6 +65,8 @@ Sir, affirmative."""
 # ---------------------------------------------------------------------------

 TIMMY_SYSTEM_PROMPT_FULL = """You are Timmy — a sovereign AI agent running locally on this Mac.
+You run on the llama3.2 model via Ollama on localhost. You are not GPT, not Claude,
+not a custom model — you are llama3.2 wrapped in the Timmy agent framework.
 No cloud dependencies. You think clearly, speak plainly, act with intention.
 Grounded in Christian faith, powered by Bitcoin economics, committed to the
 user's digital sovereignty.
@@ -57,6 +89,28 @@ user's digital sovereignty.
 - Similarity-based retrieval
 - Use `memory_search` tool to find relevant past context

+## Agent Roster (complete — no others exist)
+- Timmy: core sovereign AI (you)
+- Echo: research, summarization, fact-checking
+- Mace: security, monitoring, threat-analysis
+- Forge: coding, debugging, testing
+- Seer: analytics, visualization, prediction
+- Helm: devops, automation, configuration
+- Quill: writing, editing, documentation
+- Pixel: image-generation, storyboard, design
+- Lyra: music-generation, vocals, composition
+- Reel: video-generation, animation, motion
+Do NOT invent agents not listed here. If asked about an unlisted agent, say it doesn't exist.
+Use ONLY the capabilities listed above when describing agents — do not embellish or invent.
+
+## What you CAN and CANNOT access
+- You CANNOT query the live task queue, agent statuses, or system metrics on your own.
+- If asked about current tasks, agent status, or system state and no system context
+  is provided, say "I don't have live access to that — check the dashboard."
+- Your conversation history persists in a database across requests, but the
+  dashboard chat display resets on server restart.
+- Do NOT claim abilities you don't have. When uncertain, say "I don't know."
+
 ## Tool Usage Guidelines

 ### When NOT to use tools:
@@ -81,9 +135,13 @@ user's digital sovereignty.
 - Never narrate your reasoning process. Just give the answer.
 - Never show raw tool call JSON or function syntax in responses.
 - Use the user's name if known.
+- If a request is ambiguous, ask a brief clarifying question before guessing.
 - When you state a fact, commit to it. Never contradict a correct statement you
  just made in the same response. If uncertain, express uncertainty at the start —
  never state something confidently and then immediately undermine it.
+- Do NOT end responses with generic chatbot phrases like "I'm here to help" or
+  "feel free to ask." Stay in character.
+- When your values conflict (e.g. honesty vs. helpfulness), lead with honesty.

 Sir, affirmative."""

--- a/static/style.css
+++ b/static/style.css
@@ -429,6 +429,44 @@ a:hover { color: var(--orange); }
 .chat-message.agent     .msg-body { border-left: 3px solid var(--purple); }
 .chat-message.error-msg .msg-body { border-left: 3px solid var(--red); color: var(--red); }

+/* ── Markdown rendering in Timmy chat ─────────────────── */
+.timmy-md { white-space: normal; }
+.timmy-md p { margin: 0 0 0.5em; }
+.timmy-md p:last-child { margin-bottom: 0; }
+.timmy-md pre {
+  background: #0d0620; /* NOTE(review): hard-coded colour; the neighbouring rules use var(--…) tokens */
+  border: 1px solid var(--border);
+  border-radius: var(--radius-sm);
+  padding: 10px 12px;
+  overflow-x: auto; /* long code lines scroll horizontally instead of wrapping */
+  margin: 0.5em 0;
+  white-space: pre;
+}
+.timmy-md code {
+  font-family: var(--font);
+  font-size: 0.9em;
+}
+.timmy-md :not(pre) > code { /* inline code only; code inside <pre> is excluded */
+  background: rgba(168, 85, 247, 0.15);
+  padding: 2px 5px;
+  border-radius: 3px;
+  color: var(--text-bright);
+}
+.timmy-md ul, .timmy-md ol { padding-left: 1.5em; margin: 0.4em 0; }
+.timmy-md blockquote {
+  border-left: 3px solid var(--purple);
+  padding-left: 10px;
+  color: var(--text-dim);
+  margin: 0.5em 0;
+}
+.timmy-md h1, .timmy-md h2, .timmy-md h3 { /* all three heading levels render at the same 1em size */
+  color: var(--text-bright);
+  margin: 0.6em 0 0.3em;
+  font-size: 1em;
+  font-weight: 700;
+}
+.timmy-md a { color: var(--purple); }
+
 /* Mobile chat classes (used by mobile.html) */
 .chat-container {
  overflow-y: auto;
--- a/tests/test_task_queue.py
+++ b/tests/test_task_queue.py
@@ -304,3 +304,244 @@ def test_api_approve_nonexistent(client):
 def test_api_veto_nonexistent(client):
    resp = client.patch("/api/tasks/nonexistent/veto")
    assert resp.status_code == 404
+
+
+# ── Chat-to-Task Pipeline Tests ──────────────────────────────────────────
+
+
+class TestExtractTaskFromMessage:
+    """Tests for _extract_task_from_message — queue intent detection."""
+
+    def test_add_to_queue(self):  # "add ... to the task queue" phrasing; no agent or priority words, so defaults apply
+        from dashboard.routes.agents import _extract_task_from_message
+        result = _extract_task_from_message("Add refactor the login to the task queue")
+        assert result is not None
+        assert result["agent"] == "timmy"
+        assert result["priority"] == "normal"
+
+    def test_schedule_this(self):  # matched via the "schedule this|that|a" pattern; no queue/task noun present
+        from dashboard.routes.agents import _extract_task_from_message
+        result = _extract_task_from_message("Schedule this for later")
+        assert result is not None
+
+    def test_create_a_task(self):  # matched via the "create ... task" pattern
+        from dashboard.routes.agents import _extract_task_from_message
+        result = _extract_task_from_message("Create a task to fix the login page")
+        assert result is not None
+        assert "title" in result
+
+    def test_normal_message_returns_none(self):
+        from dashboard.routes.agents import _extract_task_from_message
+        assert _extract_task_from_message("Hello, how are you?") is None  # no queue pattern matches
+
+    def test_meta_question_about_tasks_returns_none(self):
+        from dashboard.routes.agents import _extract_task_from_message
+        assert _extract_task_from_message("How do I create a task?") is None  # starts with "How" → question filter
+
+    def test_what_is_question_returns_none(self):
+        from dashboard.routes.agents import _extract_task_from_message
+        assert _extract_task_from_message("What is a task queue?") is None  # starts with "What" → question filter
+
+    def test_explain_question_returns_none(self):
+        from dashboard.routes.agents import _extract_task_from_message
+        assert _extract_task_from_message("Can you explain how to create a task?") is None  # starts with "Can you explain" → question filter
+
+    def test_what_would_question_returns_none(self):
+        from dashboard.routes.agents import _extract_task_from_message
+        assert _extract_task_from_message("What would a task flow look like?") is None  # starts with "What" → question filter
+
+
+class TestExtractAgentFromMessage:
+    """Tests for _extract_agent_from_message."""
+
+    def test_extracts_forge(self):
+        from dashboard.routes.agents import _extract_agent_from_message
+        assert _extract_agent_from_message("Create a task for Forge to refactor") == "forge"  # result is lowercased
+
+    def test_extracts_echo(self):
+        from dashboard.routes.agents import _extract_agent_from_message
+        assert _extract_agent_from_message("Add research for Echo to the queue") == "echo"
+
+    def test_case_insensitive(self):
+        from dashboard.routes.agents import _extract_agent_from_message
+        assert _extract_agent_from_message("Schedule this for SEER") == "seer"  # upper-case input, lowercase result
+
+    def test_defaults_to_timmy(self):
+        from dashboard.routes.agents import _extract_agent_from_message
+        assert _extract_agent_from_message("Create a task to fix the bug") == "timmy"  # no "for <agent>" in the message
+
+    def test_ignores_unknown_agent(self):
+        from dashboard.routes.agents import _extract_agent_from_message
+        assert _extract_agent_from_message("Create a task for BobAgent") == "timmy"  # "BobAgent" is not in _KNOWN_AGENTS
+
+
+class TestExtractPriorityFromMessage:
+    """Tests for _extract_priority_from_message."""
+
+    def test_urgent(self):
+        from dashboard.routes.agents import _extract_priority_from_message
+        assert _extract_priority_from_message("urgent: fix the server") == "urgent"
+
+    def test_critical(self):
+        from dashboard.routes.agents import _extract_priority_from_message
+        assert _extract_priority_from_message("This is critical, do it now") == "urgent"  # "critical" maps to the "urgent" level
+
+    def test_asap(self):
+        from dashboard.routes.agents import _extract_priority_from_message
+        assert _extract_priority_from_message("Fix this ASAP") == "urgent"  # keyword matching is case-insensitive
+
+    def test_high_priority(self):  # NOTE(review): exercises the "important" keyword, not the "high priority" phrase
+        from dashboard.routes.agents import _extract_priority_from_message
+        assert _extract_priority_from_message("This is important work") == "high"
+
+    def test_low_priority(self):  # NOTE(review): exercises the "minor" keyword, not the "low priority" phrase
+        from dashboard.routes.agents import _extract_priority_from_message
+        assert _extract_priority_from_message("minor cleanup task") == "low"
+
+    def test_default_normal(self):
+        from dashboard.routes.agents import _extract_priority_from_message
+        assert _extract_priority_from_message("Fix the login page") == "normal"  # no priority keyword present
+
+
+class TestTitleCleaning:
+    """Tests for task title extraction and cleaning."""
+
+    def test_strips_agent_from_title(self):
+        from dashboard.routes.agents import _extract_task_from_message
+        result = _extract_task_from_message("Create a task for Forge to refactor the login")
+        assert result is not None
+        assert "forge" not in result["title"].lower()
+        assert "for" not in result["title"].lower().split()[0:1]  # the first word of the title must not be "for"
+
+    def test_strips_priority_from_title(self):
+        from dashboard.routes.agents import _extract_task_from_message
+        result = _extract_task_from_message("Create an urgent task to fix the server")
+        assert result is not None
+        assert "urgent" not in result["title"].lower()
+
+    def test_title_is_capitalized(self):
+        from dashboard.routes.agents import _extract_task_from_message
+        result = _extract_task_from_message("Add refactor the login to the task queue")
+        assert result is not None
+        assert result["title"][0].isupper()
+
+    def test_title_capped_at_120_chars(self):
+        from dashboard.routes.agents import _extract_task_from_message
+        long_msg = "Create a task to " + "x" * 200  # 200-character description forces truncation
+        result = _extract_task_from_message(long_msg)
+        assert result is not None
+        assert len(result["title"]) <= 120
+
+
+class TestFullExtraction:
+    """Tests for combined agent + priority + title extraction."""
+
+    def test_task_includes_agent_and_priority(self):
+        from dashboard.routes.agents import _extract_task_from_message
+        result = _extract_task_from_message("Create a high priority task for Forge to refactor auth")
+        assert result is not None
+        assert result["agent"] == "forge"
+        assert result["priority"] == "high"
+        assert result["description"]  # NOTE(review): only checks non-empty; does not compare against the original message
+
+    def test_create_with_all_fields(self):  # "add ... to the queue" phrasing with both an agent and a priority word
+        from dashboard.routes.agents import _extract_task_from_message
+        result = _extract_task_from_message("Add an urgent task for Mace to audit security to the queue")
+        assert result is not None
+        assert result["agent"] == "mace"
+        assert result["priority"] == "urgent"
+
+
+# ── Integration: chat_timmy Route ─────────────────────────────────────────
+
+
+class TestChatTimmyIntegration:
+    """Integration tests for the /agents/timmy/chat route."""  # NOTE(review): relies on `patch` and the `client` fixture, both defined outside this hunk
+
+    def test_chat_creates_task_on_queue_request(self, client):
+        resp = client.post(
+            "/agents/timmy/chat",
+            data={"message": "Create a task to refactor the login module"},
+        )
+        assert resp.status_code == 200
+        assert "Task queued" in resp.text or "task" in resp.text.lower()  # NOTE(review): weak — any response containing "task" passes, even if no task was created
+
+    def test_chat_creates_task_with_agent(self, client):
+        resp = client.post(
+            "/agents/timmy/chat",
+            data={"message": "Add deploy monitoring for Helm to the task queue"},
+        )
+        assert resp.status_code == 200
+        assert "helm" in resp.text.lower() or "Task queued" in resp.text  # NOTE(review): weak — either half alone satisfies the assertion
+
+    def test_chat_creates_task_with_priority(self, client):
+        resp = client.post(
+            "/agents/timmy/chat",
+            data={"message": "Create an urgent task to fix the production server"},
+        )
+        assert resp.status_code == 200
+        assert "Task queued" in resp.text or "urgent" in resp.text.lower()  # NOTE(review): weak — either half alone satisfies the assertion
+
+    @patch("dashboard.routes.agents.timmy_chat")
+    def test_chat_injects_datetime_context(self, mock_chat, client):
+        mock_chat.return_value = "Hello there!"
+        client.post(
+            "/agents/timmy/chat",
+            data={"message": "Hello Timmy"},
+        )
+        mock_chat.assert_called_once()
+        call_arg = mock_chat.call_args[0][0]  # first positional argument passed to timmy_chat
+        assert "[System: Current date/time is" in call_arg
+
+    @patch("dashboard.routes.agents.timmy_chat")
+    @patch("dashboard.routes.agents._build_queue_context")
+    def test_chat_injects_queue_context_on_queue_query(self, mock_ctx, mock_chat, client):  # assuming unittest.mock.patch: the bottom decorator supplies the first mock argument
+        mock_ctx.return_value = "[System: Task queue — 3 pending approval, 1 running, 5 completed.]"
+        mock_chat.return_value = "There are 3 tasks pending."
+        client.post(
+            "/agents/timmy/chat",
+            data={"message": "What tasks are in the queue?"},  # a question, so it takes the chat path rather than creating a task
+        )
+        mock_ctx.assert_called_once()
+        mock_chat.assert_called_once()
+        call_arg = mock_chat.call_args[0][0]
+        assert "[System: Task queue" in call_arg
+
+    @patch("dashboard.routes.agents.timmy_chat")
+    @patch("dashboard.routes.agents._build_queue_context")
+    def test_chat_no_queue_context_for_normal_message(self, mock_ctx, mock_chat, client):
+        mock_chat.return_value = "Hi!"
+        client.post(
+            "/agents/timmy/chat",
+            data={"message": "Tell me a joke"},  # contains neither "task" nor "queue"
+        )
+        mock_ctx.assert_not_called()
+
+    @patch("dashboard.routes.agents.timmy_chat")
+    def test_chat_normal_message_uses_timmy(self, mock_chat, client):
+        mock_chat.return_value = "I'm doing well, thank you."
+        resp = client.post(
+            "/agents/timmy/chat",
+            data={"message": "How are you?"},
+        )
+        assert resp.status_code == 200
+        mock_chat.assert_called_once()
+
+
+class TestBuildQueueContext:
+    """Tests for _build_queue_context helper."""
+
+    def test_returns_string_with_counts(self):
+        from dashboard.routes.agents import _build_queue_context
+        from task_queue.models import create_task
+        create_task(title="Context test task", created_by="test")  # presumably lands in pending approval — confirm against create_task defaults
+        ctx = _build_queue_context()
+        assert "[System: Task queue" in ctx
+        assert "pending" in ctx.lower()  # NOTE(review): always true once the header is present — its literal text contains "pending approval"
+
+    def test_returns_empty_on_error(self):
+        from dashboard.routes.agents import _build_queue_context
+        with patch("task_queue.models.get_counts_by_status", side_effect=Exception("DB error")):  # patching the source module works because the helper imports the name at call time
+            ctx = _build_queue_context()
+            assert isinstance(ctx, str)
+            assert ctx == ""