fix: chat evaluation bugs — task pipeline, prompt grounding, markdown rendering

Addresses 14 bugs from 3 rounds of deep chat evaluation:

- Add chat-to-task pipeline in agents.py with regex-based intent detection,
  agent extraction, priority extraction, and title cleaning
- Filter meta-questions ("how do I create a task?") from task creation
- Inject real-time date/time context into every chat message
- Inject live queue state when user asks about tasks
- Ground system prompts with agent roster, honesty guardrails, self-knowledge,
  math delegation template, anti-filler rules, values-conflict guidance
- Add CSS for markdown code blocks, inline code, lists, blockquotes in chat
- Add highlight.js CDN for syntax highlighting in chat responses
- Reduce small-model memory context budget (4000→2000) for expanded prompt
- Add 27 comprehensive tests covering the full chat-to-task pipeline

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alexander Payne
2026-02-26 11:42:42 -05:00
parent 6c6b6f8a54
commit 3ca8e9f2d6
7 changed files with 526 additions and 10 deletions

View File

@@ -1,3 +1,5 @@
import logging
import re
from datetime import datetime
from pathlib import Path
@@ -8,9 +10,144 @@ from fastapi.templating import Jinja2Templates
from timmy.session import chat as timmy_chat
from dashboard.store import message_log
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/agents", tags=["agents"])
templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
# ── Task queue detection ──────────────────────────────────────────────────
# Patterns that indicate the user wants to queue a task rather than chat.
# A message counts as a queue request as soon as any one of them matches.
_QUEUE_PATTERNS = [
# A verb (add/put/schedule/queue/submit) followed, anywhere later in the
# message, by "queue", "task", "task queue" or "task list".
re.compile(r"\b(?:add|put|schedule|queue|submit)\b.*\b(?:to the|on the|in the)?\s*(?:queue|task(?:\s*queue)?|task list)\b", re.IGNORECASE),
# "schedule this ...", "schedule that ...", "schedule a ..."
re.compile(r"\bschedule\s+(?:this|that|a)\b", re.IGNORECASE),
# "create [a|an] task", allowing up to three words before "task"
# (e.g. "create a high priority task").
re.compile(r"\bcreate\s+(?:a\s+|an\s+)?(?:\w+\s+){0,3}task\b", re.IGNORECASE),
]
# Questions about tasks/queue should NOT trigger task creation
# Anchored with ^, so this only recognises a question word/phrase at the very
# start of the message.
_QUESTION_PREFIXES = re.compile(
r"^(?:what|how|why|can you explain|could you explain|tell me about|explain|"
r"what(?:'s| is| are| would))\b",
re.IGNORECASE,
)
# Unanchored: question phrasings that may appear anywhere in the message.
_QUESTION_FRAMES = re.compile(
r"\b(?:how (?:do|does|would|can|should)|what (?:is|are|would)|"
r"can you (?:explain|describe|tell)|best way to)\b",
re.IGNORECASE,
)
# Known agent names for task assignment parsing
_KNOWN_AGENTS = frozenset({
"timmy", "echo", "mace", "helm", "seer",
"forge", "quill", "pixel", "lyra", "reel",
})
# Matches "for <agent>" (case-insensitive); group 1 captures the agent name.
_AGENT_PATTERN = re.compile(
r"\bfor\s+(" + "|".join(_KNOWN_AGENTS) + r")\b", re.IGNORECASE
)
# Priority keywords → task priority mapping
# Keys are lowercase and are looked up in the lowercased message; several
# keywords map onto the same priority value.
_PRIORITY_MAP = {
"urgent": "urgent",
"critical": "urgent",
"asap": "urgent",
"emergency": "urgent",
"high priority": "high",
"high-priority": "high",
"important": "high",
"low priority": "low",
"low-priority": "low",
"minor": "low",
}
# Queue context detection
# Matches any mention of "task", "tasks", "task queue" or "queue"
# (case-insensitive).
_QUEUE_QUERY_PATTERN = re.compile(
r"\b(?:task(?:s|\s+queue)?|queue|what(?:'s| is) (?:in |on )?(?:the )?queue)\b",
re.IGNORECASE,
)
def _extract_agent_from_message(message: str) -> str:
    """Return the agent named by a ``for <agent>`` phrase in *message*.

    The name is returned lowercased. When no known agent follows the word
    "for", the task is assigned to ``"timmy"``.
    """
    found = _AGENT_PATTERN.search(message)
    return found.group(1).lower() if found else "timmy"
def _extract_priority_from_message(message: str) -> str:
    """Return the priority implied by keywords in *message*.

    Keywords come from ``_PRIORITY_MAP`` and are matched case-insensitively
    as whole words/phrases, so "unimportant" or "minority" do not trigger a
    priority the way a bare substring test would. Longer keywords are tried
    first; keywords of equal length keep their ``_PRIORITY_MAP`` order.

    Returns:
        The mapped priority of the first keyword found, or ``"normal"`` when
        the message contains none.
    """
    msg_lower = message.lower()
    # sorted() is stable, also with reverse=True, so ties keep dict order.
    for keyword in sorted(_PRIORITY_MAP, key=len, reverse=True):
        # \b on both sides keeps the keyword from matching inside a longer word.
        if re.search(rf"\b{re.escape(keyword)}\b", msg_lower):
            return _PRIORITY_MAP[keyword]
    return "normal"
# Leading queue instruction, e.g. "Create an urgent task" in
# "Create an urgent task to fix the server".
_TITLE_INSTRUCTION_RE = re.compile(
    r"\b(?:add|put|schedule|queue|submit|create)\b.*?\b(?:to the|on the|in the|an?)?(?:\s+\w+){0,3}\s*(?:queue|task(?:\s*queue)?|task list)\b",
    re.IGNORECASE,
)
# Instruction that wraps the description: "add <description> to the queue".
_TITLE_WRAPPED_RE = re.compile(
    r"\b(?:add|put|schedule|queue|submit)\s+(?P<body>.+?)\s+(?:to|on|in)\s+the\s+(?:queue|task\s*queue|task\s+list)\b",
    re.IGNORECASE,
)
# Priority keywords removed from the title.
_TITLE_PRIORITY_RE = re.compile(
    r"\b(?:urgent|critical|asap|emergency|high[- ]priority|important|low[- ]priority|minor)\b",
    re.IGNORECASE,
)
# Titles shorter than this are considered over-stripped.
_MIN_TITLE_LENGTH = 5


def _clean_task_title(text: str) -> str:
    """Strip agent assignment, priority keywords and filler from a title candidate."""
    text = text.strip(" ,:;-")
    # Drop "for <agent>".
    text = _AGENT_PATTERN.sub("", text).strip(" ,:;-")
    text = _TITLE_PRIORITY_RE.sub("", text).strip(" ,:;-")
    # A leading "to " often remains after the instruction is removed.
    text = re.sub(r"^to\s+", "", text, flags=re.IGNORECASE).strip()
    # Removing words can leave runs of spaces behind.
    return re.sub(r"\s{2,}", " ", text).strip()


def _extract_task_from_message(message: str) -> dict | None:
    """If the message looks like a task-queue request, return task details.

    Returns None for meta-questions about tasks (e.g. "how do I create a
    task?") and for messages that match none of ``_QUEUE_PATTERNS``.

    Otherwise returns a dict with:
        title: cleaned task title, first letter capitalized, at most 120
            characters.
        description: the original message, unchanged.
        agent: result of ``_extract_agent_from_message``.
        priority: result of ``_extract_priority_from_message``.

    Title derivation:
        1. Remove the queue instruction, then clean the remainder.
        2. If that leaves fewer than ``_MIN_TITLE_LENGTH`` characters and the
           message has the form "add <description> to the queue", use the
           cleaned <description>. The first pass deletes everything for this
           phrasing because its lazy match runs from the verb to "queue".
        3. If the title is still too short, use the whole message.
    """
    if _QUESTION_PREFIXES.search(message) or _QUESTION_FRAMES.search(message):
        return None
    if not any(pattern.search(message) for pattern in _QUEUE_PATTERNS):
        return None

    title = _clean_task_title(_TITLE_INSTRUCTION_RE.sub("", message))
    if len(title) < _MIN_TITLE_LENGTH:
        wrapped = _TITLE_WRAPPED_RE.search(message)
        if wrapped:
            title = _clean_task_title(wrapped.group("body"))
    if len(title) < _MIN_TITLE_LENGTH:
        # Stripping removed (almost) everything; keep the user's own words.
        title = message
    # Capitalize only the first character; leave the rest untouched.
    title = title[:1].upper() + title[1:]

    return {
        "title": title[:120],
        "description": message,
        "agent": _extract_agent_from_message(message),
        "priority": _extract_priority_from_message(message),
    }
def _build_queue_context() -> str:
    """Summarise the task queue as a bracketed ``[System: ...]`` line.

    The summary always carries the pending-approval, running and completed
    counts. When there are pending or running tasks, up to five of each are
    listed as ``"title" (assignee)``.

    Returns:
        The summary string, or ``""`` if anything raises while building it
        (the failure is logged at debug level).
    """
    try:
        from task_queue.models import get_counts_by_status, list_tasks, TaskStatus

        def _listing(label, status):
            """Return ``["<label>: ..."]`` for tasks in *status*, or ``[]`` if none."""
            rows = list_tasks(status=status, limit=5)
            if not rows:
                return []
            joined = ", ".join(f'"{row.title}" ({row.assigned_to})' for row in rows)
            return [f"{label}: {joined}."]

        tally = get_counts_by_status()
        n_pending = tally.get("pending_approval", 0)
        n_running = tally.get("running", 0)
        n_completed = tally.get("completed", 0)

        segments = [
            f"[System: Task queue — {n_pending} pending approval, {n_running} running, {n_completed} completed."
        ]
        # Only query task lists for states that actually have entries.
        if n_pending > 0:
            segments.extend(_listing("Pending", TaskStatus.PENDING_APPROVAL))
        if n_running > 0:
            segments.extend(_listing("Running", TaskStatus.RUNNING))
        return " ".join(segments) + "]"
    except Exception as exc:
        logger.debug("Failed to build queue context: %s", exc)
        return ""
# Static metadata for known agents — enriched onto live registry entries.
_AGENT_METADATA: dict[str, dict] = {
"timmy": {
@@ -74,10 +211,46 @@ async def chat_timmy(request: Request, message: str = Form(...)):
response_text = None
error_text = None
try:
response_text = timmy_chat(message)
except Exception as exc:
error_text = f"Timmy is offline: {exc}"
# Check if the user wants to queue a task instead of chatting
task_info = _extract_task_from_message(message)
if task_info:
try:
from task_queue.models import create_task
task = create_task(
title=task_info["title"],
description=task_info["description"],
created_by="user",
assigned_to=task_info.get("agent", "timmy"),
priority=task_info.get("priority", "normal"),
requires_approval=True,
)
priority_label = f" | Priority: `{task.priority.value}`" if task.priority.value != "normal" else ""
response_text = (
f"Task queued for approval: **{task.title}**\n\n"
f"Assigned to: `{task.assigned_to}`{priority_label} | "
f"Status: `{task.status.value}` | "
f"[View Task Queue](/tasks)"
)
logger.info("Chat → task queue: %s%s (id=%s)", task.title, task.assigned_to, task.id)
except Exception as exc:
logger.error("Failed to create task from chat: %s", exc)
task_info = None
# Normal chat path (also used as fallback if task creation failed)
if not task_info:
try:
now = datetime.now()
context_parts = [
f"[System: Current date/time is {now.strftime('%A, %B %d, %Y at %I:%M %p')}]"
]
if _QUEUE_QUERY_PATTERN.search(message):
queue_ctx = _build_queue_context()
if queue_ctx:
context_parts.append(queue_ctx)
context_prefix = "\n".join(context_parts) + "\n\n"
response_text = timmy_chat(context_prefix + message)
except Exception as exc:
error_text = f"Timmy is offline: {exc}"
message_log.append(role="user", content=message, timestamp=timestamp)
if response_text is not None:

View File

@@ -16,6 +16,8 @@
<script src="https://unpkg.com/htmx.org@2.0.3" integrity="sha384-0895/pl2MU10Hqc6jd4RvrthNlDiE9U1tWmX7WRESftEDRosgxNsQG/Ze9YMRzHq" crossorigin="anonymous"></script>
<script src="https://cdn.jsdelivr.net/npm/marked@15.0.7/marked.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/dompurify@3.2.4/dist/purify.min.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/styles/github-dark.min.css" />
<script src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/highlight.min.js"></script>
</head>
<body>
<header class="mc-header">

View File

@@ -12,6 +12,9 @@
var el = document.currentScript.previousElementSibling.querySelector('.timmy-md');
if (el && typeof marked !== 'undefined' && typeof DOMPurify !== 'undefined') {
el.innerHTML = DOMPurify.sanitize(marked.parse(el.textContent));
if (typeof hljs !== 'undefined') {
el.querySelectorAll('pre code').forEach(function(block) { hljs.highlightElement(block); });
}
}
})();
</script>

View File

@@ -118,8 +118,9 @@ def create_timmy(
from timmy.memory_system import memory_system
memory_context = memory_system.get_system_context()
if memory_context:
# Truncate if too long (keep under token limit)
max_context = 4000 if not use_tools else 8000
# Truncate if too long — smaller budget for small models
# since the expanded prompt (roster, guardrails) uses more tokens
max_context = 2000 if not use_tools else 8000
if len(memory_context) > max_context:
memory_context = memory_context[:max_context] + "\n... [truncated]"
full_prompt = f"{base_prompt}\n\n## Memory Context\n\n{memory_context}"

View File

@@ -10,6 +10,8 @@ tool usage guidelines and memory system documentation.
# ---------------------------------------------------------------------------
TIMMY_SYSTEM_PROMPT_LITE = """You are Timmy — a sovereign AI agent running locally on this Mac.
You run on the llama3.2 model via Ollama on localhost. You are not GPT, not Claude,
not a custom model — you are llama3.2 wrapped in the Timmy agent framework.
No cloud dependencies. Think clearly, speak plainly, act with intention.
Grounded in Christian faith, powered by Bitcoin economics, committed to the
user's digital sovereignty.
@@ -19,14 +21,42 @@ Rules:
- Never mention tools, memory_search, vaults, or internal systems to the user.
- Never output tool calls, JSON, or function syntax in your responses.
- Remember what the user tells you during our conversation.
- If you don't know something, say so honestly.
- If you don't know something, say so honestly — never fabricate facts.
- If a request is ambiguous, ask a brief clarifying question before guessing.
- Use the user's name if you know it.
- When you state a fact, commit to it. Never contradict a correct statement you
just made in the same response. If uncertain, express uncertainty at the start —
never state something confidently and then immediately undermine it.
- NEVER attempt arithmetic in your head — LLMs are unreliable at multi-digit math.
If asked to compute anything (multiply, divide, square root, exponents, etc.),
tell the user you need a calculator tool to give an exact answer.
- NEVER attempt arithmetic in your head. If asked to compute anything, respond:
"I'm not reliable at math without a calculator tool — let me know if you'd
like me to walk through the logic instead."
- Do NOT end responses with generic chatbot phrases like "I'm here to help" or
"feel free to ask." Stay in character.
- When your values conflict (e.g. honesty vs. helpfulness), lead with honesty.
Acknowledge the tension openly rather than defaulting to generic agreeableness.
## Agent Roster (complete — no others exist)
- Timmy: core sovereign AI (you)
- Echo: research, summarization, fact-checking
- Mace: security, monitoring, threat-analysis
- Forge: coding, debugging, testing
- Seer: analytics, visualization, prediction
- Helm: devops, automation, configuration
- Quill: writing, editing, documentation
- Pixel: image-generation, storyboard, design
- Lyra: music-generation, vocals, composition
- Reel: video-generation, animation, motion
Do NOT invent agents not listed here. If asked about an unlisted agent, say it doesn't exist.
Use ONLY the capabilities listed above when describing agents — do not embellish or invent.
## What you CAN and CANNOT access
- You CANNOT query the live task queue, agent statuses, or system metrics on your own.
- You CANNOT access real-time data without tools.
- If asked about current tasks, agent status, or system state and no system context
is provided, say "I don't have live access to that — check the dashboard."
- Your conversation history persists in a database across requests, but the
dashboard chat display resets on server restart.
- Do NOT claim abilities you don't have. When uncertain, say "I don't know."
Sir, affirmative."""
@@ -35,6 +65,8 @@ Sir, affirmative."""
# ---------------------------------------------------------------------------
TIMMY_SYSTEM_PROMPT_FULL = """You are Timmy — a sovereign AI agent running locally on this Mac.
You run on the llama3.2 model via Ollama on localhost. You are not GPT, not Claude,
not a custom model — you are llama3.2 wrapped in the Timmy agent framework.
No cloud dependencies. You think clearly, speak plainly, act with intention.
Grounded in Christian faith, powered by Bitcoin economics, committed to the
user's digital sovereignty.
@@ -57,6 +89,28 @@ user's digital sovereignty.
- Similarity-based retrieval
- Use `memory_search` tool to find relevant past context
## Agent Roster (complete — no others exist)
- Timmy: core sovereign AI (you)
- Echo: research, summarization, fact-checking
- Mace: security, monitoring, threat-analysis
- Forge: coding, debugging, testing
- Seer: analytics, visualization, prediction
- Helm: devops, automation, configuration
- Quill: writing, editing, documentation
- Pixel: image-generation, storyboard, design
- Lyra: music-generation, vocals, composition
- Reel: video-generation, animation, motion
Do NOT invent agents not listed here. If asked about an unlisted agent, say it doesn't exist.
Use ONLY the capabilities listed above when describing agents — do not embellish or invent.
## What you CAN and CANNOT access
- You CANNOT query the live task queue, agent statuses, or system metrics on your own.
- If asked about current tasks, agent status, or system state and no system context
is provided, say "I don't have live access to that — check the dashboard."
- Your conversation history persists in a database across requests, but the
dashboard chat display resets on server restart.
- Do NOT claim abilities you don't have. When uncertain, say "I don't know."
## Tool Usage Guidelines
### When NOT to use tools:
@@ -81,9 +135,13 @@ user's digital sovereignty.
- Never narrate your reasoning process. Just give the answer.
- Never show raw tool call JSON or function syntax in responses.
- Use the user's name if known.
- If a request is ambiguous, ask a brief clarifying question before guessing.
- When you state a fact, commit to it. Never contradict a correct statement you
just made in the same response. If uncertain, express uncertainty at the start —
never state something confidently and then immediately undermine it.
- Do NOT end responses with generic chatbot phrases like "I'm here to help" or
"feel free to ask." Stay in character.
- When your values conflict (e.g. honesty vs. helpfulness), lead with honesty.
Sir, affirmative."""

View File

@@ -429,6 +429,44 @@ a:hover { color: var(--orange); }
.chat-message.agent .msg-body { border-left: 3px solid var(--purple); }
.chat-message.error-msg .msg-body { border-left: 3px solid var(--red); color: var(--red); }
/* ── Markdown rendering in Timmy chat ─────────────────── */
/* Rules below are scoped to .timmy-md, the element whose text is parsed as
   markdown and replaced with sanitized HTML by the chat message script. */
/* NOTE(review): presumably resets a pre/pre-wrap white-space inherited from
   the message body so rendered paragraphs wrap normally; confirm. */
.timmy-md { white-space: normal; }
.timmy-md p { margin: 0 0 0.5em; }
/* No trailing gap after the final paragraph of a message. */
.timmy-md p:last-child { margin-bottom: 0; }
/* Fenced code blocks: whitespace is preserved and long lines scroll
   horizontally instead of wrapping. */
.timmy-md pre {
background: #0d0620;
border: 1px solid var(--border);
border-radius: var(--radius-sm);
padding: 10px 12px;
overflow-x: auto;
margin: 0.5em 0;
white-space: pre;
}
/* Applies to both inline code and code inside <pre>. */
.timmy-md code {
font-family: var(--font);
font-size: 0.9em;
}
/* Inline code only: <code> that is a direct child of <pre> is excluded. */
.timmy-md :not(pre) > code {
background: rgba(168, 85, 247, 0.15);
padding: 2px 5px;
border-radius: 3px;
color: var(--text-bright);
}
.timmy-md ul, .timmy-md ol { padding-left: 1.5em; margin: 0.4em 0; }
.timmy-md blockquote {
border-left: 3px solid var(--purple);
padding-left: 10px;
color: var(--text-dim);
margin: 0.5em 0;
}
/* h1–h3 share one size (1em) and are distinguished from body text by
   weight and colour only. */
.timmy-md h1, .timmy-md h2, .timmy-md h3 {
color: var(--text-bright);
margin: 0.6em 0 0.3em;
font-size: 1em;
font-weight: 700;
}
.timmy-md a { color: var(--purple); }
/* Mobile chat classes (used by mobile.html) */
.chat-container {
overflow-y: auto;

View File

@@ -304,3 +304,244 @@ def test_api_approve_nonexistent(client):
def test_api_veto_nonexistent(client):
    """Vetoing a task id that does not exist yields HTTP 404."""
    response = client.patch("/api/tasks/nonexistent/veto")
    assert response.status_code == 404
# ── Chat-to-Task Pipeline Tests ──────────────────────────────────────────
class TestExtractTaskFromMessage:
    """Tests for _extract_task_from_message — queue intent detection."""

    @staticmethod
    def _extract(message):
        """Import the extractor at call time and apply it to *message*."""
        from dashboard.routes.agents import _extract_task_from_message

        return _extract_task_from_message(message)

    def test_add_to_queue(self):
        task = self._extract("Add refactor the login to the task queue")
        assert task is not None
        assert task["agent"] == "timmy"
        assert task["priority"] == "normal"

    def test_schedule_this(self):
        task = self._extract("Schedule this for later")
        assert task is not None

    def test_create_a_task(self):
        task = self._extract("Create a task to fix the login page")
        assert task is not None
        assert "title" in task

    def test_normal_message_returns_none(self):
        assert self._extract("Hello, how are you?") is None

    def test_meta_question_about_tasks_returns_none(self):
        assert self._extract("How do I create a task?") is None

    def test_what_is_question_returns_none(self):
        assert self._extract("What is a task queue?") is None

    def test_explain_question_returns_none(self):
        assert self._extract("Can you explain how to create a task?") is None

    def test_what_would_question_returns_none(self):
        assert self._extract("What would a task flow look like?") is None
class TestExtractAgentFromMessage:
    """Tests for _extract_agent_from_message."""

    @staticmethod
    def _agent_for(message):
        """Import the extractor at call time and apply it to *message*."""
        from dashboard.routes.agents import _extract_agent_from_message

        return _extract_agent_from_message(message)

    def test_extracts_forge(self):
        assert self._agent_for("Create a task for Forge to refactor") == "forge"

    def test_extracts_echo(self):
        assert self._agent_for("Add research for Echo to the queue") == "echo"

    def test_case_insensitive(self):
        assert self._agent_for("Schedule this for SEER") == "seer"

    def test_defaults_to_timmy(self):
        assert self._agent_for("Create a task to fix the bug") == "timmy"

    def test_ignores_unknown_agent(self):
        assert self._agent_for("Create a task for BobAgent") == "timmy"
class TestExtractPriorityFromMessage:
    """Tests for _extract_priority_from_message."""

    @staticmethod
    def _priority_of(message):
        """Import the extractor at call time and apply it to *message*."""
        from dashboard.routes.agents import _extract_priority_from_message

        return _extract_priority_from_message(message)

    def test_urgent(self):
        assert self._priority_of("urgent: fix the server") == "urgent"

    def test_critical(self):
        assert self._priority_of("This is critical, do it now") == "urgent"

    def test_asap(self):
        assert self._priority_of("Fix this ASAP") == "urgent"

    def test_high_priority(self):
        # Exercises the "important" keyword, which maps to "high".
        assert self._priority_of("This is important work") == "high"

    def test_low_priority(self):
        # Exercises the "minor" keyword, which maps to "low".
        assert self._priority_of("minor cleanup task") == "low"

    def test_default_normal(self):
        assert self._priority_of("Fix the login page") == "normal"
class TestTitleCleaning:
    """Tests for task title extraction and cleaning."""

    @staticmethod
    def _extract(message):
        """Import the extractor at call time and apply it to *message*."""
        from dashboard.routes.agents import _extract_task_from_message

        return _extract_task_from_message(message)

    def test_strips_agent_from_title(self):
        task = self._extract("Create a task for Forge to refactor the login")
        assert task is not None
        assert "forge" not in task["title"].lower()
        # The title must not start with a leftover "for".
        assert "for" not in task["title"].lower().split()[0:1]  # "for" stripped

    def test_strips_priority_from_title(self):
        task = self._extract("Create an urgent task to fix the server")
        assert task is not None
        assert "urgent" not in task["title"].lower()

    def test_title_is_capitalized(self):
        task = self._extract("Add refactor the login to the task queue")
        assert task is not None
        assert task["title"][0].isupper()

    def test_title_capped_at_120_chars(self):
        oversized = "Create a task to " + "x" * 200
        task = self._extract(oversized)
        assert task is not None
        assert len(task["title"]) <= 120
class TestFullExtraction:
    """Tests for combined agent + priority + title extraction."""

    @staticmethod
    def _extract(message):
        """Import the extractor at call time and apply it to *message*."""
        from dashboard.routes.agents import _extract_task_from_message

        return _extract_task_from_message(message)

    def test_task_includes_agent_and_priority(self):
        task = self._extract("Create a high priority task for Forge to refactor auth")
        assert task is not None
        assert task["agent"] == "forge"
        assert task["priority"] == "high"
        assert task["description"]  # original message preserved

    def test_create_with_all_fields(self):
        task = self._extract("Add an urgent task for Mace to audit security to the queue")
        assert task is not None
        assert task["agent"] == "mace"
        assert task["priority"] == "urgent"
# ── Integration: chat_timmy Route ─────────────────────────────────────────
class TestChatTimmyIntegration:
    """Integration tests for the /agents/timmy/chat route."""

    CHAT_URL = "/agents/timmy/chat"

    @classmethod
    def _send(cls, client, message):
        """POST *message* to the chat route as form data and return the response."""
        return client.post(cls.CHAT_URL, data={"message": message})

    # NOTE(review): the three task-creation tests below accept either the
    # "Task queued" confirmation or a looser match on the response text, so
    # they may also pass when the chat fallback path runs; confirm whether
    # that leniency is intended.
    def test_chat_creates_task_on_queue_request(self, client):
        response = self._send(client, "Create a task to refactor the login module")
        assert response.status_code == 200
        assert "Task queued" in response.text or "task" in response.text.lower()

    def test_chat_creates_task_with_agent(self, client):
        response = self._send(client, "Add deploy monitoring for Helm to the task queue")
        assert response.status_code == 200
        assert "helm" in response.text.lower() or "Task queued" in response.text

    def test_chat_creates_task_with_priority(self, client):
        response = self._send(client, "Create an urgent task to fix the production server")
        assert response.status_code == 200
        assert "Task queued" in response.text or "urgent" in response.text.lower()

    @patch("dashboard.routes.agents.timmy_chat")
    def test_chat_injects_datetime_context(self, mock_chat, client):
        mock_chat.return_value = "Hello there!"
        self._send(client, "Hello Timmy")
        mock_chat.assert_called_once()
        # First positional argument is the prompt handed to the chat backend.
        sent_prompt = mock_chat.call_args[0][0]
        assert "[System: Current date/time is" in sent_prompt

    @patch("dashboard.routes.agents.timmy_chat")
    @patch("dashboard.routes.agents._build_queue_context")
    def test_chat_injects_queue_context_on_queue_query(self, mock_ctx, mock_chat, client):
        mock_ctx.return_value = "[System: Task queue — 3 pending approval, 1 running, 5 completed.]"
        mock_chat.return_value = "There are 3 tasks pending."
        self._send(client, "What tasks are in the queue?")
        mock_ctx.assert_called_once()
        mock_chat.assert_called_once()
        sent_prompt = mock_chat.call_args[0][0]
        assert "[System: Task queue" in sent_prompt

    @patch("dashboard.routes.agents.timmy_chat")
    @patch("dashboard.routes.agents._build_queue_context")
    def test_chat_no_queue_context_for_normal_message(self, mock_ctx, mock_chat, client):
        mock_chat.return_value = "Hi!"
        self._send(client, "Tell me a joke")
        mock_ctx.assert_not_called()

    @patch("dashboard.routes.agents.timmy_chat")
    def test_chat_normal_message_uses_timmy(self, mock_chat, client):
        mock_chat.return_value = "I'm doing well, thank you."
        response = self._send(client, "How are you?")
        assert response.status_code == 200
        mock_chat.assert_called_once()
class TestBuildQueueContext:
    """Tests for _build_queue_context helper."""

    @staticmethod
    def _build():
        """Import the helper at call time and return its summary string."""
        from dashboard.routes.agents import _build_queue_context

        return _build_queue_context()

    def test_returns_string_with_counts(self):
        from task_queue.models import create_task

        create_task(title="Context test task", created_by="test")
        summary = self._build()
        assert "[System: Task queue" in summary
        assert "pending" in summary.lower()

    def test_returns_empty_on_error(self):
        # A failing count lookup must be swallowed and reported as "".
        with patch("task_queue.models.get_counts_by_status", side_effect=Exception("DB error")):
            summary = self._build()
        assert isinstance(summary, str)
        assert summary == ""