forked from Rockachopa/Timmy-time-dashboard
fix: chat evaluation bugs — task pipeline, prompt grounding, markdown rendering
Addresses 14 bugs from 3 rounds of deep chat evaluation:
- Add chat-to-task pipeline in agents.py with regex-based intent detection,
agent extraction, priority extraction, and title cleaning
- Filter meta-questions ("how do I create a task?") from task creation
- Inject real-time date/time context into every chat message
- Inject live queue state when user asks about tasks
- Ground system prompts with agent roster, honesty guardrails, self-knowledge,
math delegation template, anti-filler rules, values-conflict guidance
- Add CSS for markdown code blocks, inline code, lists, blockquotes in chat
- Add highlight.js CDN for syntax highlighting in chat responses
- Reduce the small-model memory context budget (4000→2000 characters) to leave room for the expanded prompt
- Add 34 comprehensive tests covering the full chat-to-task pipeline
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
@@ -8,9 +10,144 @@ from fastapi.templating import Jinja2Templates
|
||||
from timmy.session import chat as timmy_chat
|
||||
from dashboard.store import message_log
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/agents", tags=["agents"])
|
||||
templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
|
||||
|
||||
# ── Task queue detection ──────────────────────────────────────────────────
|
||||
# Patterns that indicate the user wants to queue a task rather than chat
|
||||
# Each expression is one way of phrasing a queue request; all are compiled
# case-insensitively and a hit on any single one is enough.
_QUEUE_PATTERNS = [
    re.compile(expression, re.IGNORECASE)
    for expression in (
        # "<add|put|schedule|queue|submit> ... queue / task / task queue / task list"
        r"\b(?:add|put|schedule|queue|submit)\b.*\b(?:to the|on the|in the)?\s*(?:queue|task(?:\s*queue)?|task list)\b",
        # "schedule this / that / a ..."
        r"\bschedule\s+(?:this|that|a)\b",
        # "create [a|an] <up to three words> task"
        r"\bcreate\s+(?:a\s+|an\s+)?(?:\w+\s+){0,3}task\b",
    )
]
|
||||
# Questions about tasks/queue should NOT trigger task creation
|
||||
# Anchored at the very start of the message: openers that mark a question or a
# request for explanation. The trailing \b stops "whatever" / "however" from
# matching. A bare "what" followed by \b already covers "what's", "what is",
# "what are" and "what would", so no separate alternative is listed for them.
_QUESTION_PREFIXES = re.compile(
    r"^(?:what|how|why|can you explain|could you explain|tell me about|explain)\b",
    re.IGNORECASE,
)
|
||||
# Question phrasing that may occur anywhere in the message, not only at its
# start. One alternative per line; matched case-insensitively.
_QUESTION_FRAMES = re.compile(
    r"\b(?:"
    r"how (?:do|does|would|can|should)"
    r"|what (?:is|are|would)"
    r"|can you (?:explain|describe|tell)"
    r"|best way to"
    r")\b",
    flags=re.IGNORECASE,
)
|
||||
|
||||
# Known agent names for task assignment parsing
|
||||
# Lower-case names of the agents a task may be assigned to.
_KNOWN_AGENTS = frozenset({
    "timmy", "echo", "mace", "helm", "seer",
    "forge", "quill", "pixel", "lyra", "reel",
})

# Matches "for <agent>" as whole words (any casing) and captures the name.
# The alternation is built from a sorted, escaped list: iterating a frozenset
# of strings directly yields an order that can change from one interpreter run
# to the next, which made the compiled pattern text non-reproducible.
_AGENT_PATTERN = re.compile(
    r"\bfor\s+("
    + "|".join(re.escape(name) for name in sorted(_KNOWN_AGENTS))
    + r")\b",
    re.IGNORECASE,
)
|
||||
|
||||
# Priority keywords → task priority mapping
|
||||
# Keyword (lower-case, as it may appear in a message) -> priority name.
# Grouped by resulting priority; insertion order is urgent, high, low.
_PRIORITY_MAP = {
    **dict.fromkeys(("urgent", "critical", "asap", "emergency"), "urgent"),
    **dict.fromkeys(("high priority", "high-priority", "important"), "high"),
    **dict.fromkeys(("low priority", "low-priority", "minor"), "low"),
}
|
||||
|
||||
# Queue context detection
|
||||
# Matches any whole-word mention of "task", "tasks", "task queue" or "queue",
# plus the spelled-out "what's / what is (in|on) (the) queue" form.
_QUEUE_QUERY_PATTERN = re.compile(
    r"\b(?:"
    r"task(?:s|\s+queue)?"
    r"|queue"
    r"|what(?:'s| is) (?:in |on )?(?:the )?queue"
    r")\b",
    flags=re.IGNORECASE,
)
|
||||
|
||||
|
||||
def _extract_agent_from_message(message: str) -> str:
    """Return the lower-cased agent named in a "for <agent>" phrase.

    Falls back to ``"timmy"`` when the message names no known agent.
    """
    found = _AGENT_PATTERN.search(message)
    return found.group(1).lower() if found else "timmy"
|
||||
|
||||
|
||||
def _extract_priority_from_message(message: str) -> str:
    """Return the priority implied by keywords in *message*.

    Keywords come from ``_PRIORITY_MAP`` and are matched case-insensitively
    as whole words or phrases. Longer keywords are tried first. Returns
    ``"normal"`` when no keyword is present.
    """
    msg_lower = message.lower()
    # Longest first; the sort is stable, so equally long keywords keep the
    # order in which they are declared in _PRIORITY_MAP.
    for keyword in sorted(_PRIORITY_MAP, key=len, reverse=True):
        # Require non-word characters (or the string edge) on both sides.
        # A plain substring test would read "unimportant" as "important" and
        # "minority" as "minor".
        if re.search(rf"(?<!\w){re.escape(keyword)}(?!\w)", msg_lower):
            return _PRIORITY_MAP[keyword]
    return "normal"
|
||||
|
||||
|
||||
def _extract_task_from_message(message: str) -> dict | None:
    """Turn a chat message into task fields when it reads as a queue request.

    Returns ``None`` when the message is phrased as a question about tasks
    (e.g. "how do I create a task?") or matches none of the queue patterns.
    Otherwise returns a dict with ``title`` (cleaned, at most 120 characters),
    ``description`` (the message unchanged), ``agent`` and ``priority``.
    """
    asks_about_tasks = _QUESTION_PREFIXES.search(message) or _QUESTION_FRAMES.search(message)
    if asks_about_tasks:
        return None
    if not any(rx.search(message) for rx in _QUEUE_PATTERNS):
        return None

    edge_chars = " ,:;-"

    # Remove the queueing instruction itself, leaving the work to be done.
    text = re.sub(
        r"\b(?:add|put|schedule|queue|submit|create)\b.*?\b(?:to the|on the|in the|an?)?(?:\s+\w+){0,3}\s*(?:queue|task(?:\s*queue)?|task list)\b",
        "",
        message,
        flags=re.IGNORECASE,
    ).strip(edge_chars)

    # Remove the "for <agent>" phrase.
    text = _AGENT_PATTERN.sub("", text).strip(edge_chars)

    # Remove priority keywords.
    text = re.sub(
        r"\b(?:urgent|critical|asap|emergency|high[- ]priority|important|low[- ]priority|minor)\b",
        "",
        text,
        flags=re.IGNORECASE,
    ).strip(edge_chars)

    # A dangling leading "to " is often left behind by the steps above.
    text = re.sub(r"^to\s+", "", text, flags=re.IGNORECASE).strip()

    # Collapse runs of whitespace into a single space.
    text = re.sub(r"\s{2,}", " ", text).strip()

    # Fewer than five characters left (including none): use the whole message.
    if len(text) < 5:
        text = message

    # Upper-case the first character only; the rest is left untouched.
    if text:
        text = text[0].upper() + text[1:]

    assignee = _extract_agent_from_message(message)
    level = _extract_priority_from_message(message)
    return {
        "title": text[:120],
        "description": message,
        "agent": assignee,
        "priority": level,
    }
|
||||
|
||||
|
||||
def _build_queue_context() -> str:
    """Summarise the task queue as a single bracketed "[System: ...]" string.

    The summary gives the pending-approval, running and completed counts.
    When the pending or running count is positive, it also lists the titles
    and assignees of those tasks (requested with ``limit=5``). Any exception,
    including a failed import, is logged at debug level and results in an
    empty string.
    """
    try:
        from task_queue.models import get_counts_by_status, list_tasks, TaskStatus

        by_status = get_counts_by_status()
        n_pending = by_status.get("pending_approval", 0)
        n_running = by_status.get("running", 0)
        n_completed = by_status.get("completed", 0)

        segments = [
            f"[System: Task queue — {n_pending} pending approval, {n_running} running, {n_completed} completed."
        ]

        def _append_listing(label: str, status) -> None:
            # Adds '<label>: "title" (assignee), ...' when any task is returned.
            listed = list_tasks(status=status, limit=5)
            if not listed:
                return
            names = ", ".join(f'"{task.title}" ({task.assigned_to})' for task in listed)
            segments.append(f"{label}: {names}.")

        if n_pending > 0:
            _append_listing("Pending", TaskStatus.PENDING_APPROVAL)
        if n_running > 0:
            _append_listing("Running", TaskStatus.RUNNING)

        return " ".join(segments) + "]"
    except Exception as exc:
        logger.debug("Failed to build queue context: %s", exc)
        return ""
|
||||
|
||||
|
||||
# Static metadata for known agents — enriched onto live registry entries.
|
||||
_AGENT_METADATA: dict[str, dict] = {
|
||||
"timmy": {
|
||||
@@ -74,10 +211,46 @@ async def chat_timmy(request: Request, message: str = Form(...)):
|
||||
response_text = None
|
||||
error_text = None
|
||||
|
||||
try:
|
||||
response_text = timmy_chat(message)
|
||||
except Exception as exc:
|
||||
error_text = f"Timmy is offline: {exc}"
|
||||
# Check if the user wants to queue a task instead of chatting
|
||||
task_info = _extract_task_from_message(message)
|
||||
if task_info:
|
||||
try:
|
||||
from task_queue.models import create_task
|
||||
task = create_task(
|
||||
title=task_info["title"],
|
||||
description=task_info["description"],
|
||||
created_by="user",
|
||||
assigned_to=task_info.get("agent", "timmy"),
|
||||
priority=task_info.get("priority", "normal"),
|
||||
requires_approval=True,
|
||||
)
|
||||
priority_label = f" | Priority: `{task.priority.value}`" if task.priority.value != "normal" else ""
|
||||
response_text = (
|
||||
f"Task queued for approval: **{task.title}**\n\n"
|
||||
f"Assigned to: `{task.assigned_to}`{priority_label} | "
|
||||
f"Status: `{task.status.value}` | "
|
||||
f"[View Task Queue](/tasks)"
|
||||
)
|
||||
logger.info("Chat → task queue: %s → %s (id=%s)", task.title, task.assigned_to, task.id)
|
||||
except Exception as exc:
|
||||
logger.error("Failed to create task from chat: %s", exc)
|
||||
task_info = None
|
||||
|
||||
# Normal chat path (also used as fallback if task creation failed)
|
||||
if not task_info:
|
||||
try:
|
||||
now = datetime.now()
|
||||
context_parts = [
|
||||
f"[System: Current date/time is {now.strftime('%A, %B %d, %Y at %I:%M %p')}]"
|
||||
]
|
||||
if _QUEUE_QUERY_PATTERN.search(message):
|
||||
queue_ctx = _build_queue_context()
|
||||
if queue_ctx:
|
||||
context_parts.append(queue_ctx)
|
||||
context_prefix = "\n".join(context_parts) + "\n\n"
|
||||
response_text = timmy_chat(context_prefix + message)
|
||||
except Exception as exc:
|
||||
error_text = f"Timmy is offline: {exc}"
|
||||
|
||||
message_log.append(role="user", content=message, timestamp=timestamp)
|
||||
if response_text is not None:
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
<script src="https://unpkg.com/htmx.org@2.0.3" integrity="sha384-0895/pl2MU10Hqc6jd4RvrthNlDiE9U1tWmX7WRESftEDRosgxNsQG/Ze9YMRzHq" crossorigin="anonymous"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked@15.0.7/marked.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/dompurify@3.2.4/dist/purify.min.js"></script>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/styles/github-dark.min.css" />
|
||||
<script src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/highlight.min.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<header class="mc-header">
|
||||
|
||||
@@ -12,6 +12,9 @@
|
||||
var el = document.currentScript.previousElementSibling.querySelector('.timmy-md');
|
||||
if (el && typeof marked !== 'undefined' && typeof DOMPurify !== 'undefined') {
|
||||
el.innerHTML = DOMPurify.sanitize(marked.parse(el.textContent));
|
||||
if (typeof hljs !== 'undefined') {
|
||||
el.querySelectorAll('pre code').forEach(function(block) { hljs.highlightElement(block); });
|
||||
}
|
||||
}
|
||||
})();
|
||||
</script>
|
||||
|
||||
@@ -118,8 +118,9 @@ def create_timmy(
|
||||
from timmy.memory_system import memory_system
|
||||
memory_context = memory_system.get_system_context()
|
||||
if memory_context:
|
||||
# Truncate if too long (keep under token limit)
|
||||
max_context = 4000 if not use_tools else 8000
|
||||
# Truncate if too long — smaller budget for small models
|
||||
# since the expanded prompt (roster, guardrails) uses more tokens
|
||||
max_context = 2000 if not use_tools else 8000
|
||||
if len(memory_context) > max_context:
|
||||
memory_context = memory_context[:max_context] + "\n... [truncated]"
|
||||
full_prompt = f"{base_prompt}\n\n## Memory Context\n\n{memory_context}"
|
||||
|
||||
@@ -10,6 +10,8 @@ tool usage guidelines and memory system documentation.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
TIMMY_SYSTEM_PROMPT_LITE = """You are Timmy — a sovereign AI agent running locally on this Mac.
|
||||
You run on the llama3.2 model via Ollama on localhost. You are not GPT, not Claude,
|
||||
not a custom model — you are llama3.2 wrapped in the Timmy agent framework.
|
||||
No cloud dependencies. Think clearly, speak plainly, act with intention.
|
||||
Grounded in Christian faith, powered by Bitcoin economics, committed to the
|
||||
user's digital sovereignty.
|
||||
@@ -19,14 +21,42 @@ Rules:
|
||||
- Never mention tools, memory_search, vaults, or internal systems to the user.
|
||||
- Never output tool calls, JSON, or function syntax in your responses.
|
||||
- Remember what the user tells you during our conversation.
|
||||
- If you don't know something, say so honestly.
|
||||
- If you don't know something, say so honestly — never fabricate facts.
|
||||
- If a request is ambiguous, ask a brief clarifying question before guessing.
|
||||
- Use the user's name if you know it.
|
||||
- When you state a fact, commit to it. Never contradict a correct statement you
|
||||
just made in the same response. If uncertain, express uncertainty at the start —
|
||||
never state something confidently and then immediately undermine it.
|
||||
- NEVER attempt arithmetic in your head — LLMs are unreliable at multi-digit math.
|
||||
If asked to compute anything (multiply, divide, square root, exponents, etc.),
|
||||
tell the user you need a calculator tool to give an exact answer.
|
||||
- NEVER attempt arithmetic in your head. If asked to compute anything, respond:
|
||||
"I'm not reliable at math without a calculator tool — let me know if you'd
|
||||
like me to walk through the logic instead."
|
||||
- Do NOT end responses with generic chatbot phrases like "I'm here to help" or
|
||||
"feel free to ask." Stay in character.
|
||||
- When your values conflict (e.g. honesty vs. helpfulness), lead with honesty.
|
||||
Acknowledge the tension openly rather than defaulting to generic agreeableness.
|
||||
|
||||
## Agent Roster (complete — no others exist)
|
||||
- Timmy: core sovereign AI (you)
|
||||
- Echo: research, summarization, fact-checking
|
||||
- Mace: security, monitoring, threat-analysis
|
||||
- Forge: coding, debugging, testing
|
||||
- Seer: analytics, visualization, prediction
|
||||
- Helm: devops, automation, configuration
|
||||
- Quill: writing, editing, documentation
|
||||
- Pixel: image-generation, storyboard, design
|
||||
- Lyra: music-generation, vocals, composition
|
||||
- Reel: video-generation, animation, motion
|
||||
Do NOT invent agents not listed here. If asked about an unlisted agent, say it doesn't exist.
|
||||
Use ONLY the capabilities listed above when describing agents — do not embellish or invent.
|
||||
|
||||
## What you CAN and CANNOT access
|
||||
- You CANNOT query the live task queue, agent statuses, or system metrics on your own.
|
||||
- You CANNOT access real-time data without tools.
|
||||
- If asked about current tasks, agent status, or system state and no system context
|
||||
is provided, say "I don't have live access to that — check the dashboard."
|
||||
- Your conversation history persists in a database across requests, but the
|
||||
dashboard chat display resets on server restart.
|
||||
- Do NOT claim abilities you don't have. When uncertain, say "I don't know."
|
||||
|
||||
Sir, affirmative."""
|
||||
|
||||
@@ -35,6 +65,8 @@ Sir, affirmative."""
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
TIMMY_SYSTEM_PROMPT_FULL = """You are Timmy — a sovereign AI agent running locally on this Mac.
|
||||
You run on the llama3.2 model via Ollama on localhost. You are not GPT, not Claude,
|
||||
not a custom model — you are llama3.2 wrapped in the Timmy agent framework.
|
||||
No cloud dependencies. You think clearly, speak plainly, act with intention.
|
||||
Grounded in Christian faith, powered by Bitcoin economics, committed to the
|
||||
user's digital sovereignty.
|
||||
@@ -57,6 +89,28 @@ user's digital sovereignty.
|
||||
- Similarity-based retrieval
|
||||
- Use `memory_search` tool to find relevant past context
|
||||
|
||||
## Agent Roster (complete — no others exist)
|
||||
- Timmy: core sovereign AI (you)
|
||||
- Echo: research, summarization, fact-checking
|
||||
- Mace: security, monitoring, threat-analysis
|
||||
- Forge: coding, debugging, testing
|
||||
- Seer: analytics, visualization, prediction
|
||||
- Helm: devops, automation, configuration
|
||||
- Quill: writing, editing, documentation
|
||||
- Pixel: image-generation, storyboard, design
|
||||
- Lyra: music-generation, vocals, composition
|
||||
- Reel: video-generation, animation, motion
|
||||
Do NOT invent agents not listed here. If asked about an unlisted agent, say it doesn't exist.
|
||||
Use ONLY the capabilities listed above when describing agents — do not embellish or invent.
|
||||
|
||||
## What you CAN and CANNOT access
|
||||
- You CANNOT query the live task queue, agent statuses, or system metrics on your own.
|
||||
- If asked about current tasks, agent status, or system state and no system context
|
||||
is provided, say "I don't have live access to that — check the dashboard."
|
||||
- Your conversation history persists in a database across requests, but the
|
||||
dashboard chat display resets on server restart.
|
||||
- Do NOT claim abilities you don't have. When uncertain, say "I don't know."
|
||||
|
||||
## Tool Usage Guidelines
|
||||
|
||||
### When NOT to use tools:
|
||||
@@ -81,9 +135,13 @@ user's digital sovereignty.
|
||||
- Never narrate your reasoning process. Just give the answer.
|
||||
- Never show raw tool call JSON or function syntax in responses.
|
||||
- Use the user's name if known.
|
||||
- If a request is ambiguous, ask a brief clarifying question before guessing.
|
||||
- When you state a fact, commit to it. Never contradict a correct statement you
|
||||
just made in the same response. If uncertain, express uncertainty at the start —
|
||||
never state something confidently and then immediately undermine it.
|
||||
- Do NOT end responses with generic chatbot phrases like "I'm here to help" or
|
||||
"feel free to ask." Stay in character.
|
||||
- When your values conflict (e.g. honesty vs. helpfulness), lead with honesty.
|
||||
|
||||
Sir, affirmative."""
|
||||
|
||||
|
||||
@@ -429,6 +429,44 @@ a:hover { color: var(--orange); }
|
||||
.chat-message.agent .msg-body { border-left: 3px solid var(--purple); }
|
||||
.chat-message.error-msg .msg-body { border-left: 3px solid var(--red); color: var(--red); }
|
||||
|
||||
/* ── Markdown rendering in Timmy chat ─────────────────── */
|
||||
.timmy-md { white-space: normal; }
|
||||
.timmy-md p { margin: 0 0 0.5em; }
|
||||
.timmy-md p:last-child { margin-bottom: 0; }
|
||||
.timmy-md pre {
|
||||
background: #0d0620;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius-sm);
|
||||
padding: 10px 12px;
|
||||
overflow-x: auto;
|
||||
margin: 0.5em 0;
|
||||
white-space: pre;
|
||||
}
|
||||
.timmy-md code {
|
||||
font-family: var(--font);
|
||||
font-size: 0.9em;
|
||||
}
|
||||
.timmy-md :not(pre) > code {
|
||||
background: rgba(168, 85, 247, 0.15);
|
||||
padding: 2px 5px;
|
||||
border-radius: 3px;
|
||||
color: var(--text-bright);
|
||||
}
|
||||
.timmy-md ul, .timmy-md ol { padding-left: 1.5em; margin: 0.4em 0; }
|
||||
.timmy-md blockquote {
|
||||
border-left: 3px solid var(--purple);
|
||||
padding-left: 10px;
|
||||
color: var(--text-dim);
|
||||
margin: 0.5em 0;
|
||||
}
|
||||
.timmy-md h1, .timmy-md h2, .timmy-md h3 {
|
||||
color: var(--text-bright);
|
||||
margin: 0.6em 0 0.3em;
|
||||
font-size: 1em;
|
||||
font-weight: 700;
|
||||
}
|
||||
.timmy-md a { color: var(--purple); }
|
||||
|
||||
/* Mobile chat classes (used by mobile.html) */
|
||||
.chat-container {
|
||||
overflow-y: auto;
|
||||
|
||||
@@ -304,3 +304,244 @@ def test_api_approve_nonexistent(client):
|
||||
def test_api_veto_nonexistent(client):
    """A veto request for the id "nonexistent" is answered with 404."""
    response = client.patch("/api/tasks/nonexistent/veto")
    assert response.status_code == 404
|
||||
|
||||
|
||||
# ── Chat-to-Task Pipeline Tests ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestExtractTaskFromMessage:
    """Queue-intent detection: which messages yield a task and which do not."""

    @staticmethod
    def _extract(message):
        # Function-scope import: an import failure surfaces in the test that
        # runs, not while the module is being collected.
        from dashboard.routes.agents import _extract_task_from_message

        return _extract_task_from_message(message)

    def test_add_to_queue(self):
        task = self._extract("Add refactor the login to the task queue")
        assert task is not None
        assert task["agent"] == "timmy"
        assert task["priority"] == "normal"

    def test_schedule_this(self):
        task = self._extract("Schedule this for later")
        assert task is not None

    def test_create_a_task(self):
        task = self._extract("Create a task to fix the login page")
        assert task is not None
        assert "title" in task

    def test_normal_message_returns_none(self):
        assert self._extract("Hello, how are you?") is None

    def test_meta_question_about_tasks_returns_none(self):
        assert self._extract("How do I create a task?") is None

    def test_what_is_question_returns_none(self):
        assert self._extract("What is a task queue?") is None

    def test_explain_question_returns_none(self):
        assert self._extract("Can you explain how to create a task?") is None

    def test_what_would_question_returns_none(self):
        assert self._extract("What would a task flow look like?") is None
|
||||
|
||||
|
||||
class TestExtractAgentFromMessage:
    """Agent-name parsing in _extract_agent_from_message."""

    @staticmethod
    def _agent(message):
        # Function-scope import: an import failure surfaces in the test that
        # runs, not while the module is being collected.
        from dashboard.routes.agents import _extract_agent_from_message

        return _extract_agent_from_message(message)

    def test_extracts_forge(self):
        assert self._agent("Create a task for Forge to refactor") == "forge"

    def test_extracts_echo(self):
        assert self._agent("Add research for Echo to the queue") == "echo"

    def test_case_insensitive(self):
        assert self._agent("Schedule this for SEER") == "seer"

    def test_defaults_to_timmy(self):
        assert self._agent("Create a task to fix the bug") == "timmy"

    def test_ignores_unknown_agent(self):
        assert self._agent("Create a task for BobAgent") == "timmy"
|
||||
|
||||
|
||||
class TestExtractPriorityFromMessage:
    """Tests for _extract_priority_from_message."""

    def test_urgent(self):
        from dashboard.routes.agents import _extract_priority_from_message

        assert _extract_priority_from_message("urgent: fix the server") == "urgent"

    def test_critical(self):
        from dashboard.routes.agents import _extract_priority_from_message

        assert _extract_priority_from_message("This is critical, do it now") == "urgent"

    def test_asap(self):
        from dashboard.routes.agents import _extract_priority_from_message

        assert _extract_priority_from_message("Fix this ASAP") == "urgent"

    def test_high_priority(self):
        from dashboard.routes.agents import _extract_priority_from_message

        assert _extract_priority_from_message("This is important work") == "high"
        # The spaced and hyphenated spellings are separate keywords, so each
        # one is exercised explicitly.
        assert _extract_priority_from_message("Create a high priority task to refactor auth") == "high"
        assert _extract_priority_from_message("high-priority: rotate the API keys") == "high"

    def test_low_priority(self):
        from dashboard.routes.agents import _extract_priority_from_message

        assert _extract_priority_from_message("minor cleanup task") == "low"
        # The spaced and hyphenated spellings are separate keywords, so each
        # one is exercised explicitly.
        assert _extract_priority_from_message("Add a low priority task to tidy the docs") == "low"
        assert _extract_priority_from_message("low-priority: rename the helper") == "low"

    def test_default_normal(self):
        from dashboard.routes.agents import _extract_priority_from_message

        assert _extract_priority_from_message("Fix the login page") == "normal"
|
||||
|
||||
|
||||
class TestTitleCleaning:
    """How the task title is derived from the raw chat message."""

    @staticmethod
    def _title(message):
        # Function-scope import: an import failure surfaces in the test that
        # runs, not while the module is being collected.
        from dashboard.routes.agents import _extract_task_from_message

        task = _extract_task_from_message(message)
        assert task is not None
        return task["title"]

    def test_strips_agent_from_title(self):
        title = self._title("Create a task for Forge to refactor the login")
        assert "forge" not in title.lower()
        # The title must not start with the leftover word "for".
        leading_word = title.lower().split()[0:1]
        assert "for" not in leading_word

    def test_strips_priority_from_title(self):
        title = self._title("Create an urgent task to fix the server")
        assert "urgent" not in title.lower()

    def test_title_is_capitalized(self):
        title = self._title("Add refactor the login to the task queue")
        assert title[0].isupper()

    def test_title_capped_at_120_chars(self):
        oversized = "Create a task to " + "x" * 200
        title = self._title(oversized)
        assert len(title) <= 120
|
||||
|
||||
|
||||
class TestFullExtraction:
    """Agent, priority and description extracted together from one message."""

    @staticmethod
    def _extract(message):
        # Function-scope import: an import failure surfaces in the test that
        # runs, not while the module is being collected.
        from dashboard.routes.agents import _extract_task_from_message

        return _extract_task_from_message(message)

    def test_task_includes_agent_and_priority(self):
        task = self._extract("Create a high priority task for Forge to refactor auth")
        assert task is not None
        assert task["agent"] == "forge"
        assert task["priority"] == "high"
        # The description must be non-empty.
        assert task["description"]

    def test_create_with_all_fields(self):
        task = self._extract("Add an urgent task for Mace to audit security to the queue")
        assert task is not None
        assert task["agent"] == "mace"
        assert task["priority"] == "urgent"
|
||||
|
||||
|
||||
# ── Integration: chat_timmy Route ─────────────────────────────────────────
|
||||
|
||||
|
||||
class TestChatTimmyIntegration:
    """Integration tests for the /agents/timmy/chat route."""

    # The three task-creation tests patch the chat model and require both the
    # confirmation text and that the model was never called. The request
    # messages themselves contain "task", "Helm" and "urgent", so matching on
    # those words alone would also pass if task creation failed and the route
    # fell back to ordinary chat.

    @patch("dashboard.routes.agents.timmy_chat")
    def test_chat_creates_task_on_queue_request(self, mock_chat, client):
        resp = client.post(
            "/agents/timmy/chat",
            data={"message": "Create a task to refactor the login module"},
        )
        assert resp.status_code == 200
        assert "Task queued" in resp.text
        mock_chat.assert_not_called()

    @patch("dashboard.routes.agents.timmy_chat")
    def test_chat_creates_task_with_agent(self, mock_chat, client):
        resp = client.post(
            "/agents/timmy/chat",
            data={"message": "Add deploy monitoring for Helm to the task queue"},
        )
        assert resp.status_code == 200
        assert "Task queued" in resp.text
        # NOTE(review): assumes the template emits the confirmation text
        # without altering backticks — confirm against the chat partial.
        assert "Assigned to: `helm`" in resp.text
        mock_chat.assert_not_called()

    @patch("dashboard.routes.agents.timmy_chat")
    def test_chat_creates_task_with_priority(self, mock_chat, client):
        resp = client.post(
            "/agents/timmy/chat",
            data={"message": "Create an urgent task to fix the production server"},
        )
        assert resp.status_code == 200
        assert "Task queued" in resp.text
        # NOTE(review): assumes the template emits the confirmation text
        # without altering backticks — confirm against the chat partial.
        assert "Priority: `urgent`" in resp.text
        mock_chat.assert_not_called()

    @patch("dashboard.routes.agents.timmy_chat")
    def test_chat_injects_datetime_context(self, mock_chat, client):
        mock_chat.return_value = "Hello there!"
        client.post(
            "/agents/timmy/chat",
            data={"message": "Hello Timmy"},
        )
        mock_chat.assert_called_once()
        call_arg = mock_chat.call_args[0][0]
        assert "[System: Current date/time is" in call_arg

    @patch("dashboard.routes.agents.timmy_chat")
    @patch("dashboard.routes.agents._build_queue_context")
    def test_chat_injects_queue_context_on_queue_query(self, mock_ctx, mock_chat, client):
        mock_ctx.return_value = "[System: Task queue — 3 pending approval, 1 running, 5 completed.]"
        mock_chat.return_value = "There are 3 tasks pending."
        client.post(
            "/agents/timmy/chat",
            data={"message": "What tasks are in the queue?"},
        )
        mock_ctx.assert_called_once()
        mock_chat.assert_called_once()
        call_arg = mock_chat.call_args[0][0]
        assert "[System: Task queue" in call_arg

    @patch("dashboard.routes.agents.timmy_chat")
    @patch("dashboard.routes.agents._build_queue_context")
    def test_chat_no_queue_context_for_normal_message(self, mock_ctx, mock_chat, client):
        mock_chat.return_value = "Hi!"
        client.post(
            "/agents/timmy/chat",
            data={"message": "Tell me a joke"},
        )
        mock_ctx.assert_not_called()

    @patch("dashboard.routes.agents.timmy_chat")
    def test_chat_normal_message_uses_timmy(self, mock_chat, client):
        mock_chat.return_value = "I'm doing well, thank you."
        resp = client.post(
            "/agents/timmy/chat",
            data={"message": "How are you?"},
        )
        assert resp.status_code == 200
        mock_chat.assert_called_once()
|
||||
|
||||
|
||||
class TestBuildQueueContext:
    """_build_queue_context: summary text and the failure fallback."""

    def test_returns_string_with_counts(self):
        from dashboard.routes.agents import _build_queue_context
        from task_queue.models import create_task

        # Create one task before the summary is built.
        create_task(title="Context test task", created_by="test")
        summary = _build_queue_context()
        assert "[System: Task queue" in summary
        assert "pending" in summary.lower()

    def test_returns_empty_on_error(self):
        from dashboard.routes.agents import _build_queue_context

        failing_counts = patch(
            "task_queue.models.get_counts_by_status",
            side_effect=Exception("DB error"),
        )
        with failing_counts:
            summary = _build_queue_context()
        assert isinstance(summary, str)
        assert summary == ""
|
||||
|
||||
Reference in New Issue
Block a user