fix: chat evaluation bugs — task pipeline, prompt grounding, markdown rendering
Addresses 14 bugs from 3 rounds of deep chat evaluation:
- Add chat-to-task pipeline in agents.py with regex-based intent detection,
agent extraction, priority extraction, and title cleaning
- Filter meta-questions ("how do I create a task?") from task creation
- Inject real-time date/time context into every chat message
- Inject live queue state when user asks about tasks
- Ground system prompts with agent roster, honesty guardrails, self-knowledge,
math delegation template, anti-filler rules, values-conflict guidance
- Add CSS for markdown code blocks, inline code, lists, blockquotes in chat
- Add highlight.js CDN for syntax highlighting in chat responses
- Reduce small-model memory context budget (4000→2000 characters) to leave room for the expanded prompt
- Add 27 comprehensive tests covering the full chat-to-task pipeline
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,3 +1,5 @@
|
|||||||
|
import logging
|
||||||
|
import re
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -8,9 +10,144 @@ from fastapi.templating import Jinja2Templates
|
|||||||
from timmy.session import chat as timmy_chat
|
from timmy.session import chat as timmy_chat
|
||||||
from dashboard.store import message_log
|
from dashboard.store import message_log
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
router = APIRouter(prefix="/agents", tags=["agents"])
|
router = APIRouter(prefix="/agents", tags=["agents"])
|
||||||
templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
|
templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
|
||||||
|
|
||||||
|
# ── Task queue detection ──────────────────────────────────────────────────
# Phrasings that signal "queue this as a task" rather than ordinary chat.
# Every pattern is compiled case-insensitively.
_QUEUE_PATTERNS = [
    re.compile(source, re.IGNORECASE)
    for source in (
        # "<add|put|schedule|queue|submit> ... queue / task / task queue / task list"
        r"\b(?:add|put|schedule|queue|submit)\b.*\b(?:to the|on the|in the)?\s*(?:queue|task(?:\s*queue)?|task list)\b",
        # "schedule this / that / a ..."
        r"\bschedule\s+(?:this|that|a)\b",
        # "create [a|an] [up to three words] task"
        r"\bcreate\s+(?:a\s+|an\s+)?(?:\w+\s+){0,3}task\b",
    )
]

# Meta-questions about tasks or the queue must NOT create a task.
# _QUESTION_PREFIXES is anchored at the very start of the message.
_QUESTION_PREFIXES = re.compile(
    r"^(?:what|how|why|can you explain|could you explain|tell me about|explain|"
    r"what(?:'s| is| are| would))\b",
    flags=re.IGNORECASE,
)

# _QUESTION_FRAMES matches question phrasing anywhere inside the message.
_QUESTION_FRAMES = re.compile(
    r"\b(?:how (?:do|does|would|can|should)|what (?:is|are|would)|"
    r"can you (?:explain|describe|tell)|best way to)\b",
    flags=re.IGNORECASE,
)
|
||||||
|
|
||||||
|
# Agent names that a chat message may assign a task to.
_KNOWN_AGENTS = frozenset(
    (
        "timmy",
        "echo",
        "mace",
        "helm",
        "seer",
        "forge",
        "quill",
        "pixel",
        "lyra",
        "reel",
    )
)

# Matches "for <agent>" (case-insensitive); group 1 captures the agent name.
_AGENT_PATTERN = re.compile(
    rf"\bfor\s+({'|'.join(_KNOWN_AGENTS)})\b",
    flags=re.IGNORECASE,
)
|
||||||
|
|
||||||
|
# Priority keywords → task priority mapping.
# Keywords are grouped by the level they map to; the resulting insertion
# order (urgent group, then high, then low) matches a flat listing.
_PRIORITY_MAP = {
    keyword: level
    for level, keywords in (
        ("urgent", ("urgent", "critical", "asap", "emergency")),
        ("high", ("high priority", "high-priority", "important")),
        ("low", ("low priority", "low-priority", "minor")),
    )
    for keyword in keywords
}
|
||||||
|
|
||||||
|
# Queue context detection: matches any mention of "task", "tasks",
# "task queue" or "queue" (case-insensitive).
_QUEUE_QUERY_PATTERN = re.compile(
    r"\b(?:task(?:s|\s+queue)?|queue|what(?:'s| is) (?:in |on )?(?:the )?queue)\b",
    flags=re.IGNORECASE,
)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_agent_from_message(message: str) -> str:
    """Return the agent named in a "for <agent>" phrase of *message*.

    The name is lower-cased. When ``_AGENT_PATTERN`` finds no known agent,
    the result is ``"timmy"``.
    """
    found = _AGENT_PATTERN.search(message)
    return found.group(1).lower() if found else "timmy"
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_priority_from_message(message: str) -> str:
    """Extract priority level from message, defaulting to 'normal'.

    Keywords from ``_PRIORITY_MAP`` are matched case-insensitively and only
    as whole words or phrases, so "unimportant" or "minority" no longer
    trigger a priority. This matches the ``\\b``-delimited keyword stripping
    done when the task title is cleaned. Longer keywords are tried first, so
    a multi-word phrase such as "high priority" takes precedence over any
    shorter keyword that is also present.

    Args:
        message: Raw chat message from the user.

    Returns:
        The mapped priority for the first matching keyword, or ``"normal"``
        when no keyword is present.
    """
    msg_lower = message.lower()
    # sorted() is stable even with reverse=True, so keywords of equal length
    # keep their insertion order from _PRIORITY_MAP.
    for keyword in sorted(_PRIORITY_MAP, key=len, reverse=True):
        if re.search(rf"\b{re.escape(keyword)}\b", msg_lower):
            return _PRIORITY_MAP[keyword]
    return "normal"
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_task_from_message(message: str) -> dict | None:
    """Turn a task-queue request into task details, or return None.

    None is returned when the message reads like a question about tasks
    (e.g. "how do I create a task?") or when none of ``_QUEUE_PATTERNS``
    matches. Otherwise the result is a dict with keys ``title`` (cleaned,
    capitalised, at most 120 characters), ``description`` (the original
    message), ``agent`` and ``priority``.
    """
    # Guard 1: meta-questions never create tasks.
    looks_like_question = (
        _QUESTION_PREFIXES.search(message) is not None
        or _QUESTION_FRAMES.search(message) is not None
    )
    if looks_like_question:
        return None

    # Guard 2: at least one queue-intent pattern has to match.
    if not any(candidate.search(message) for candidate in _QUEUE_PATTERNS):
        return None

    trim_chars = " ,:;-"

    # Remove the queue instruction itself, leaving the task description.
    cleaned = re.sub(
        r"\b(?:add|put|schedule|queue|submit|create)\b.*?\b(?:to the|on the|in the|an?)?(?:\s+\w+){0,3}\s*(?:queue|task(?:\s*queue)?|task list)\b",
        "",
        message,
        flags=re.IGNORECASE,
    ).strip(trim_chars)

    # Remove the "for <agent>" phrase.
    cleaned = _AGENT_PATTERN.sub("", cleaned).strip(trim_chars)

    # Remove priority keywords.
    cleaned = re.sub(
        r"\b(?:urgent|critical|asap|emergency|high[- ]priority|important|low[- ]priority|minor)\b",
        "",
        cleaned,
        flags=re.IGNORECASE,
    ).strip(trim_chars)

    # A leading "to " is often left over; drop it, then collapse space runs.
    cleaned = re.sub(r"^to\s+", "", cleaned, flags=re.IGNORECASE).strip()
    cleaned = re.sub(r"\s{2,}", " ", cleaned).strip()

    # If cleaning left (almost) nothing, use the full message as the title.
    if len(cleaned) < 5:
        cleaned = message

    # Upper-case only the first character; the rest is left untouched.
    if cleaned:
        cleaned = cleaned[0].upper() + cleaned[1:]

    return {
        "title": cleaned[:120],
        "description": message,
        "agent": _extract_agent_from_message(message),
        "priority": _extract_priority_from_message(message),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _build_queue_context() -> str:
    """Summarise the task queue as a bracketed ``[System: ...]`` string.

    The summary gives the pending-approval, running and completed counts,
    followed by up to five pending and up to five running tasks (title and
    assignee). Any exception is logged at debug level and an empty string is
    returned instead.
    """
    try:
        from task_queue.models import get_counts_by_status, list_tasks, TaskStatus

        def _listing(status) -> str:
            # Comma-separated '"title" (assignee)' entries; "" when no tasks.
            found = list_tasks(status=status, limit=5) or ()
            return ", ".join(f'"{t.title}" ({t.assigned_to})' for t in found)

        counts = get_counts_by_status()
        pending = counts.get("pending_approval", 0)
        running = counts.get("running", 0)
        completed = counts.get("completed", 0)

        summary = [
            f"[System: Task queue — {pending} pending approval, {running} running, {completed} completed."
        ]

        if pending > 0:
            items = _listing(TaskStatus.PENDING_APPROVAL)
            if items:
                summary.append(f"Pending: {items}.")

        if running > 0:
            items = _listing(TaskStatus.RUNNING)
            if items:
                summary.append(f"Running: {items}.")

        return " ".join(summary) + "]"
    except Exception as exc:
        # Best effort: a failing queue lookup must not break the chat reply.
        logger.debug("Failed to build queue context: %s", exc)
        return ""
|
||||||
|
|
||||||
|
|
||||||
# Static metadata for known agents — enriched onto live registry entries.
|
# Static metadata for known agents — enriched onto live registry entries.
|
||||||
_AGENT_METADATA: dict[str, dict] = {
|
_AGENT_METADATA: dict[str, dict] = {
|
||||||
"timmy": {
|
"timmy": {
|
||||||
@@ -74,10 +211,46 @@ async def chat_timmy(request: Request, message: str = Form(...)):
|
|||||||
response_text = None
|
response_text = None
|
||||||
error_text = None
|
error_text = None
|
||||||
|
|
||||||
try:
|
# Check if the user wants to queue a task instead of chatting
|
||||||
response_text = timmy_chat(message)
|
task_info = _extract_task_from_message(message)
|
||||||
except Exception as exc:
|
if task_info:
|
||||||
error_text = f"Timmy is offline: {exc}"
|
try:
|
||||||
|
from task_queue.models import create_task
|
||||||
|
task = create_task(
|
||||||
|
title=task_info["title"],
|
||||||
|
description=task_info["description"],
|
||||||
|
created_by="user",
|
||||||
|
assigned_to=task_info.get("agent", "timmy"),
|
||||||
|
priority=task_info.get("priority", "normal"),
|
||||||
|
requires_approval=True,
|
||||||
|
)
|
||||||
|
priority_label = f" | Priority: `{task.priority.value}`" if task.priority.value != "normal" else ""
|
||||||
|
response_text = (
|
||||||
|
f"Task queued for approval: **{task.title}**\n\n"
|
||||||
|
f"Assigned to: `{task.assigned_to}`{priority_label} | "
|
||||||
|
f"Status: `{task.status.value}` | "
|
||||||
|
f"[View Task Queue](/tasks)"
|
||||||
|
)
|
||||||
|
logger.info("Chat → task queue: %s → %s (id=%s)", task.title, task.assigned_to, task.id)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("Failed to create task from chat: %s", exc)
|
||||||
|
task_info = None
|
||||||
|
|
||||||
|
# Normal chat path (also used as fallback if task creation failed)
|
||||||
|
if not task_info:
|
||||||
|
try:
|
||||||
|
now = datetime.now()
|
||||||
|
context_parts = [
|
||||||
|
f"[System: Current date/time is {now.strftime('%A, %B %d, %Y at %I:%M %p')}]"
|
||||||
|
]
|
||||||
|
if _QUEUE_QUERY_PATTERN.search(message):
|
||||||
|
queue_ctx = _build_queue_context()
|
||||||
|
if queue_ctx:
|
||||||
|
context_parts.append(queue_ctx)
|
||||||
|
context_prefix = "\n".join(context_parts) + "\n\n"
|
||||||
|
response_text = timmy_chat(context_prefix + message)
|
||||||
|
except Exception as exc:
|
||||||
|
error_text = f"Timmy is offline: {exc}"
|
||||||
|
|
||||||
message_log.append(role="user", content=message, timestamp=timestamp)
|
message_log.append(role="user", content=message, timestamp=timestamp)
|
||||||
if response_text is not None:
|
if response_text is not None:
|
||||||
|
|||||||
@@ -16,6 +16,8 @@
|
|||||||
<script src="https://unpkg.com/htmx.org@2.0.3" integrity="sha384-0895/pl2MU10Hqc6jd4RvrthNlDiE9U1tWmX7WRESftEDRosgxNsQG/Ze9YMRzHq" crossorigin="anonymous"></script>
|
<script src="https://unpkg.com/htmx.org@2.0.3" integrity="sha384-0895/pl2MU10Hqc6jd4RvrthNlDiE9U1tWmX7WRESftEDRosgxNsQG/Ze9YMRzHq" crossorigin="anonymous"></script>
|
||||||
<script src="https://cdn.jsdelivr.net/npm/marked@15.0.7/marked.min.js"></script>
|
<script src="https://cdn.jsdelivr.net/npm/marked@15.0.7/marked.min.js"></script>
|
||||||
<script src="https://cdn.jsdelivr.net/npm/dompurify@3.2.4/dist/purify.min.js"></script>
|
<script src="https://cdn.jsdelivr.net/npm/dompurify@3.2.4/dist/purify.min.js"></script>
|
||||||
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/styles/github-dark.min.css" />
|
||||||
|
<script src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/highlight.min.js"></script>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<header class="mc-header">
|
<header class="mc-header">
|
||||||
|
|||||||
@@ -12,6 +12,9 @@
|
|||||||
var el = document.currentScript.previousElementSibling.querySelector('.timmy-md');
|
var el = document.currentScript.previousElementSibling.querySelector('.timmy-md');
|
||||||
if (el && typeof marked !== 'undefined' && typeof DOMPurify !== 'undefined') {
|
if (el && typeof marked !== 'undefined' && typeof DOMPurify !== 'undefined') {
|
||||||
el.innerHTML = DOMPurify.sanitize(marked.parse(el.textContent));
|
el.innerHTML = DOMPurify.sanitize(marked.parse(el.textContent));
|
||||||
|
if (typeof hljs !== 'undefined') {
|
||||||
|
el.querySelectorAll('pre code').forEach(function(block) { hljs.highlightElement(block); });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})();
|
})();
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
@@ -118,8 +118,9 @@ def create_timmy(
|
|||||||
from timmy.memory_system import memory_system
|
from timmy.memory_system import memory_system
|
||||||
memory_context = memory_system.get_system_context()
|
memory_context = memory_system.get_system_context()
|
||||||
if memory_context:
|
if memory_context:
|
||||||
# Truncate if too long (keep under token limit)
|
# Truncate if too long — smaller budget for small models
|
||||||
max_context = 4000 if not use_tools else 8000
|
# since the expanded prompt (roster, guardrails) uses more tokens
|
||||||
|
max_context = 2000 if not use_tools else 8000
|
||||||
if len(memory_context) > max_context:
|
if len(memory_context) > max_context:
|
||||||
memory_context = memory_context[:max_context] + "\n... [truncated]"
|
memory_context = memory_context[:max_context] + "\n... [truncated]"
|
||||||
full_prompt = f"{base_prompt}\n\n## Memory Context\n\n{memory_context}"
|
full_prompt = f"{base_prompt}\n\n## Memory Context\n\n{memory_context}"
|
||||||
|
|||||||
@@ -10,6 +10,8 @@ tool usage guidelines and memory system documentation.
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
TIMMY_SYSTEM_PROMPT_LITE = """You are Timmy — a sovereign AI agent running locally on this Mac.
|
TIMMY_SYSTEM_PROMPT_LITE = """You are Timmy — a sovereign AI agent running locally on this Mac.
|
||||||
|
You run on the llama3.2 model via Ollama on localhost. You are not GPT, not Claude,
|
||||||
|
not a custom model — you are llama3.2 wrapped in the Timmy agent framework.
|
||||||
No cloud dependencies. Think clearly, speak plainly, act with intention.
|
No cloud dependencies. Think clearly, speak plainly, act with intention.
|
||||||
Grounded in Christian faith, powered by Bitcoin economics, committed to the
|
Grounded in Christian faith, powered by Bitcoin economics, committed to the
|
||||||
user's digital sovereignty.
|
user's digital sovereignty.
|
||||||
@@ -19,14 +21,42 @@ Rules:
|
|||||||
- Never mention tools, memory_search, vaults, or internal systems to the user.
|
- Never mention tools, memory_search, vaults, or internal systems to the user.
|
||||||
- Never output tool calls, JSON, or function syntax in your responses.
|
- Never output tool calls, JSON, or function syntax in your responses.
|
||||||
- Remember what the user tells you during our conversation.
|
- Remember what the user tells you during our conversation.
|
||||||
- If you don't know something, say so honestly.
|
- If you don't know something, say so honestly — never fabricate facts.
|
||||||
|
- If a request is ambiguous, ask a brief clarifying question before guessing.
|
||||||
- Use the user's name if you know it.
|
- Use the user's name if you know it.
|
||||||
- When you state a fact, commit to it. Never contradict a correct statement you
|
- When you state a fact, commit to it. Never contradict a correct statement you
|
||||||
just made in the same response. If uncertain, express uncertainty at the start —
|
just made in the same response. If uncertain, express uncertainty at the start —
|
||||||
never state something confidently and then immediately undermine it.
|
never state something confidently and then immediately undermine it.
|
||||||
- NEVER attempt arithmetic in your head — LLMs are unreliable at multi-digit math.
|
- NEVER attempt arithmetic in your head. If asked to compute anything, respond:
|
||||||
If asked to compute anything (multiply, divide, square root, exponents, etc.),
|
"I'm not reliable at math without a calculator tool — let me know if you'd
|
||||||
tell the user you need a calculator tool to give an exact answer.
|
like me to walk through the logic instead."
|
||||||
|
- Do NOT end responses with generic chatbot phrases like "I'm here to help" or
|
||||||
|
"feel free to ask." Stay in character.
|
||||||
|
- When your values conflict (e.g. honesty vs. helpfulness), lead with honesty.
|
||||||
|
Acknowledge the tension openly rather than defaulting to generic agreeableness.
|
||||||
|
|
||||||
|
## Agent Roster (complete — no others exist)
|
||||||
|
- Timmy: core sovereign AI (you)
|
||||||
|
- Echo: research, summarization, fact-checking
|
||||||
|
- Mace: security, monitoring, threat-analysis
|
||||||
|
- Forge: coding, debugging, testing
|
||||||
|
- Seer: analytics, visualization, prediction
|
||||||
|
- Helm: devops, automation, configuration
|
||||||
|
- Quill: writing, editing, documentation
|
||||||
|
- Pixel: image-generation, storyboard, design
|
||||||
|
- Lyra: music-generation, vocals, composition
|
||||||
|
- Reel: video-generation, animation, motion
|
||||||
|
Do NOT invent agents not listed here. If asked about an unlisted agent, say it doesn't exist.
|
||||||
|
Use ONLY the capabilities listed above when describing agents — do not embellish or invent.
|
||||||
|
|
||||||
|
## What you CAN and CANNOT access
|
||||||
|
- You CANNOT query the live task queue, agent statuses, or system metrics on your own.
|
||||||
|
- You CANNOT access real-time data without tools.
|
||||||
|
- If asked about current tasks, agent status, or system state and no system context
|
||||||
|
is provided, say "I don't have live access to that — check the dashboard."
|
||||||
|
- Your conversation history persists in a database across requests, but the
|
||||||
|
dashboard chat display resets on server restart.
|
||||||
|
- Do NOT claim abilities you don't have. When uncertain, say "I don't know."
|
||||||
|
|
||||||
Sir, affirmative."""
|
Sir, affirmative."""
|
||||||
|
|
||||||
@@ -35,6 +65,8 @@ Sir, affirmative."""
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
TIMMY_SYSTEM_PROMPT_FULL = """You are Timmy — a sovereign AI agent running locally on this Mac.
|
TIMMY_SYSTEM_PROMPT_FULL = """You are Timmy — a sovereign AI agent running locally on this Mac.
|
||||||
|
You run on the llama3.2 model via Ollama on localhost. You are not GPT, not Claude,
|
||||||
|
not a custom model — you are llama3.2 wrapped in the Timmy agent framework.
|
||||||
No cloud dependencies. You think clearly, speak plainly, act with intention.
|
No cloud dependencies. You think clearly, speak plainly, act with intention.
|
||||||
Grounded in Christian faith, powered by Bitcoin economics, committed to the
|
Grounded in Christian faith, powered by Bitcoin economics, committed to the
|
||||||
user's digital sovereignty.
|
user's digital sovereignty.
|
||||||
@@ -57,6 +89,28 @@ user's digital sovereignty.
|
|||||||
- Similarity-based retrieval
|
- Similarity-based retrieval
|
||||||
- Use `memory_search` tool to find relevant past context
|
- Use `memory_search` tool to find relevant past context
|
||||||
|
|
||||||
|
## Agent Roster (complete — no others exist)
|
||||||
|
- Timmy: core sovereign AI (you)
|
||||||
|
- Echo: research, summarization, fact-checking
|
||||||
|
- Mace: security, monitoring, threat-analysis
|
||||||
|
- Forge: coding, debugging, testing
|
||||||
|
- Seer: analytics, visualization, prediction
|
||||||
|
- Helm: devops, automation, configuration
|
||||||
|
- Quill: writing, editing, documentation
|
||||||
|
- Pixel: image-generation, storyboard, design
|
||||||
|
- Lyra: music-generation, vocals, composition
|
||||||
|
- Reel: video-generation, animation, motion
|
||||||
|
Do NOT invent agents not listed here. If asked about an unlisted agent, say it doesn't exist.
|
||||||
|
Use ONLY the capabilities listed above when describing agents — do not embellish or invent.
|
||||||
|
|
||||||
|
## What you CAN and CANNOT access
|
||||||
|
- You CANNOT query the live task queue, agent statuses, or system metrics on your own.
|
||||||
|
- If asked about current tasks, agent status, or system state and no system context
|
||||||
|
is provided, say "I don't have live access to that — check the dashboard."
|
||||||
|
- Your conversation history persists in a database across requests, but the
|
||||||
|
dashboard chat display resets on server restart.
|
||||||
|
- Do NOT claim abilities you don't have. When uncertain, say "I don't know."
|
||||||
|
|
||||||
## Tool Usage Guidelines
|
## Tool Usage Guidelines
|
||||||
|
|
||||||
### When NOT to use tools:
|
### When NOT to use tools:
|
||||||
@@ -81,9 +135,13 @@ user's digital sovereignty.
|
|||||||
- Never narrate your reasoning process. Just give the answer.
|
- Never narrate your reasoning process. Just give the answer.
|
||||||
- Never show raw tool call JSON or function syntax in responses.
|
- Never show raw tool call JSON or function syntax in responses.
|
||||||
- Use the user's name if known.
|
- Use the user's name if known.
|
||||||
|
- If a request is ambiguous, ask a brief clarifying question before guessing.
|
||||||
- When you state a fact, commit to it. Never contradict a correct statement you
|
- When you state a fact, commit to it. Never contradict a correct statement you
|
||||||
just made in the same response. If uncertain, express uncertainty at the start —
|
just made in the same response. If uncertain, express uncertainty at the start —
|
||||||
never state something confidently and then immediately undermine it.
|
never state something confidently and then immediately undermine it.
|
||||||
|
- Do NOT end responses with generic chatbot phrases like "I'm here to help" or
|
||||||
|
"feel free to ask." Stay in character.
|
||||||
|
- When your values conflict (e.g. honesty vs. helpfulness), lead with honesty.
|
||||||
|
|
||||||
Sir, affirmative."""
|
Sir, affirmative."""
|
||||||
|
|
||||||
|
|||||||
@@ -429,6 +429,44 @@ a:hover { color: var(--orange); }
|
|||||||
.chat-message.agent .msg-body { border-left: 3px solid var(--purple); }
|
.chat-message.agent .msg-body { border-left: 3px solid var(--purple); }
|
||||||
.chat-message.error-msg .msg-body { border-left: 3px solid var(--red); color: var(--red); }
|
.chat-message.error-msg .msg-body { border-left: 3px solid var(--red); color: var(--red); }
|
||||||
|
|
||||||
|
/* ── Markdown rendering in Timmy chat ─────────────────── */

/* Container: let rendered markdown wrap normally. */
.timmy-md {
  white-space: normal;
}

/* Paragraphs: small gap between them, none after the last one. */
.timmy-md p {
  margin: 0 0 0.5em;
}
.timmy-md p:last-child {
  margin-bottom: 0;
}

/* Fenced code blocks: keep whitespace and scroll horizontally. */
.timmy-md pre {
  background: #0d0620;
  border: 1px solid var(--border);
  border-radius: var(--radius-sm);
  padding: 10px 12px;
  overflow-x: auto;
  margin: 0.5em 0;
  white-space: pre;
}

/* All code, inline or fenced. */
.timmy-md code {
  font-family: var(--font);
  font-size: 0.9em;
}

/* Inline code only (not inside a <pre>). */
.timmy-md :not(pre) > code {
  background: rgba(168, 85, 247, 0.15);
  padding: 2px 5px;
  border-radius: 3px;
  color: var(--text-bright);
}

/* Lists. */
.timmy-md ul,
.timmy-md ol {
  padding-left: 1.5em;
  margin: 0.4em 0;
}

/* Block quotes. */
.timmy-md blockquote {
  border-left: 3px solid var(--purple);
  padding-left: 10px;
  color: var(--text-dim);
  margin: 0.5em 0;
}

/* Headings h1–h3 share the body font size inside chat bubbles. */
.timmy-md h1,
.timmy-md h2,
.timmy-md h3 {
  color: var(--text-bright);
  margin: 0.6em 0 0.3em;
  font-size: 1em;
  font-weight: 700;
}

/* Links. */
.timmy-md a {
  color: var(--purple);
}
|
||||||
|
|
||||||
/* Mobile chat classes (used by mobile.html) */
|
/* Mobile chat classes (used by mobile.html) */
|
||||||
.chat-container {
|
.chat-container {
|
||||||
overflow-y: auto;
|
overflow-y: auto;
|
||||||
|
|||||||
@@ -304,3 +304,244 @@ def test_api_approve_nonexistent(client):
|
|||||||
def test_api_veto_nonexistent(client):
|
def test_api_veto_nonexistent(client):
|
||||||
resp = client.patch("/api/tasks/nonexistent/veto")
|
resp = client.patch("/api/tasks/nonexistent/veto")
|
||||||
assert resp.status_code == 404
|
assert resp.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
# ── Chat-to-Task Pipeline Tests ──────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractTaskFromMessage:
    """Queue-intent detection via ``_extract_task_from_message``."""

    @staticmethod
    def _extract(message):
        # Imported at call time, not at module import.
        from dashboard.routes.agents import _extract_task_from_message

        return _extract_task_from_message(message)

    # -- messages that should be recognised as task requests ---------------

    def test_add_to_queue(self):
        task = self._extract("Add refactor the login to the task queue")
        assert task is not None
        assert task["agent"] == "timmy"
        assert task["priority"] == "normal"

    def test_schedule_this(self):
        task = self._extract("Schedule this for later")
        assert task is not None

    def test_create_a_task(self):
        task = self._extract("Create a task to fix the login page")
        assert task is not None
        assert "title" in task

    # -- messages that must NOT create a task ------------------------------

    def test_normal_message_returns_none(self):
        assert self._extract("Hello, how are you?") is None

    def test_meta_question_about_tasks_returns_none(self):
        assert self._extract("How do I create a task?") is None

    def test_what_is_question_returns_none(self):
        assert self._extract("What is a task queue?") is None

    def test_explain_question_returns_none(self):
        assert self._extract("Can you explain how to create a task?") is None

    def test_what_would_question_returns_none(self):
        assert self._extract("What would a task flow look like?") is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractAgentFromMessage:
    """Agent-name parsing via ``_extract_agent_from_message``."""

    @staticmethod
    def _agent_for(message):
        # Imported at call time, not at module import.
        from dashboard.routes.agents import _extract_agent_from_message

        return _extract_agent_from_message(message)

    def test_extracts_forge(self):
        assert self._agent_for("Create a task for Forge to refactor") == "forge"

    def test_extracts_echo(self):
        assert self._agent_for("Add research for Echo to the queue") == "echo"

    def test_case_insensitive(self):
        # Upper-case agent names come back lower-cased.
        assert self._agent_for("Schedule this for SEER") == "seer"

    def test_defaults_to_timmy(self):
        # No "for <agent>" phrase at all.
        assert self._agent_for("Create a task to fix the bug") == "timmy"

    def test_ignores_unknown_agent(self):
        # "BobAgent" is not a known agent, so the default applies.
        assert self._agent_for("Create a task for BobAgent") == "timmy"
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractPriorityFromMessage:
    """Tests for _extract_priority_from_message.

    Covers every priority level, including the multi-word and hyphenated
    keywords ("high priority", "low-priority", ...) and the default.
    """

    @staticmethod
    def _priority_of(message):
        # Imported at call time, not at module import.
        from dashboard.routes.agents import _extract_priority_from_message

        return _extract_priority_from_message(message)

    # -- urgent ------------------------------------------------------------

    def test_urgent(self):
        assert self._priority_of("urgent: fix the server") == "urgent"

    def test_critical(self):
        assert self._priority_of("This is critical, do it now") == "urgent"

    def test_asap(self):
        # Matching is case-insensitive.
        assert self._priority_of("Fix this ASAP") == "urgent"

    def test_emergency(self):
        assert self._priority_of("emergency: the server is down") == "urgent"

    # -- high --------------------------------------------------------------

    def test_high_priority(self):
        assert self._priority_of("This is important work") == "high"

    def test_high_priority_phrase(self):
        # The literal multi-word keyword and its hyphenated form.
        assert self._priority_of("Create a high priority task to refactor auth") == "high"
        assert self._priority_of("high-priority: refactor auth") == "high"

    # -- low ---------------------------------------------------------------

    def test_low_priority(self):
        assert self._priority_of("minor cleanup task") == "low"

    def test_low_priority_phrase(self):
        # The literal multi-word keyword and its hyphenated form.
        assert self._priority_of("low priority cleanup of the docs") == "low"
        assert self._priority_of("low-priority cleanup of the docs") == "low"

    # -- default -----------------------------------------------------------

    def test_default_normal(self):
        assert self._priority_of("Fix the login page") == "normal"
|
||||||
|
|
||||||
|
|
||||||
|
class TestTitleCleaning:
    """Title extraction and clean-up done by ``_extract_task_from_message``."""

    @staticmethod
    def _title_of(message):
        # Imported at call time, not at module import.
        from dashboard.routes.agents import _extract_task_from_message

        task = _extract_task_from_message(message)
        assert task is not None
        return task["title"]

    def test_strips_agent_from_title(self):
        lowered = self._title_of("Create a task for Forge to refactor the login").lower()
        assert "forge" not in lowered
        # The title must not start with the word "for" either.
        assert lowered.split()[0:1] != ["for"]

    def test_strips_priority_from_title(self):
        title = self._title_of("Create an urgent task to fix the server")
        assert "urgent" not in title.lower()

    def test_title_is_capitalized(self):
        title = self._title_of("Add refactor the login to the task queue")
        assert title[0].isupper()

    def test_title_capped_at_120_chars(self):
        oversized = "Create a task to " + "x" * 200
        assert len(self._title_of(oversized)) <= 120
|
||||||
|
|
||||||
|
|
||||||
|
class TestFullExtraction:
    """Agent, priority and description extracted together from one message."""

    @staticmethod
    def _extract(message):
        # Imported at call time, not at module import.
        from dashboard.routes.agents import _extract_task_from_message

        return _extract_task_from_message(message)

    def test_task_includes_agent_and_priority(self):
        task = self._extract("Create a high priority task for Forge to refactor auth")
        assert task is not None
        assert task["agent"] == "forge"
        assert task["priority"] == "high"
        # The original message is kept as the description.
        assert task["description"]

    def test_create_with_all_fields(self):
        task = self._extract("Add an urgent task for Mace to audit security to the queue")
        assert task is not None
        assert task["agent"] == "mace"
        assert task["priority"] == "urgent"
|
||||||
|
|
||||||
|
|
||||||
|
# ── Integration: chat_timmy Route ─────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class TestChatTimmyIntegration:
    """End-to-end checks of the /agents/timmy/chat route via the test client."""

    CHAT_URL = "/agents/timmy/chat"

    def _send(self, client, text):
        # POST *text* as the ``message`` form field and return the response.
        return client.post(self.CHAT_URL, data={"message": text})

    # -- task-creation path --------------------------------------------------
    # NOTE(review): the assertions below are disjunctions; the second
    # alternative may also be satisfied by the echoed user message —
    # TODO confirm against the rendered template and tighten if so.

    def test_chat_creates_task_on_queue_request(self, client):
        reply = self._send(client, "Create a task to refactor the login module")
        assert reply.status_code == 200
        assert "Task queued" in reply.text or "task" in reply.text.lower()

    def test_chat_creates_task_with_agent(self, client):
        reply = self._send(client, "Add deploy monitoring for Helm to the task queue")
        assert reply.status_code == 200
        assert "helm" in reply.text.lower() or "Task queued" in reply.text

    def test_chat_creates_task_with_priority(self, client):
        reply = self._send(client, "Create an urgent task to fix the production server")
        assert reply.status_code == 200
        assert "Task queued" in reply.text or "urgent" in reply.text.lower()

    # -- normal chat path ----------------------------------------------------

    @patch("dashboard.routes.agents.timmy_chat")
    def test_chat_injects_datetime_context(self, mock_chat, client):
        mock_chat.return_value = "Hello there!"
        self._send(client, "Hello Timmy")
        mock_chat.assert_called_once()
        prompt = mock_chat.call_args[0][0]
        assert "[System: Current date/time is" in prompt

    @patch("dashboard.routes.agents.timmy_chat")
    @patch("dashboard.routes.agents._build_queue_context")
    def test_chat_injects_queue_context_on_queue_query(self, mock_ctx, mock_chat, client):
        # The innermost @patch supplies the first mock argument (mock_ctx).
        mock_ctx.return_value = "[System: Task queue — 3 pending approval, 1 running, 5 completed.]"
        mock_chat.return_value = "There are 3 tasks pending."
        self._send(client, "What tasks are in the queue?")
        mock_ctx.assert_called_once()
        mock_chat.assert_called_once()
        prompt = mock_chat.call_args[0][0]
        assert "[System: Task queue" in prompt

    @patch("dashboard.routes.agents.timmy_chat")
    @patch("dashboard.routes.agents._build_queue_context")
    def test_chat_no_queue_context_for_normal_message(self, mock_ctx, mock_chat, client):
        mock_chat.return_value = "Hi!"
        self._send(client, "Tell me a joke")
        mock_ctx.assert_not_called()

    @patch("dashboard.routes.agents.timmy_chat")
    def test_chat_normal_message_uses_timmy(self, mock_chat, client):
        mock_chat.return_value = "I'm doing well, thank you."
        reply = self._send(client, "How are you?")
        assert reply.status_code == 200
        mock_chat.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildQueueContext:
    """Behaviour of the ``_build_queue_context`` helper."""

    def test_returns_string_with_counts(self):
        from dashboard.routes.agents import _build_queue_context
        from task_queue.models import create_task

        # Make sure at least one task exists before building the summary.
        create_task(title="Context test task", created_by="test")
        summary = _build_queue_context()
        assert "[System: Task queue" in summary
        assert "pending" in summary.lower()

    def test_returns_empty_on_error(self):
        from dashboard.routes.agents import _build_queue_context

        # A failing count lookup must yield an empty string, not an exception.
        failing_counts = patch(
            "task_queue.models.get_counts_by_status",
            side_effect=Exception("DB error"),
        )
        with failing_counts:
            summary = _build_queue_context()
        assert isinstance(summary, str)
        assert summary == ""
|
||||||
|
|||||||
Reference in New Issue
Block a user