feat: migrate to Agno native HITL tool confirmation flow (#158)

Replace the homebrew regex-based tool extraction and manual dispatch (tool_executor.py) with Agno's built-in Human-In-The-Loop confirmation:

- Toolkit(requires_confirmation_tools=...) marks dangerous tools
- agent.run() returns RunOutput with status=paused when confirmation needed
- RunRequirement.confirm()/reject() + agent.continue_run() resumes execution

Dashboard and Discord vendor both use the native flow. DuckDuckGo import isolated so its absence doesn't kill all tools. Test stubs cleaned up (agno is a real dependency, only truly optional packages stubbed).

1384 tests pass in parallel (~14s).

Co-authored-by: Trip T <trip@local>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 21:54:04 -04:00
parent 574031a55c
commit 904a7c564e
18 changed files with 1317 additions and 85 deletions
--- a/src/dashboard/routes/agents.py
+++ b/src/dashboard/routes/agents.py
@@ -1,4 +1,5 @@
 import asyncio
+import json
 import logging
 from datetime import datetime

@@ -7,12 +8,22 @@ from fastapi.responses import HTMLResponse

 from dashboard.store import message_log
 from dashboard.templating import templates
-from timmy.session import chat as agent_chat
+from timmy.session import _clean_response, chat_with_tools, continue_chat
+from timmy.tool_safety import (
+    format_action_description,
+    get_impact_level,
+)

 logger = logging.getLogger(__name__)

 router = APIRouter(prefix="/agents", tags=["agents"])

+MAX_MESSAGE_LENGTH = 10_000  # chars — reject before hitting the model
+
+# In-memory store for paused runs (approval_id -> run context).
+# Each entry holds the RunOutput, the RunRequirement ref, and tool metadata.
+_pending_runs: dict[str, dict] = {}
+

@router.get("")
 async def list_agents():
@@ -62,25 +73,72 @@ async def clear_history(request: Request):

@router.post("/default/chat", response_class=HTMLResponse)
 async def chat_agent(request: Request, message: str = Form(...)):
-    """Chat — synchronous response."""
+    """Chat — synchronous response with native Agno tool confirmation."""
    message = message.strip()
    if not message:
        from fastapi import HTTPException

        raise HTTPException(status_code=400, detail="Message cannot be empty")

+    if len(message) > MAX_MESSAGE_LENGTH:
+        from fastapi import HTTPException
+
+        raise HTTPException(status_code=422, detail="Message too long")
+
    timestamp = datetime.now().strftime("%H:%M:%S")
    response_text = None
    error_text = None

    try:
-        response_text = await asyncio.to_thread(agent_chat, message)
+        run_output = await asyncio.to_thread(chat_with_tools, message)
    except Exception as exc:
        logger.error("Chat error: %s", exc)
        error_text = f"Chat error: {exc}"
+        run_output = None
+
+    # Check if Agno paused the run for tool confirmation
+    tool_actions = []
+    if run_output is not None:
+        status = getattr(run_output, "status", None)
+        is_paused = status == "PAUSED" or str(status) == "RunStatus.paused"
+
+        if is_paused and getattr(run_output, "active_requirements", None):
+            for req in run_output.active_requirements:
+                if getattr(req, "needs_confirmation", False):
+                    te = req.tool_execution
+                    tool_name = getattr(te, "tool_name", "unknown")
+                    tool_args = getattr(te, "tool_args", {}) or {}
+
+                    from timmy.approvals import create_item
+
+                    item = create_item(
+                        title=f"Dashboard: {tool_name}",
+                        description=format_action_description(tool_name, tool_args),
+                        proposed_action=json.dumps({"tool": tool_name, "args": tool_args}),
+                        impact=get_impact_level(tool_name),
+                    )
+                    _pending_runs[item.id] = {
+                        "run_output": run_output,
+                        "requirement": req,
+                        "tool_name": tool_name,
+                        "tool_args": tool_args,
+                    }
+                    tool_actions.append(
+                        {
+                            "approval_id": item.id,
+                            "tool_name": tool_name,
+                            "description": format_action_description(tool_name, tool_args),
+                            "impact": get_impact_level(tool_name),
+                        }
+                    )
+
+        raw_content = run_output.content if hasattr(run_output, "content") else ""
+        response_text = _clean_response(raw_content or "")
+        if not response_text and not tool_actions:
+            response_text = None  # let error template show if needed

    message_log.append(role="user", content=message, timestamp=timestamp, source="browser")
-    if response_text is not None:
+    if response_text:
        message_log.append(
            role="agent", content=response_text, timestamp=timestamp, source="browser"
        )
@@ -97,5 +155,84 @@ async def chat_agent(request: Request, message: str = Form(...)):
            "timestamp": timestamp,
            "task_id": None,
            "queue_info": None,
+            "tool_actions": tool_actions,
+        },
+    )
+
+
+@router.post("/default/tool/{approval_id}/approve", response_class=HTMLResponse)
+async def approve_tool(request: Request, approval_id: str):
+    """Approve a paused tool call and resume the run.
+
+    Responds with a 404 HTML fragment when no paused run is stored under
+    ``approval_id``. Otherwise the stored requirement is confirmed, the run
+    is resumed in a worker thread, and ``partials/chat_tool_result.html`` is
+    rendered with the result text capped at 2000 characters. If resuming
+    raises, the error message is rendered as the result instead.
+    """
+    from timmy.approvals import approve
+
+    paused = _pending_runs.pop(approval_id, None)
+    if not paused:
+        return HTMLResponse(
+            "<p class='text-danger'>Action not found or already processed.</p>",
+            status_code=404,
+        )
+
+    approve(approval_id)
+
+    tool_name = paused["tool_name"]
+
+    # Mark the requirement confirmed; Agno runs the tool during continue_run.
+    paused["requirement"].confirm()
+
+    try:
+        resumed = await asyncio.to_thread(continue_chat, paused["run_output"])
+        # Prefer the first non-empty result recorded for the approved tool.
+        executions = getattr(resumed, "tools", None) or []
+        tool_result = next(
+            (
+                te.result
+                for te in executions
+                if getattr(te, "tool_name", None) == tool_name and getattr(te, "result", None)
+            ),
+            "",
+        )
+        # Otherwise fall back to the run's content, then to a generic notice.
+        if not tool_result:
+            tool_result = getattr(resumed, "content", None) or "Tool executed successfully."
+    except Exception as exc:
+        logger.error("Tool execution failed: %s", exc)
+        tool_result = f"Error: {exc}"
+
+    context = {
+        "approval_id": approval_id,
+        "tool_name": tool_name,
+        "status": "approved",
+        "result": str(tool_result)[:2000],
+    }
+    return templates.TemplateResponse(
+        request,
+        "partials/chat_tool_result.html",
+        context,
+    )
+
+
+@router.post("/default/tool/{approval_id}/reject", response_class=HTMLResponse)
+async def reject_tool(request: Request, approval_id: str):
+    """Reject a pending tool action.
+
+    If a paused run is stored under ``approval_id`` it is removed, its
+    requirement is rejected, and the run is resumed in a worker thread.
+    ``timmy.approvals.reject`` is called for the id whether or not a paused
+    run was found, and the "rejected" result partial is always rendered.
+    """
+    from timmy.approvals import reject
+
+    pending = _pending_runs.pop(approval_id, None)
+    tool_name = "action"
+
+    if pending:
+        tool_name = pending["tool_name"]
+        req = pending["requirement"]
+        req.reject(note="User rejected from dashboard")
+        # Resume so the agent knows the tool was rejected
+        try:
+            await asyncio.to_thread(continue_chat, pending["run_output"])
+        except Exception as exc:
+            # Best-effort: the rejection is still recorded and rendered below,
+            # but log the failure instead of discarding it silently.
+            logger.warning("Resume after tool rejection failed: %s", exc)
+
+    reject(approval_id)
+
+    return templates.TemplateResponse(
+        request,
+        "partials/chat_tool_result.html",
+        {
+            "approval_id": approval_id,
+            "tool_name": tool_name,
+            "status": "rejected",
+            "result": "",
        },
    )