feat: migrate to Agno native HITL tool confirmation flow (#158)

Replace the homebrew regex-based tool extraction and manual dispatch (tool_executor.py) with Agno's built-in Human-In-The-Loop confirmation:

- Toolkit(requires_confirmation_tools=...) marks dangerous tools
- agent.run() returns RunOutput with status=paused when confirmation needed
- RunRequirement.confirm()/reject() + agent.continue_run() resumes execution

Dashboard and Discord vendor both use the native flow. DuckDuckGo import isolated so its absence doesn't kill all tools. Test stubs cleaned up (agno is a real dependency, only truly optional packages stubbed).

1384 tests pass in parallel (~14s).

Co-authored-by: Trip T <trip@local>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 21:54:04 -04:00
parent 574031a55c
commit 904a7c564e
18 changed files with 1317 additions and 85 deletions
--- a/tests/timmy/test_agent.py
+++ b/tests/timmy/test_agent.py
@@ -232,16 +232,21 @@ def test_model_supports_tools_unknown_model_gets_tools():


 def test_create_timmy_no_tools_for_small_model():
-    """llama3.2 should get no tools."""
+    """Small models (llama3.2) should get no tools."""
+    mock_toolkit = MagicMock()
    with patch("timmy.agent.Agent") as MockAgent, patch("timmy.agent.Ollama"), patch(
        "timmy.agent.SqliteDb"
+    ), patch("timmy.agent.create_full_toolkit", return_value=mock_toolkit), patch(
+        "timmy.agent._resolve_model_with_fallback", return_value=("llama3.2:3b", False)
+    ), patch(
+        "timmy.agent._check_model_available", return_value=True
    ):
        from timmy.agent import create_timmy

        create_timmy()

        kwargs = MockAgent.call_args.kwargs
-        # Default model is llama3.2 → tools should be None
+        # llama3.2 is in _SMALL_MODEL_PATTERNS → tools should be None
        assert kwargs["tools"] is None