forked from Rockachopa/Timmy-time-dashboard
* fix: name extraction blocklist, memory preview escaping, and gitignore cleanup - Add _NAME_BLOCKLIST to extract_user_name() to reject gerunds and UI-state words like "Sending" that were incorrectly captured as user names - Collapse whitespace in get_memory_status() preview so newlines survive JSON serialization without showing raw \n escape sequences - Broaden .gitignore from specific memory/self/user_profile.md to memory/self/ and untrack memory/self/methodology.md (runtime-edited file) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: catch Ollama connection errors in session.py + add 71 smoke tests - Wrap agent.run() in session.py with try/except so Ollama connection failures return a graceful fallback message instead of dumping raw tracebacks to Docker logs - Add tests/test_smoke.py with 71 tests covering every GET route: core pages, feature pages, JSON APIs, and a parametrized no-500 sweep — catches import errors, template failures, and schema mismatches that unit tests miss Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: agentic loop for multi-step tasks + Round 10 regression fixes Agentic loop (Parts 1-4): - Add multi-step chaining instructions to system prompt - New agentic_loop.py with plan→execute→adapt→summarize flow - Register plan_and_execute tool for background task execution - Add max_agent_steps config setting (default: 10) - Discord fix: 300s timeout, typing indicator, send error handling - 16 new unit + e2e tests for agentic loop Round 10 regressions (R1-R5, P1): - R1: Fix literal \n escape sequences in tool responses - R2: Chat timeout/error feedback in agent panel - R3: /hands infinite spinner → static empty states - R4: /self-coding infinite spinner → static stats + journal - R5: /grok/status raw JSON → HTML dashboard template - P1: VETO confirmation dialog on task cards Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: briefing route 500 in CI when agno is MagicMock stub _call_agent() returned a 
MagicMock instead of a string when agno is stubbed in tests, causing SQLite "Error binding parameter 4" on save. Ensure the return value is always an actual string. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: briefing route 500 in CI — graceful degradation at route level When agno is stubbed with MagicMock in CI, agent.run() returns a MagicMock instead of raising — so the exception handler never fires and a MagicMock propagates as the summary to SQLite, which can't bind it. Fix: catch at the route level and return a fallback Briefing object. This follows the project's graceful degradation pattern — the briefing page always renders, even when the backend is completely unavailable. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Trip T <trip@local> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
103 lines
3.5 KiB
Python
103 lines
3.5 KiB
Python
"""E2E: verify multi-step tool chaining works end-to-end.

These tests validate the full agentic loop pipeline: planning,
execution, adaptation, and progress tracking.
"""
|
|
|
|
import pytest
|
|
from unittest.mock import MagicMock, patch, AsyncMock
|
|
from timmy.agentic_loop import run_agentic_loop
|
|
|
|
|
|
def _mock_run(content: str):
|
|
"""Create a mock return value for agent.run()."""
|
|
m = MagicMock()
|
|
m.content = content
|
|
return m
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_multistep_chain_completes_all_steps():
    """Happy path: a three-item plan runs to completion.

    The agent is scripted with five replies and the loop is expected to
    report status "completed", record three steps, and call ``agent.run``
    exactly five times.
    """
    # Replies are consumed in call order: plan, three step results, summary.
    scripted_replies = [
        _mock_run("1. Search AI news\n2. Write to file\n3. Verify"),
        _mock_run("Found 5 articles about AI in March 2026."),
        _mock_run("Wrote summary to /tmp/ai_news.md"),
        _mock_run("File exists, 15 lines."),
        _mock_run("Searched, wrote, verified."),
    ]
    agent = MagicMock()
    agent.run = MagicMock(side_effect=scripted_replies)

    # Swap in the scripted agent and silence progress broadcasting.
    agent_patch = patch(
        "timmy.agentic_loop._get_loop_agent", return_value=agent
    )
    broadcast_patch = patch(
        "timmy.agentic_loop._broadcast_progress", new_callable=AsyncMock
    )
    with agent_patch, broadcast_patch:
        outcome = await run_agentic_loop(
            "Search AI news and write summary to file"
        )

    assert outcome.status == "completed"
    assert len(outcome.steps) == 3
    assert agent.run.call_count == 5  # plan + 3 steps + summary
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_multistep_chain_adapts_on_failure():
    """A failing agent call is followed by an adapted step and the run completes.

    The third scripted ``agent.run`` call raises ``Exception``; the test
    expects the overall status to be "completed" and at least one recorded
    step to carry the status "adapted".
    """
    # Consumed in call order. Presumably: plan, step 1 result, step 2 failure,
    # adaptation reply, step 3 result, summary.
    scripted_replies = [
        _mock_run("1. Read config\n2. Update setting\n3. Verify"),
        _mock_run("Config: timeout=30"),
        Exception("Permission denied"),  # raised by the mock instead of returned
        _mock_run("Adapted: wrote to ~/config.yaml instead"),
        _mock_run("Verified: timeout=60"),
        _mock_run("Updated config. Used ~/config.yaml due to permissions."),
    ]
    agent = MagicMock()
    agent.run = MagicMock(side_effect=scripted_replies)

    # Swap in the scripted agent and silence progress broadcasting.
    agent_patch = patch(
        "timmy.agentic_loop._get_loop_agent", return_value=agent
    )
    broadcast_patch = patch(
        "timmy.agentic_loop._broadcast_progress", new_callable=AsyncMock
    )
    with agent_patch, broadcast_patch:
        outcome = await run_agentic_loop("Update config timeout to 60")

    assert outcome.status == "completed"
    assert any(step.status == "adapted" for step in outcome.steps)
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_max_steps_enforced():
    """A five-item plan run with ``max_steps=2`` stops after two steps.

    The test expects exactly two recorded steps and the status "partial".
    """
    # Only four replies are scripted: plan, two step results, summary.
    scripted_replies = [
        _mock_run("1. A\n2. B\n3. C\n4. D\n5. E"),
        _mock_run("A done"),
        _mock_run("B done"),
        _mock_run("Completed 2 of 5 steps."),
    ]
    agent = MagicMock()
    agent.run = MagicMock(side_effect=scripted_replies)

    # Swap in the scripted agent and silence progress broadcasting.
    agent_patch = patch(
        "timmy.agentic_loop._get_loop_agent", return_value=agent
    )
    broadcast_patch = patch(
        "timmy.agentic_loop._broadcast_progress", new_callable=AsyncMock
    )
    with agent_patch, broadcast_patch:
        outcome = await run_agentic_loop("Do 5 things", max_steps=2)

    assert len(outcome.steps) == 2
    assert outcome.status == "partial"
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_progress_events_fire():
    """The ``on_progress`` callback is invoked once for each of two steps.

    The callback's ``(step, total)`` arguments are recorded and expected to
    be ``(1, 2)`` followed by ``(2, 2)``.
    """
    recorded = []

    async def on_progress(desc, step, total):
        # Only the position and plan size are checked; the description is ignored.
        recorded.append((step, total))

    # Replies are consumed in call order: plan, two step results, summary.
    scripted_replies = [
        _mock_run("1. Do A\n2. Do B"),
        _mock_run("A done"),
        _mock_run("B done"),
        _mock_run("All done"),
    ]
    agent = MagicMock()
    agent.run = MagicMock(side_effect=scripted_replies)

    # Swap in the scripted agent and silence progress broadcasting.
    agent_patch = patch(
        "timmy.agentic_loop._get_loop_agent", return_value=agent
    )
    broadcast_patch = patch(
        "timmy.agentic_loop._broadcast_progress", new_callable=AsyncMock
    )
    with agent_patch, broadcast_patch:
        await run_agentic_loop("Do A and B", on_progress=on_progress)

    assert len(recorded) == 2
    assert recorded[0] == (1, 2)
    assert recorded[1] == (2, 2)
|