diff --git a/src/timmy/agentic_loop.py b/src/timmy/agentic_loop.py index df27c5dd..16f6aa48 100644 --- a/src/timmy/agentic_loop.py +++ b/src/timmy/agentic_loop.py @@ -58,6 +58,8 @@ class AgenticResult: # Agent factory # --------------------------------------------------------------------------- +_loop_agent = None + def _get_loop_agent(): """Create a fresh agent for the agentic loop. @@ -65,9 +67,12 @@ def _get_loop_agent(): Returns the same type of agent as `create_timmy()` but with a dedicated session so it doesn't pollute the main chat history. """ - from timmy.agent import create_timmy + global _loop_agent + if _loop_agent is None: + from timmy.agent import create_timmy - return create_timmy() + _loop_agent = create_timmy() + return _loop_agent # --------------------------------------------------------------------------- @@ -168,11 +173,11 @@ async def run_agentic_loop( for i, step_desc in enumerate(steps, 1): step_start = time.monotonic() + recent = completed_results[-2:] if completed_results else [] context = ( f"Task: {task}\n" - f"Plan: {plan_text}\n" - f"Completed so far: {completed_results}\n\n" - f"Now do step {i}: {step_desc}\n" + f"Step {i}/{total_steps}: {step_desc}\n" + f"Recent progress: {recent}\n\n" f"Execute this step and report what you did." ) @@ -273,27 +278,15 @@ async def run_agentic_loop( completed_results.append(f"Step {i}: FAILED") # ── Phase 3: Summary ─────────────────────────────────────────────────── - summary_prompt = ( - f"Task: {task}\n" - f"Results:\n" + "\n".join(completed_results) + "\n\n" - "Summarise what was accomplished in 2-3 sentences." - ) - try: - summary_run = await asyncio.to_thread( - agent.run, - summary_prompt, - stream=False, - session_id=f"{session_id}_summary", - ) - result.summary = ( - summary_run.content if hasattr(summary_run, "content") else str(summary_run) - ) - from timmy.session import _clean_response - - result.summary = _clean_response(result.summary) - except Exception as exc: - logger.error("Agentic loop summary failed: %s", exc) - result.summary = f"Completed {len(result.steps)} steps." + completed_count = sum(1 for s in result.steps if s.status == "completed") + adapted_count = sum(1 for s in result.steps if s.status == "adapted") + failed_count = sum(1 for s in result.steps if s.status == "failed") + parts = [f"Completed {completed_count}/{total_steps} steps"] + if adapted_count: + parts.append(f"{adapted_count} adapted") + if failed_count: + parts.append(f"{failed_count} failed") + result.summary = f"{task}: {', '.join(parts)}." # Determine final status if was_truncated: diff --git a/tests/e2e/test_agentic_chain.py b/tests/e2e/test_agentic_chain.py index debdfe42..4abe2c1e 100644 --- a/tests/e2e/test_agentic_chain.py +++ b/tests/e2e/test_agentic_chain.py @@ -28,7 +28,6 @@ async def test_multistep_chain_completes_all_steps(): _mock_run("Found 5 articles about AI in March 2026."), _mock_run("Wrote summary to /tmp/ai_news.md"), _mock_run("File exists, 15 lines."), - _mock_run("Searched, wrote, verified."), ] ) @@ -40,7 +39,7 @@ async def test_multistep_chain_completes_all_steps(): assert result.status == "completed" assert len(result.steps) == 3 - assert mock_agent.run.call_count == 5 # plan + 3 steps + summary + assert mock_agent.run.call_count == 4 # plan + 3 steps @pytest.mark.asyncio @@ -54,7 +53,6 @@ async def test_multistep_chain_adapts_on_failure(): Exception("Permission denied"), _mock_run("Adapted: wrote to ~/config.yaml instead"), _mock_run("Verified: timeout=60"), - _mock_run("Updated config. Used ~/config.yaml due to permissions."), ] ) @@ -77,7 +75,6 @@ async def test_max_steps_enforced(): _mock_run("1. A\n2. B\n3. C\n4. D\n5. E"), _mock_run("A done"), _mock_run("B done"), - _mock_run("Completed 2 of 5 steps."), ] ) @@ -105,7 +102,6 @@ async def test_progress_events_fire(): _mock_run("1. Do A\n2. Do B"), _mock_run("A done"), _mock_run("B done"), - _mock_run("All done"), ] ) diff --git a/tests/test_agentic_loop.py b/tests/test_agentic_loop.py index 8cd9a6a1..929252bf 100644 --- a/tests/test_agentic_loop.py +++ b/tests/test_agentic_loop.py @@ -59,7 +59,6 @@ async def test_planning_phase_produces_steps(): _mock_run("Found 5 articles about AI."), _mock_run("Wrote summary to /tmp/ai_news.md"), _mock_run("File verified, 15 lines."), - _mock_run("Searched, wrote, verified."), ] ) @@ -82,7 +81,6 @@ async def test_loop_executes_all_steps(): _mock_run("1. Do A\n2. Do B"), _mock_run("A done"), _mock_run("B done"), - _mock_run("All done"), ] ) @@ -92,8 +90,8 @@ async def test_loop_executes_all_steps(): ): result = await run_agentic_loop("Do A and B") - # plan + 2 steps + summary = 4 calls - assert mock_agent.run.call_count == 4 + # plan + 2 steps = 3 calls + assert mock_agent.run.call_count == 3 assert len(result.steps) == 2 @@ -106,7 +104,6 @@ async def test_loop_respects_max_steps(): _mock_run("1. A\n2. B\n3. C\n4. D\n5. E"), _mock_run("A done"), _mock_run("B done"), - _mock_run("Completed 2 of 5 steps."), ] ) @@ -131,7 +128,6 @@ async def test_failure_triggers_adaptation(): Exception("Permission denied"), _mock_run("Adapted: wrote to ~/config.yaml instead"), _mock_run("Verified: timeout=60"), - _mock_run("Updated config via alternative path."), ] ) @@ -159,7 +155,6 @@ async def test_progress_callback_fires(): _mock_run("1. Do A\n2. Do B"), _mock_run("A done"), _mock_run("B done"), - _mock_run("All done"), ] ) @@ -183,7 +178,6 @@ async def test_result_contains_step_metadata(): _mock_run("1. Search\n2. Write"), _mock_run("Found results"), _mock_run("Written to file"), - _mock_run("Done"), ] ) @@ -207,10 +201,9 @@ async def test_config_default_used(): # Return more steps than default config allows (10) steps_text = "\n".join(f"{i}. Step {i}" for i in range(1, 15)) side_effects = [_mock_run(steps_text)] - # 10 step results + summary + # 10 step results for i in range(1, 11): side_effects.append(_mock_run(f"Step {i} done")) - side_effects.append(_mock_run("Summary")) mock_agent.run = MagicMock(side_effect=side_effects) diff --git a/tests/timmy/test_cli.py b/tests/timmy/test_cli.py index bafbfdfe..83c5240a 100644 --- a/tests/timmy/test_cli.py +++ b/tests/timmy/test_cli.py @@ -229,7 +229,7 @@ def test_chat_stdin_empty_exits_with_error(): def test_repl_exits_on_ctrl_c(): """repl should exit gracefully on Ctrl+C (KeyboardInterrupt).""" with patch("builtins.input", side_effect=KeyboardInterrupt): - runner.invoke(app, ["repl"]) + result = runner.invoke(app, ["repl"]) assert result.exit_code == 0 assert "Goodbye" in result.output @@ -238,7 +238,7 @@ def test_repl_exits_on_ctrl_c(): def test_repl_exits_on_ctrl_d(): """repl should exit gracefully on Ctrl+D (EOFError).""" with patch("builtins.input", side_effect=EOFError): - runner.invoke(app, ["repl"]) + result = runner.invoke(app, ["repl"]) assert result.exit_code == 0 assert "Goodbye" in result.output @@ -247,7 +247,7 @@ def test_repl_exits_on_ctrl_d(): def test_repl_exits_on_quit_command(): """repl should exit when user types 'quit'.""" with patch("builtins.input", return_value="quit"): - runner.invoke(app, ["repl"]) + result = runner.invoke(app, ["repl"]) assert result.exit_code == 0 assert "Goodbye" in result.output @@ -256,7 +256,7 @@ def test_repl_exits_on_quit_command(): def test_repl_exits_on_exit_command(): """repl should exit when user types 'exit'.""" with patch("builtins.input", return_value="exit"): - runner.invoke(app, ["repl"]) + result = runner.invoke(app, ["repl"]) assert result.exit_code == 0 assert "Goodbye" in result.output @@ -269,7 +269,7 @@ def test_repl_sends_message_to_chat(): patch("timmy.session.chat") as mock_chat, ): mock_chat.return_value = "Hi there!" - runner.invoke(app, ["repl"]) + result = runner.invoke(app, ["repl"]) mock_chat.assert_called() assert result.exit_code == 0 @@ -309,7 +309,7 @@ def test_repl_handles_chat_errors(): patch("timmy.session.chat") as mock_chat, ): mock_chat.side_effect = Exception("Chat error") - runner.invoke(app, ["repl"]) + result = runner.invoke(app, ["repl"]) assert result.exit_code == 0 assert "Error" in result.output