perf: eliminate redundant LLM calls in agentic loop (#24)

Three optimizations to the agentic loop: (1) cache the loop agent as a singleton (avoids repeated warmups); (2) use a sliding window for step context (last 2 results, not all); (3) replace the summary LLM call with a deterministic summary. Together these save 1 full LLM inference call per agentic loop invocation (30-60s on local models) and reduce context window pressure. Also fixes pre-existing test_cli.py repl test bugs (missing `result =` assignment).
2026-03-14 20:54:33 -04:00
parent bbbbdcdfa9
commit b9b78adaa2
4 changed files with 29 additions and 47 deletions
--- a/tests/e2e/test_agentic_chain.py
+++ b/tests/e2e/test_agentic_chain.py
@@ -28,7 +28,6 @@ async def test_multistep_chain_completes_all_steps():
            _mock_run("Found 5 articles about AI in March 2026."),
            _mock_run("Wrote summary to /tmp/ai_news.md"),
            _mock_run("File exists, 15 lines."),
-            _mock_run("Searched, wrote, verified."),
        ]
    )

@@ -40,7 +39,7 @@ async def test_multistep_chain_completes_all_steps():

    assert result.status == "completed"
    assert len(result.steps) == 3
-    assert mock_agent.run.call_count == 5  # plan + 3 steps + summary
+    assert mock_agent.run.call_count == 4  # plan + 3 steps


@pytest.mark.asyncio
@@ -54,7 +53,6 @@ async def test_multistep_chain_adapts_on_failure():
            Exception("Permission denied"),
            _mock_run("Adapted: wrote to ~/config.yaml instead"),
            _mock_run("Verified: timeout=60"),
-            _mock_run("Updated config. Used ~/config.yaml due to permissions."),
        ]
    )

@@ -77,7 +75,6 @@ async def test_max_steps_enforced():
            _mock_run("1. A\n2. B\n3. C\n4. D\n5. E"),
            _mock_run("A done"),
            _mock_run("B done"),
-            _mock_run("Completed 2 of 5 steps."),
        ]
    )

@@ -105,7 +102,6 @@ async def test_progress_events_fire():
            _mock_run("1. Do A\n2. Do B"),
            _mock_run("A done"),
            _mock_run("B done"),
-            _mock_run("All done"),
        ]
    )

--- a/tests/test_agentic_loop.py
+++ b/tests/test_agentic_loop.py
@@ -59,7 +59,6 @@ async def test_planning_phase_produces_steps():
            _mock_run("Found 5 articles about AI."),
            _mock_run("Wrote summary to /tmp/ai_news.md"),
            _mock_run("File verified, 15 lines."),
-            _mock_run("Searched, wrote, verified."),
        ]
    )

@@ -82,7 +81,6 @@ async def test_loop_executes_all_steps():
            _mock_run("1. Do A\n2. Do B"),
            _mock_run("A done"),
            _mock_run("B done"),
-            _mock_run("All done"),
        ]
    )

@@ -92,8 +90,8 @@ async def test_loop_executes_all_steps():
    ):
        result = await run_agentic_loop("Do A and B")

-    # plan + 2 steps + summary = 4 calls
-    assert mock_agent.run.call_count == 4
+    # plan + 2 steps = 3 calls
+    assert mock_agent.run.call_count == 3
    assert len(result.steps) == 2


@@ -106,7 +104,6 @@ async def test_loop_respects_max_steps():
            _mock_run("1. A\n2. B\n3. C\n4. D\n5. E"),
            _mock_run("A done"),
            _mock_run("B done"),
-            _mock_run("Completed 2 of 5 steps."),
        ]
    )

@@ -131,7 +128,6 @@ async def test_failure_triggers_adaptation():
            Exception("Permission denied"),
            _mock_run("Adapted: wrote to ~/config.yaml instead"),
            _mock_run("Verified: timeout=60"),
-            _mock_run("Updated config via alternative path."),
        ]
    )

@@ -159,7 +155,6 @@ async def test_progress_callback_fires():
            _mock_run("1. Do A\n2. Do B"),
            _mock_run("A done"),
            _mock_run("B done"),
-            _mock_run("All done"),
        ]
    )

@@ -183,7 +178,6 @@ async def test_result_contains_step_metadata():
            _mock_run("1. Search\n2. Write"),
            _mock_run("Found results"),
            _mock_run("Written to file"),
-            _mock_run("Done"),
        ]
    )

@@ -207,10 +201,9 @@ async def test_config_default_used():
    # Return more steps than default config allows (10)
    steps_text = "\n".join(f"{i}. Step {i}" for i in range(1, 15))
    side_effects = [_mock_run(steps_text)]
-    # 10 step results + summary
+    # 10 step results
    for i in range(1, 11):
        side_effects.append(_mock_run(f"Step {i} done"))
-    side_effects.append(_mock_run("Summary"))

    mock_agent.run = MagicMock(side_effect=side_effects)

--- a/tests/timmy/test_cli.py
+++ b/tests/timmy/test_cli.py
@@ -229,7 +229,7 @@ def test_chat_stdin_empty_exits_with_error():
 def test_repl_exits_on_ctrl_c():
    """repl should exit gracefully on Ctrl+C (KeyboardInterrupt)."""
    with patch("builtins.input", side_effect=KeyboardInterrupt):
-        runner.invoke(app, ["repl"])
+        result = runner.invoke(app, ["repl"])

    assert result.exit_code == 0
    assert "Goodbye" in result.output
@@ -238,7 +238,7 @@ def test_repl_exits_on_ctrl_c():
 def test_repl_exits_on_ctrl_d():
    """repl should exit gracefully on Ctrl+D (EOFError)."""
    with patch("builtins.input", side_effect=EOFError):
-        runner.invoke(app, ["repl"])
+        result = runner.invoke(app, ["repl"])

    assert result.exit_code == 0
    assert "Goodbye" in result.output
@@ -247,7 +247,7 @@ def test_repl_exits_on_ctrl_d():
 def test_repl_exits_on_quit_command():
    """repl should exit when user types 'quit'."""
    with patch("builtins.input", return_value="quit"):
-        runner.invoke(app, ["repl"])
+        result = runner.invoke(app, ["repl"])

    assert result.exit_code == 0
    assert "Goodbye" in result.output
@@ -256,7 +256,7 @@ def test_repl_exits_on_quit_command():
 def test_repl_exits_on_exit_command():
    """repl should exit when user types 'exit'."""
    with patch("builtins.input", return_value="exit"):
-        runner.invoke(app, ["repl"])
+        result = runner.invoke(app, ["repl"])

    assert result.exit_code == 0
    assert "Goodbye" in result.output
@@ -269,7 +269,7 @@ def test_repl_sends_message_to_chat():
        patch("timmy.session.chat") as mock_chat,
    ):
        mock_chat.return_value = "Hi there!"
-        runner.invoke(app, ["repl"])
+        result = runner.invoke(app, ["repl"])

    mock_chat.assert_called()
    assert result.exit_code == 0
@@ -309,7 +309,7 @@ def test_repl_handles_chat_errors():
        patch("timmy.session.chat") as mock_chat,
    ):
        mock_chat.side_effect = Exception("Chat error")
-        runner.invoke(app, ["repl"])
+        result = runner.invoke(app, ["repl"])

    assert result.exit_code == 0
    assert "Error" in result.output