1
0

perf: eliminate redundant LLM calls in agentic loop (#24)

Three optimizations to the agentic loop:
1. Cache loop agent as singleton (avoid repeated warmups)
2. Sliding window for step context (last 2 results, not all)
3. Replace summary LLM call with deterministic summary

Saves 1 full LLM inference call per agentic loop invocation
(30-60s on local models) and reduces context window pressure.

Also fixes pre-existing REPL test bugs in test_cli.py (a missing `result =` assignment).
This commit is contained in:
2026-03-14 20:54:33 -04:00
parent bbbbdcdfa9
commit b9b78adaa2
4 changed files with 29 additions and 47 deletions

View File

@@ -28,7 +28,6 @@ async def test_multistep_chain_completes_all_steps():
_mock_run("Found 5 articles about AI in March 2026."),
_mock_run("Wrote summary to /tmp/ai_news.md"),
_mock_run("File exists, 15 lines."),
-_mock_run("Searched, wrote, verified."),
]
)
@@ -40,7 +39,7 @@ async def test_multistep_chain_completes_all_steps():
assert result.status == "completed"
assert len(result.steps) == 3
-assert mock_agent.run.call_count == 5 # plan + 3 steps + summary
+assert mock_agent.run.call_count == 4 # plan + 3 steps
@pytest.mark.asyncio
@@ -54,7 +53,6 @@ async def test_multistep_chain_adapts_on_failure():
Exception("Permission denied"),
_mock_run("Adapted: wrote to ~/config.yaml instead"),
_mock_run("Verified: timeout=60"),
-_mock_run("Updated config. Used ~/config.yaml due to permissions."),
]
)
@@ -77,7 +75,6 @@ async def test_max_steps_enforced():
_mock_run("1. A\n2. B\n3. C\n4. D\n5. E"),
_mock_run("A done"),
_mock_run("B done"),
-_mock_run("Completed 2 of 5 steps."),
]
)
@@ -105,7 +102,6 @@ async def test_progress_events_fire():
_mock_run("1. Do A\n2. Do B"),
_mock_run("A done"),
_mock_run("B done"),
-_mock_run("All done"),
]
)