forked from Rockachopa/Timmy-time-dashboard
perf: eliminate redundant LLM calls in agentic loop (#24)
Three optimizations to the agentic loop:
1. Cache the loop agent as a singleton (avoids repeated warmups).
2. Use a sliding window for step context (the last 2 results, not all of them).
3. Replace the summary LLM call with a deterministic summary.
This saves one full LLM inference call per agentic loop invocation (30-60s on local models) and reduces context window pressure. It also fixes pre-existing REPL test bugs in test_cli.py (missing `result=` assignment).
This commit is contained in:
@@ -58,6 +58,8 @@ class AgenticResult:
|
||||
# Agent factory
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_loop_agent = None
|
||||
|
||||
|
||||
def _get_loop_agent():
|
||||
"""Return the shared agent for the agentic loop, creating it on first use.
|
||||
@@ -65,9 +67,12 @@ def _get_loop_agent():
|
||||
Returns the same type of agent as `create_timmy()` but with a
|
||||
dedicated session so it doesn't pollute the main chat history.
|
||||
"""
|
||||
from timmy.agent import create_timmy
|
||||
global _loop_agent
|
||||
if _loop_agent is None:
|
||||
from timmy.agent import create_timmy
|
||||
|
||||
return create_timmy()
|
||||
_loop_agent = create_timmy()
|
||||
return _loop_agent
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -168,11 +173,11 @@ async def run_agentic_loop(
|
||||
for i, step_desc in enumerate(steps, 1):
|
||||
step_start = time.monotonic()
|
||||
|
||||
recent = completed_results[-2:] if completed_results else []
|
||||
context = (
|
||||
f"Task: {task}\n"
|
||||
f"Plan: {plan_text}\n"
|
||||
f"Completed so far: {completed_results}\n\n"
|
||||
f"Now do step {i}: {step_desc}\n"
|
||||
f"Step {i}/{total_steps}: {step_desc}\n"
|
||||
f"Recent progress: {recent}\n\n"
|
||||
f"Execute this step and report what you did."
|
||||
)
|
||||
|
||||
@@ -273,27 +278,15 @@ async def run_agentic_loop(
|
||||
completed_results.append(f"Step {i}: FAILED")
|
||||
|
||||
# ── Phase 3: Summary ───────────────────────────────────────────────────
|
||||
summary_prompt = (
|
||||
f"Task: {task}\n"
|
||||
f"Results:\n" + "\n".join(completed_results) + "\n\n"
|
||||
"Summarise what was accomplished in 2-3 sentences."
|
||||
)
|
||||
try:
|
||||
summary_run = await asyncio.to_thread(
|
||||
agent.run,
|
||||
summary_prompt,
|
||||
stream=False,
|
||||
session_id=f"{session_id}_summary",
|
||||
)
|
||||
result.summary = (
|
||||
summary_run.content if hasattr(summary_run, "content") else str(summary_run)
|
||||
)
|
||||
from timmy.session import _clean_response
|
||||
|
||||
result.summary = _clean_response(result.summary)
|
||||
except Exception as exc:
|
||||
logger.error("Agentic loop summary failed: %s", exc)
|
||||
result.summary = f"Completed {len(result.steps)} steps."
|
||||
completed_count = sum(1 for s in result.steps if s.status == "completed")
|
||||
adapted_count = sum(1 for s in result.steps if s.status == "adapted")
|
||||
failed_count = sum(1 for s in result.steps if s.status == "failed")
|
||||
parts = [f"Completed {completed_count}/{total_steps} steps"]
|
||||
if adapted_count:
|
||||
parts.append(f"{adapted_count} adapted")
|
||||
if failed_count:
|
||||
parts.append(f"{failed_count} failed")
|
||||
result.summary = f"{task}: {', '.join(parts)}."
|
||||
|
||||
# Determine final status
|
||||
if was_truncated:
|
||||
|
||||
Reference in New Issue
Block a user