feat: iteration budget pressure via tool result injection
Two-tier warning system that nudges the LLM as it approaches
max_iterations, injected into the last tool result JSON rather
than as a separate system message:
- Caution (70%): {"_budget_warning": "[BUDGET: 42/60...]"}
- Warning (90%): {"_budget_warning": "[BUDGET WARNING: 54/60...]"}
For JSON tool results, adds a _budget_warning field to the existing
dict. For plain text results, appends the warning as text.
Key properties:
- No system messages injected mid-conversation
- No changes to message structure
- Prompt cache stays valid
- Configurable thresholds (0.7 / 0.9)
- Can be disabled: _budget_pressure_enabled = False
Inspired by PR #421 (@Bartok9) and issue #414.
8 tests covering thresholds, edge cases, JSON and text injection.
This commit is contained in:
56
run_agent.py
56
run_agent.py
@@ -297,6 +297,13 @@ class AIAgent:
|
||||
self._use_prompt_caching = is_openrouter and is_claude
|
||||
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
|
||||
|
||||
# Iteration budget pressure: warn the LLM as it approaches max_iterations.
|
||||
# Warnings are injected into the last tool result JSON (not as separate
|
||||
# messages) so they don't break message structure or invalidate caching.
|
||||
self._budget_caution_threshold = 0.7 # 70% — nudge to start wrapping up
|
||||
self._budget_warning_threshold = 0.9 # 90% — urgent, respond now
|
||||
self._budget_pressure_enabled = True
|
||||
|
||||
# Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
|
||||
# so tool failures, API errors, etc. are inspectable after the fact.
|
||||
from agent.redact import RedactingFormatter
|
||||
@@ -2691,7 +2698,7 @@ class AIAgent:
|
||||
|
||||
return compressed, new_system_prompt
|
||||
|
||||
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str) -> None:
|
||||
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
|
||||
"""Execute tool calls from the assistant message and append results to messages."""
|
||||
for i, tool_call in enumerate(assistant_message.tool_calls, 1):
|
||||
# SAFETY: check interrupt BEFORE starting each tool.
|
||||
@@ -2938,6 +2945,51 @@ class AIAgent:
|
||||
if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
||||
time.sleep(self.tool_delay)
|
||||
|
||||
# ── Budget pressure injection ─────────────────────────────────
|
||||
# After all tool calls in this turn are processed, check if we're
|
||||
# approaching max_iterations. If so, inject a warning into the LAST
|
||||
# tool result's JSON so the LLM sees it naturally when reading results.
|
||||
budget_warning = self._get_budget_warning(api_call_count)
|
||||
if budget_warning and messages and messages[-1].get("role") == "tool":
|
||||
last_content = messages[-1]["content"]
|
||||
try:
|
||||
parsed = json.loads(last_content)
|
||||
if isinstance(parsed, dict):
|
||||
parsed["_budget_warning"] = budget_warning
|
||||
messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
|
||||
else:
|
||||
messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
|
||||
if not self.quiet_mode:
|
||||
remaining = self.max_iterations - api_call_count
|
||||
tier = "⚠️ WARNING" if remaining <= self.max_iterations * 0.1 else "💡 CAUTION"
|
||||
print(f"{self.log_prefix}{tier}: {remaining} iterations remaining")
|
||||
|
||||
def _get_budget_warning(self, api_call_count: int) -> Optional[str]:
|
||||
"""Return a budget pressure string, or None if not yet needed.
|
||||
|
||||
Two-tier system:
|
||||
- Caution (70%): nudge to consolidate work
|
||||
- Warning (90%): urgent, must respond now
|
||||
"""
|
||||
if not self._budget_pressure_enabled or self.max_iterations <= 0:
|
||||
return None
|
||||
progress = api_call_count / self.max_iterations
|
||||
remaining = self.max_iterations - api_call_count
|
||||
if progress >= self._budget_warning_threshold:
|
||||
return (
|
||||
f"[BUDGET WARNING: Iteration {api_call_count}/{self.max_iterations}. "
|
||||
f"Only {remaining} iteration(s) left. "
|
||||
"Provide your final response NOW. No more tool calls unless absolutely critical.]"
|
||||
)
|
||||
if progress >= self._budget_caution_threshold:
|
||||
return (
|
||||
f"[BUDGET: Iteration {api_call_count}/{self.max_iterations}. "
|
||||
f"{remaining} iterations left. Start consolidating your work.]"
|
||||
)
|
||||
return None
|
||||
|
||||
def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
|
||||
"""Request a summary when max iterations are reached. Returns the final response text."""
|
||||
print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...")
|
||||
@@ -4183,7 +4235,7 @@ class AIAgent:
|
||||
|
||||
messages.append(assistant_msg)
|
||||
|
||||
self._execute_tool_calls(assistant_message, messages, effective_task_id)
|
||||
self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
|
||||
|
||||
# Refund the iteration if the ONLY tool(s) called were
|
||||
# execute_code (programmatic tool calling). These are
|
||||
|
||||
@@ -1208,3 +1208,78 @@ class TestSystemPromptStability:
|
||||
conversation_history = []
|
||||
should_prefetch = not conversation_history
|
||||
assert should_prefetch is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Iteration budget pressure warnings
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBudgetPressure:
|
||||
"""Budget pressure warning system (issue #414)."""
|
||||
|
||||
def test_no_warning_below_caution(self, agent):
|
||||
agent.max_iterations = 60
|
||||
assert agent._get_budget_warning(30) is None
|
||||
|
||||
def test_caution_at_70_percent(self, agent):
|
||||
agent.max_iterations = 60
|
||||
msg = agent._get_budget_warning(42)
|
||||
assert msg is not None
|
||||
assert "[BUDGET:" in msg
|
||||
assert "18 iterations left" in msg
|
||||
|
||||
def test_warning_at_90_percent(self, agent):
|
||||
agent.max_iterations = 60
|
||||
msg = agent._get_budget_warning(54)
|
||||
assert "[BUDGET WARNING:" in msg
|
||||
assert "Provide your final response NOW" in msg
|
||||
|
||||
def test_last_iteration(self, agent):
|
||||
agent.max_iterations = 60
|
||||
msg = agent._get_budget_warning(59)
|
||||
assert "1 iteration(s) left" in msg
|
||||
|
||||
def test_disabled(self, agent):
|
||||
agent.max_iterations = 60
|
||||
agent._budget_pressure_enabled = False
|
||||
assert agent._get_budget_warning(55) is None
|
||||
|
||||
def test_zero_max_iterations(self, agent):
|
||||
agent.max_iterations = 0
|
||||
assert agent._get_budget_warning(0) is None
|
||||
|
||||
def test_injects_into_json_tool_result(self, agent):
|
||||
"""Warning should be injected as _budget_warning field in JSON tool results."""
|
||||
import json
|
||||
agent.max_iterations = 10
|
||||
messages = [
|
||||
{"role": "tool", "content": json.dumps({"output": "done", "exit_code": 0}), "tool_call_id": "tc1"}
|
||||
]
|
||||
warning = agent._get_budget_warning(9)
|
||||
assert warning is not None
|
||||
# Simulate the injection logic
|
||||
last_content = messages[-1]["content"]
|
||||
parsed = json.loads(last_content)
|
||||
parsed["_budget_warning"] = warning
|
||||
messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
|
||||
result = json.loads(messages[-1]["content"])
|
||||
assert "_budget_warning" in result
|
||||
assert "BUDGET WARNING" in result["_budget_warning"]
|
||||
assert result["output"] == "done" # original content preserved
|
||||
|
||||
def test_appends_to_non_json_tool_result(self, agent):
|
||||
"""Warning should be appended as text for non-JSON tool results."""
|
||||
agent.max_iterations = 10
|
||||
messages = [
|
||||
{"role": "tool", "content": "plain text result", "tool_call_id": "tc1"}
|
||||
]
|
||||
warning = agent._get_budget_warning(9)
|
||||
# Simulate injection logic for non-JSON
|
||||
last_content = messages[-1]["content"]
|
||||
try:
|
||||
import json
|
||||
json.loads(last_content)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
messages[-1]["content"] = last_content + f"\n\n{warning}"
|
||||
assert "plain text result" in messages[-1]["content"]
|
||||
assert "BUDGET WARNING" in messages[-1]["content"]
|
||||
|
||||
Reference in New Issue
Block a user