feat: shared iteration budget across parent + subagents

Subagent tool calls now count toward the same session-wide iteration
limit as the parent agent. Previously, each subagent had its own
independent counter, so a parent with max_iterations=60 could spawn
3 subagents each doing 50 calls = 150 total tool calls unmetered.

Changes:
- IterationBudget: thread-safe shared counter (run_agent.py)
  - consume(): try to use one iteration, returns False if exhausted
  - refund(): give back one iteration (for execute_code turns)
  - Thread-safe via Lock (subagents run in ThreadPoolExecutor)
- Parent creates the budget, children inherit it via delegate_tool.py
- execute_code turns are refunded (don't count against budget)
- Default raised from 60 → 90 to account for shared consumption
- Per-child cap (50) still applies as a safety valve

The per-child max_iterations (default 50) remains as a per-child
ceiling, but the shared budget is the hard session-wide limit.
A child stops at whichever comes first.
This commit is contained in:
teknium1
2026-03-07 08:16:37 -08:00
parent 5da55ea1e3
commit 0a82396718
4 changed files with 67 additions and 7 deletions

6
cli.py
View File

@@ -169,7 +169,7 @@ def load_cli_config() -> Dict[str, Any]:
"summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries
},
"agent": {
"max_turns": 60, # Default max tool-calling iterations
"max_turns": 90, # Default max tool-calling iterations (shared with subagents)
"verbose": False,
"system_prompt": "",
"prefill_messages_file": "",
@@ -836,7 +836,7 @@ class HermesCLI:
provider: Inference provider ("auto", "openrouter", "nous", "openai-codex", "zai", "kimi-coding", "minimax", "minimax-cn")
api_key: API key (default: from environment)
base_url: API base URL (default: OpenRouter)
max_turns: Maximum tool-calling iterations (default: 60)
max_turns: Maximum tool-calling iterations shared with subagents (default: 90)
verbose: Enable verbose logging
compact: Use compact display mode
resume: Session ID to resume (restores conversation history from SQLite)
@@ -889,7 +889,7 @@ class HermesCLI:
elif os.getenv("HERMES_MAX_ITERATIONS"):
self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS"))
else:
self.max_turns = 60
self.max_turns = 90
# Parse and validate toolsets
self.enabled_toolsets = toolsets

View File

@@ -2097,7 +2097,7 @@ class GatewayRunner:
os.environ["HERMES_SESSION_KEY"] = session_key or ""
# Read from env var or use default (same as CLI)
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "60"))
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
# Map platform enum to the platform hint key the agent understands.
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is.

View File

@@ -99,6 +99,46 @@ from agent.trajectory import (
)
class IterationBudget:
"""Thread-safe shared iteration counter for parent and child agents.
Tracks total LLM-call iterations consumed across a parent agent and all
its subagents. A single ``IterationBudget`` is created by the parent
and passed to every child so they share the same cap.
``execute_code`` (programmatic tool calling) iterations are refunded via
:meth:`refund` so they don't eat into the budget.
"""
def __init__(self, max_total: int):
self.max_total = max_total
self._used = 0
self._lock = threading.Lock()
def consume(self) -> bool:
"""Try to consume one iteration. Returns True if allowed."""
with self._lock:
if self._used >= self.max_total:
return False
self._used += 1
return True
def refund(self) -> None:
"""Give back one iteration (e.g. for execute_code turns)."""
with self._lock:
if self._used > 0:
self._used -= 1
@property
def used(self) -> int:
return self._used
@property
def remaining(self) -> int:
with self._lock:
return max(0, self.max_total - self._used)
class AIAgent:
"""
AI Agent with tool calling capabilities.
@@ -114,7 +154,7 @@ class AIAgent:
provider: str = None,
api_mode: str = None,
model: str = "anthropic/claude-opus-4.6", # OpenRouter format
max_iterations: int = 60, # Default tool-calling iterations
max_iterations: int = 90, # Default tool-calling iterations (shared with subagents)
tool_delay: float = 1.0,
enabled_toolsets: List[str] = None,
disabled_toolsets: List[str] = None,
@@ -142,6 +182,7 @@ class AIAgent:
skip_memory: bool = False,
session_db=None,
honcho_session_key: str = None,
iteration_budget: "IterationBudget" = None,
):
"""
Initialize the AI Agent.
@@ -152,7 +193,7 @@ class AIAgent:
provider (str): Provider identifier (optional; used for telemetry/routing hints)
api_mode (str): API mode override: "chat_completions" or "codex_responses"
model (str): Model name to use (default: "anthropic/claude-opus-4.6")
max_iterations (int): Maximum number of tool calling iterations (default: 60)
max_iterations (int): Maximum number of tool calling iterations (default: 90)
tool_delay (float): Delay between tool calls in seconds (default: 1.0)
enabled_toolsets (List[str]): Only enable tools from these toolsets (optional)
disabled_toolsets (List[str]): Disable tools from these toolsets (optional)
@@ -186,6 +227,9 @@ class AIAgent:
"""
self.model = model
self.max_iterations = max_iterations
# Shared iteration budget — parent creates, children inherit.
# Consumed by every LLM turn across parent + all subagents.
self.iteration_budget = iteration_budget or IterationBudget(max_iterations)
self.tool_delay = tool_delay
self.save_trajectories = save_trajectories
self.verbose_logging = verbose_logging
@@ -2974,7 +3018,7 @@ class AIAgent:
# Clear any stale interrupt state at start
self.clear_interrupt()
while api_call_count < self.max_iterations:
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
# Check for interrupt request (e.g., user sent new message)
if self._interrupt_requested:
interrupted = True
@@ -2983,6 +3027,10 @@ class AIAgent:
break
api_call_count += 1
if not self.iteration_budget.consume():
if not self.quiet_mode:
print(f"\n⚠️ Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
break
# Fire step_callback for gateway hooks (agent:step event)
if self.step_callback is not None:
@@ -3749,6 +3797,13 @@ class AIAgent:
self._log_msg_to_db(assistant_msg)
self._execute_tool_calls(assistant_message, messages, effective_task_id)
# Refund the iteration if the ONLY tool(s) called were
# execute_code (programmatic tool calling). These are
# cheap RPC-style calls that shouldn't eat the budget.
_tc_names = {tc.function.name for tc in assistant_message.tool_calls}
if _tc_names == {"execute_code"}:
self.iteration_budget.refund()
if self.compression_enabled and self.context_compressor.should_compress():
messages, active_system_prompt = self._compress_context(

View File

@@ -194,6 +194,10 @@ def _run_single_child(
# Build progress callback to relay tool calls to parent display
child_progress_cb = _build_child_progress_callback(task_index, parent_agent, task_count)
# Share the parent's iteration budget so subagent tool calls
# count toward the session-wide limit.
shared_budget = getattr(parent_agent, "iteration_budget", None)
child = AIAgent(
base_url=parent_agent.base_url,
api_key=parent_api_key,
@@ -215,6 +219,7 @@ def _run_single_child(
providers_order=parent_agent.providers_order,
provider_sort=parent_agent.provider_sort,
tool_progress_callback=child_progress_cb,
iteration_budget=shared_budget,
)
# Set delegation depth so children can't spawn grandchildren