feat: shared iteration budget across parent + subagents
Subagent tool calls now count toward the same session-wide iteration limit as the parent agent. Previously, each subagent had its own independent counter, so a parent with max_iterations=60 could spawn 3 subagents each doing 50 calls = 150 total tool calls unmetered. Changes: - IterationBudget: thread-safe shared counter (run_agent.py) - consume(): try to use one iteration, returns False if exhausted - refund(): give back one iteration (for execute_code turns) - Thread-safe via Lock (subagents run in ThreadPoolExecutor) - Parent creates the budget, children inherit it via delegate_tool.py - execute_code turns are refunded (don't count against budget) - Default raised from 60 → 90 to account for shared consumption - Per-child cap (50) still applies as a safety valve The per-child max_iterations (default 50) remains as a per-child ceiling, but the shared budget is the hard session-wide limit. A child stops at whichever comes first.
This commit is contained in:
6
cli.py
6
cli.py
@@ -169,7 +169,7 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries
|
||||
},
|
||||
"agent": {
|
||||
"max_turns": 60, # Default max tool-calling iterations
|
||||
"max_turns": 90, # Default max tool-calling iterations (shared with subagents)
|
||||
"verbose": False,
|
||||
"system_prompt": "",
|
||||
"prefill_messages_file": "",
|
||||
@@ -836,7 +836,7 @@ class HermesCLI:
|
||||
provider: Inference provider ("auto", "openrouter", "nous", "openai-codex", "zai", "kimi-coding", "minimax", "minimax-cn")
|
||||
api_key: API key (default: from environment)
|
||||
base_url: API base URL (default: OpenRouter)
|
||||
max_turns: Maximum tool-calling iterations (default: 60)
|
||||
max_turns: Maximum tool-calling iterations shared with subagents (default: 90)
|
||||
verbose: Enable verbose logging
|
||||
compact: Use compact display mode
|
||||
resume: Session ID to resume (restores conversation history from SQLite)
|
||||
@@ -889,7 +889,7 @@ class HermesCLI:
|
||||
elif os.getenv("HERMES_MAX_ITERATIONS"):
|
||||
self.max_turns = int(os.getenv("HERMES_MAX_ITERATIONS"))
|
||||
else:
|
||||
self.max_turns = 60
|
||||
self.max_turns = 90
|
||||
|
||||
# Parse and validate toolsets
|
||||
self.enabled_toolsets = toolsets
|
||||
|
||||
@@ -2097,7 +2097,7 @@ class GatewayRunner:
|
||||
os.environ["HERMES_SESSION_KEY"] = session_key or ""
|
||||
|
||||
# Read from env var or use default (same as CLI)
|
||||
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "60"))
|
||||
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
|
||||
|
||||
# Map platform enum to the platform hint key the agent understands.
|
||||
# Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
|
||||
|
||||
61
run_agent.py
61
run_agent.py
@@ -99,6 +99,46 @@ from agent.trajectory import (
|
||||
)
|
||||
|
||||
|
||||
class IterationBudget:
|
||||
"""Thread-safe shared iteration counter for parent and child agents.
|
||||
|
||||
Tracks total LLM-call iterations consumed across a parent agent and all
|
||||
its subagents. A single ``IterationBudget`` is created by the parent
|
||||
and passed to every child so they share the same cap.
|
||||
|
||||
``execute_code`` (programmatic tool calling) iterations are refunded via
|
||||
:meth:`refund` so they don't eat into the budget.
|
||||
"""
|
||||
|
||||
def __init__(self, max_total: int):
|
||||
self.max_total = max_total
|
||||
self._used = 0
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def consume(self) -> bool:
|
||||
"""Try to consume one iteration. Returns True if allowed."""
|
||||
with self._lock:
|
||||
if self._used >= self.max_total:
|
||||
return False
|
||||
self._used += 1
|
||||
return True
|
||||
|
||||
def refund(self) -> None:
|
||||
"""Give back one iteration (e.g. for execute_code turns)."""
|
||||
with self._lock:
|
||||
if self._used > 0:
|
||||
self._used -= 1
|
||||
|
||||
@property
|
||||
def used(self) -> int:
|
||||
return self._used
|
||||
|
||||
@property
|
||||
def remaining(self) -> int:
|
||||
with self._lock:
|
||||
return max(0, self.max_total - self._used)
|
||||
|
||||
|
||||
class AIAgent:
|
||||
"""
|
||||
AI Agent with tool calling capabilities.
|
||||
@@ -114,7 +154,7 @@ class AIAgent:
|
||||
provider: str = None,
|
||||
api_mode: str = None,
|
||||
model: str = "anthropic/claude-opus-4.6", # OpenRouter format
|
||||
max_iterations: int = 60, # Default tool-calling iterations
|
||||
max_iterations: int = 90, # Default tool-calling iterations (shared with subagents)
|
||||
tool_delay: float = 1.0,
|
||||
enabled_toolsets: List[str] = None,
|
||||
disabled_toolsets: List[str] = None,
|
||||
@@ -142,6 +182,7 @@ class AIAgent:
|
||||
skip_memory: bool = False,
|
||||
session_db=None,
|
||||
honcho_session_key: str = None,
|
||||
iteration_budget: "IterationBudget" = None,
|
||||
):
|
||||
"""
|
||||
Initialize the AI Agent.
|
||||
@@ -152,7 +193,7 @@ class AIAgent:
|
||||
provider (str): Provider identifier (optional; used for telemetry/routing hints)
|
||||
api_mode (str): API mode override: "chat_completions" or "codex_responses"
|
||||
model (str): Model name to use (default: "anthropic/claude-opus-4.6")
|
||||
max_iterations (int): Maximum number of tool calling iterations (default: 60)
|
||||
max_iterations (int): Maximum number of tool calling iterations (default: 90)
|
||||
tool_delay (float): Delay between tool calls in seconds (default: 1.0)
|
||||
enabled_toolsets (List[str]): Only enable tools from these toolsets (optional)
|
||||
disabled_toolsets (List[str]): Disable tools from these toolsets (optional)
|
||||
@@ -186,6 +227,9 @@ class AIAgent:
|
||||
"""
|
||||
self.model = model
|
||||
self.max_iterations = max_iterations
|
||||
# Shared iteration budget — parent creates, children inherit.
|
||||
# Consumed by every LLM turn across parent + all subagents.
|
||||
self.iteration_budget = iteration_budget or IterationBudget(max_iterations)
|
||||
self.tool_delay = tool_delay
|
||||
self.save_trajectories = save_trajectories
|
||||
self.verbose_logging = verbose_logging
|
||||
@@ -2974,7 +3018,7 @@ class AIAgent:
|
||||
# Clear any stale interrupt state at start
|
||||
self.clear_interrupt()
|
||||
|
||||
while api_call_count < self.max_iterations:
|
||||
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
||||
# Check for interrupt request (e.g., user sent new message)
|
||||
if self._interrupt_requested:
|
||||
interrupted = True
|
||||
@@ -2983,6 +3027,10 @@ class AIAgent:
|
||||
break
|
||||
|
||||
api_call_count += 1
|
||||
if not self.iteration_budget.consume():
|
||||
if not self.quiet_mode:
|
||||
print(f"\n⚠️ Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
|
||||
break
|
||||
|
||||
# Fire step_callback for gateway hooks (agent:step event)
|
||||
if self.step_callback is not None:
|
||||
@@ -3749,6 +3797,13 @@ class AIAgent:
|
||||
self._log_msg_to_db(assistant_msg)
|
||||
|
||||
self._execute_tool_calls(assistant_message, messages, effective_task_id)
|
||||
|
||||
# Refund the iteration if the ONLY tool(s) called were
|
||||
# execute_code (programmatic tool calling). These are
|
||||
# cheap RPC-style calls that shouldn't eat the budget.
|
||||
_tc_names = {tc.function.name for tc in assistant_message.tool_calls}
|
||||
if _tc_names == {"execute_code"}:
|
||||
self.iteration_budget.refund()
|
||||
|
||||
if self.compression_enabled and self.context_compressor.should_compress():
|
||||
messages, active_system_prompt = self._compress_context(
|
||||
|
||||
@@ -194,6 +194,10 @@ def _run_single_child(
|
||||
# Build progress callback to relay tool calls to parent display
|
||||
child_progress_cb = _build_child_progress_callback(task_index, parent_agent, task_count)
|
||||
|
||||
# Share the parent's iteration budget so subagent tool calls
|
||||
# count toward the session-wide limit.
|
||||
shared_budget = getattr(parent_agent, "iteration_budget", None)
|
||||
|
||||
child = AIAgent(
|
||||
base_url=parent_agent.base_url,
|
||||
api_key=parent_api_key,
|
||||
@@ -215,6 +219,7 @@ def _run_single_child(
|
||||
providers_order=parent_agent.providers_order,
|
||||
provider_sort=parent_agent.provider_sort,
|
||||
tool_progress_callback=child_progress_cb,
|
||||
iteration_budget=shared_budget,
|
||||
)
|
||||
|
||||
# Set delegation depth so children can't spawn grandchildren
|
||||
|
||||
Reference in New Issue
Block a user