fix(delegate): give subagents independent iteration budgets (#3004)

Each subagent now gets its own IterationBudget instead of sharing the parent's. The per-subagent cap is controlled by delegation.max_iterations in config.yaml (default 50). As a result, total iterations across parent + subagents can exceed the parent's max_iterations, but the user retains control via that config setting. Previously, subagents shared the parent's budget: three parallel subagents configured for max_iterations=50, launched by a parent that had already used 60 of its 90 iterations, had only the remaining 30 to split between them, so each would get only ~10 iterations. Inspired by PR #2928 (Bartok9), which identified the issue (#2873).
2026-03-25 11:29:49 -07:00
parent 65dace1b1a
commit 68ab37e891
3 changed files with 17 additions and 10 deletions
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -317,6 +317,8 @@ DEFAULT_CONFIG = {
        "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
        "base_url": "",    # direct OpenAI-compatible endpoint for subagents
        "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
+        "max_iterations": 50,  # per-subagent iteration cap (each subagent gets its own budget,
+                               # independent of the parent's max_iterations)
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
--- a/run_agent.py
+++ b/run_agent.py
@@ -162,11 +162,15 @@ def _install_safe_stdio() -> None:


 class IterationBudget:
-    """Thread-safe shared iteration counter for parent and child agents.
+    """Thread-safe iteration counter for an agent.

-    Tracks total LLM-call iterations consumed across a parent agent and all
-    its subagents.  A single ``IterationBudget`` is created by the parent
-    and passed to every child so they share the same cap.
+    Each agent (parent or subagent) gets its own ``IterationBudget``.
+    The parent's budget is capped at ``max_iterations`` (default 90).
+    Each subagent gets an independent budget capped at
+    ``delegation.max_iterations`` (default 50) — this means total
+    iterations across parent + subagents can exceed the parent's cap.
+    Users control the per-subagent limit via ``delegation.max_iterations``
+    in config.yaml.

    ``execute_code`` (programmatic tool calling) iterations are refunded via
    :meth:`refund` so they don't eat into the budget.
@@ -5719,7 +5723,7 @@ class AIAgent:
            api_call_count += 1
            if not self.iteration_budget.consume():
                if not self.quiet_mode:
-                    self._safe_print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
+                    self._safe_print(f"\n⚠️  Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)")
                break

            # Fire step_callback for gateway hooks (agent:step event)
@@ -7178,7 +7182,7 @@ class AIAgent:
            or self.iteration_budget.remaining <= 0
        ):
            if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
-                print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} used, including subagents)")
+                print(f"\n⚠️  Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)")
            final_response = self._handle_max_iterations(messages, api_call_count)
        
        # Determine if conversation completed successfully
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -191,9 +191,10 @@ def _build_child_agent(
    # Build progress callback to relay tool calls to parent display
    child_progress_cb = _build_child_progress_callback(task_index, parent_agent)

-    # Share the parent's iteration budget so subagent tool calls
-    # count toward the session-wide limit.
-    shared_budget = getattr(parent_agent, "iteration_budget", None)
+    # Each subagent gets its own iteration budget capped at max_iterations
+    # (set via delegation.max_iterations in config.yaml, default 50), so total
+    # iterations across parent + subagents can exceed the parent's
+    # max_iterations.  The fresh budget comes from iteration_budget=None below.

    # Resolve effective credentials: config override > parent inherit
    effective_model = model or parent_agent.model
@@ -230,7 +231,7 @@ def _build_child_agent(
        providers_order=parent_agent.providers_order,
        provider_sort=parent_agent.provider_sort,
        tool_progress_callback=child_progress_cb,
-        iteration_budget=shared_budget,
+        iteration_budget=None,  # fresh budget per subagent
    )
    # Set delegation depth so children can't spawn grandchildren
    child._delegate_depth = getattr(parent_agent, '_delegate_depth', 0) + 1