diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 64927c2b6..bd63901e1 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -507,6 +507,13 @@ agent: # finish, then interrupts anything still running after this timeout. # 0 = no drain, interrupt immediately. # restart_drain_timeout: 60 + + # Max app-level retry attempts for API errors (connection drops, provider + # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this + # to 1 if you use fallback providers and want fast failover on flaky + # primaries (default 3). The OpenAI SDK does its own low-level retries + # underneath this wrapper — this is the Hermes-level loop. + # api_max_retries: 3 # Enable verbose logging verbose: false diff --git a/hermes_cli/config.py b/hermes_cli/config.py index c78b01b15..36e478a70 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -361,6 +361,15 @@ DEFAULT_CONFIG = { # to finish, then interrupts any remaining runs after the timeout. # 0 = no drain, interrupt immediately. "restart_drain_timeout": 60, + # Max app-level retry attempts for API errors (connection drops, + # provider timeouts, 5xx, etc.) before the agent surfaces the + # failure. The OpenAI SDK already does its own low-level retries + # (max_retries=2 default) for transient network errors; this is + # the Hermes-level retry loop that wraps the whole call. Lower + # this to 1 if you use fallback providers and want fast failover + # on flaky primaries; raise it if you prefer to tolerate longer + # provider hiccups on a single provider. + "api_max_retries": 3, "service_tier": "", # Tool-use enforcement: injects system prompt guidance that tells the # model to actually call tools instead of describing intended actions. 
diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index 24acc15f5..0c1bebe67 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -289,6 +289,7 @@ TIPS = [ "When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.", "agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.", "agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.", + "agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.", "The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.", "Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.", "The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.", diff --git a/run_agent.py b/run_agent.py index 855b67a84..63b0adb42 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1548,6 +1548,17 @@ class AIAgent: _agent_section = {} self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto") + # App-level API retry count (wraps each model API call). Default 3, + # overridable via agent.api_max_retries in config.yaml. See #11616. 
+ try: + _raw_api_retries = _agent_section.get("api_max_retries", 3) + _api_retries = int(_raw_api_retries) + if _api_retries < 1: + _api_retries = 1 # 1 = no retry (single attempt) + except (TypeError, ValueError): + _api_retries = 3 + self._api_max_retries = _api_retries + # Initialize context compressor for automatic context management # Compresses conversation when approaching model's context limit # Configuration via config.yaml (compression section) @@ -9259,7 +9270,7 @@ class AIAgent: api_start_time = time.time() retry_count = 0 - max_retries = 3 + max_retries = self._api_max_retries primary_recovery_attempted = False max_compression_attempts = 3 codex_auth_retry_attempted=False diff --git a/tests/run_agent/test_api_max_retries_config.py b/tests/run_agent/test_api_max_retries_config.py new file mode 100644 index 000000000..44e859986 --- /dev/null +++ b/tests/run_agent/test_api_max_retries_config.py @@ -0,0 +1,65 @@ +"""Tests for agent.api_max_retries config surface. + +Closes #11616 — make the hardcoded ``max_retries = 3`` in the agent's API +retry loop user-configurable so fallback-provider setups can fail over +faster on flaky primaries instead of burning ~3x180s on the same stall. 
+"""
+from unittest.mock import MagicMock, patch
+
+from run_agent import AIAgent
+
+_UNSET = object()  # sentinel: key absent (None is passed through as a value)
+
+
+def _make_agent(api_max_retries=_UNSET):
+    """Build an AIAgent against a mocked load_config; _UNSET omits the key.
+    Any other value, including None, is stored as agent.api_max_retries."""
+    cfg = {"agent": {}}
+    if api_max_retries is not _UNSET:
+        cfg["agent"]["api_max_retries"] = api_max_retries
+
+    with patch("run_agent.OpenAI"), \
+         patch("hermes_cli.config.load_config", return_value=cfg):
+        return AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+
+
+def test_default_api_max_retries_is_three():
+    """No config override → legacy default of 3 retries preserved."""
+    agent = _make_agent()
+    assert agent._api_max_retries == 3
+
+
+def test_api_max_retries_honors_config_override():
+    """Setting agent.api_max_retries in config propagates to the agent."""
+    agent = _make_agent(api_max_retries=1)
+    assert agent._api_max_retries == 1
+
+    agent2 = _make_agent(api_max_retries=5)
+    assert agent2._api_max_retries == 5
+
+
+def test_api_max_retries_clamps_below_one_to_one():
+    """0 or negative values would disable the retry loop entirely
+    (the ``while retry_count < max_retries`` guard would never execute),
+    so clamp to 1 = single attempt, no retry."""
+    agent = _make_agent(api_max_retries=0)
+    assert agent._api_max_retries == 1
+
+    agent2 = _make_agent(api_max_retries=-3)
+    assert agent2._api_max_retries == 1
+
+
+def test_api_max_retries_falls_back_on_invalid_value():
+    """Garbage or null values in config don't crash agent init — fall back to 3."""
+    agent = _make_agent(api_max_retries="not-a-number")
+    assert agent._api_max_retries == 3
+    # Explicit None in the config: the key exists, so int(None) raises TypeError.
+    agent2 = _make_agent(api_max_retries=None)
+    assert agent2._api_max_retries == 3