fix(anthropic): retry 429/529 errors and surface error details to users
- 429 rate-limit and 529 overloaded errors were incorrectly treated as non-retryable client errors, causing immediate failure instead of exponential-backoff retry; users hitting Anthropic rate limits got silent failures or no response at all.
- The generic "Sorry, I encountered an unexpected error" message now includes the error type, details, and status-specific hints (auth, rate limit, overloaded).
- A failed agent with final_response=None now surfaces the actual error message instead of returning an empty response.
This commit is contained in:
480
tests/test_anthropic_error_handling.py
Normal file
480
tests/test_anthropic_error_handling.py
Normal file
@@ -0,0 +1,480 @@
|
||||
"""Tests for Anthropic error handling in the agent retry loop.
|
||||
|
||||
Covers all error paths in run_agent.py's run_conversation() for api_mode=anthropic_messages:
|
||||
- 429 rate limit → retried with backoff
|
||||
- 529 overloaded → retried with backoff
|
||||
- 400 bad request → non-retryable, immediate fail
|
||||
- 401 unauthorized → credential refresh + retry
|
||||
- 500 server error → retried with backoff
|
||||
- "prompt is too long" → context length error triggers compression
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
# Stub out optional third-party modules before importing the project code so
# the test suite runs without them installed. setdefault() keeps any real
# installation if one is present.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
|
||||
|
||||
import gateway.run as gateway_run
|
||||
import run_agent
|
||||
from gateway.config import Platform
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _patch_agent_bootstrap(monkeypatch):
    """Stub tool discovery so agent construction needs no real toolset."""

    def _fake_tool_definitions(**kwargs):
        # A single minimal "terminal" tool is enough for the agent to boot.
        return [
            {
                "type": "function",
                "function": {
                    "name": "terminal",
                    "description": "Run shell commands.",
                    "parameters": {"type": "object", "properties": {}},
                },
            }
        ]

    monkeypatch.setattr(run_agent, "get_tool_definitions", _fake_tool_definitions)
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
|
||||
|
||||
|
||||
def _anthropic_response(text: str):
|
||||
"""Simulate an Anthropic messages.create() response object."""
|
||||
return SimpleNamespace(
|
||||
content=[SimpleNamespace(type="text", text=text)],
|
||||
stop_reason="end_turn",
|
||||
usage=SimpleNamespace(input_tokens=10, output_tokens=5),
|
||||
model="claude-sonnet-4-6-20250514",
|
||||
)
|
||||
|
||||
|
||||
class _RateLimitError(Exception):
|
||||
"""Simulates Anthropic 429 rate limit error."""
|
||||
def __init__(self):
|
||||
super().__init__("Error code: 429 - Rate limit exceeded. Please retry after 30s.")
|
||||
self.status_code = 429
|
||||
|
||||
|
||||
class _OverloadedError(Exception):
|
||||
"""Simulates Anthropic 529 overloaded error."""
|
||||
def __init__(self):
|
||||
super().__init__("Error code: 529 - API is temporarily overloaded.")
|
||||
self.status_code = 529
|
||||
|
||||
|
||||
class _BadRequestError(Exception):
|
||||
"""Simulates Anthropic 400 bad request error (non-retryable)."""
|
||||
def __init__(self):
|
||||
super().__init__("Error code: 400 - Invalid model specified.")
|
||||
self.status_code = 400
|
||||
|
||||
|
||||
class _UnauthorizedError(Exception):
|
||||
"""Simulates Anthropic 401 unauthorized error."""
|
||||
def __init__(self):
|
||||
super().__init__("Error code: 401 - Unauthorized. Invalid API key.")
|
||||
self.status_code = 401
|
||||
|
||||
|
||||
class _ServerError(Exception):
|
||||
"""Simulates Anthropic 500 internal server error."""
|
||||
def __init__(self):
|
||||
super().__init__("Error code: 500 - Internal server error.")
|
||||
self.status_code = 500
|
||||
|
||||
|
||||
class _PromptTooLongError(Exception):
|
||||
"""Simulates Anthropic prompt-too-long error (triggers context compression)."""
|
||||
def __init__(self):
|
||||
super().__init__("prompt is too long: 250000 tokens > 200000 maximum")
|
||||
self.status_code = 400
|
||||
|
||||
|
||||
class _FakeAnthropicClient:
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
|
||||
def _fake_build_anthropic_client(key, base_url=None):
    """Drop-in replacement for ``agent.anthropic_adapter.build_anthropic_client``.

    Ignores the credentials and returns an inert fake client.
    """
    fake_client = _FakeAnthropicClient()
    return fake_client
|
||||
|
||||
|
||||
def _make_agent_cls(error_cls, recover_after=None):
    """Build an AIAgent subclass whose API calls raise ``error_cls``.

    When ``recover_after`` is given, the fake API succeeds once more than
    that many calls have failed; otherwise every call raises.
    """

    class _Agent(run_agent.AIAgent):
        def __init__(self, *args, **kwargs):
            kwargs.setdefault("skip_context_files", True)
            kwargs.setdefault("skip_memory", True)
            kwargs.setdefault("max_iterations", 4)
            super().__init__(*args, **kwargs)
            # Neutralize persistence side effects so the test stays hermetic.
            self._cleanup_task_resources = lambda task_id: None
            self._persist_session = lambda messages, history=None: None
            self._save_trajectory = lambda messages, user_message, completed: None
            self._save_session_log = lambda messages: None

        def run_conversation(self, user_message, conversation_history=None, task_id=None):
            attempt_counter = {"n": 0}

            def _failing_api_call(api_kwargs):
                attempt_counter["n"] += 1
                if recover_after is not None and attempt_counter["n"] > recover_after:
                    return _anthropic_response("Recovered")
                raise error_cls()

            self._interruptible_api_call = _failing_api_call
            return super().run_conversation(
                user_message, conversation_history=conversation_history, task_id=task_id
            )

    return _Agent
|
||||
|
||||
|
||||
def _run_with_agent(monkeypatch, agent_cls):
    """Drive ``GatewayRunner._run_agent`` end-to-end with ``agent_cls``.

    Patches tool bootstrap, the Anthropic client factory, and the runtime
    kwargs resolver, then builds a bare GatewayRunner and runs one message.
    """
    _patch_agent_bootstrap(monkeypatch)
    monkeypatch.setattr(
        "agent.anthropic_adapter.build_anthropic_client", _fake_build_anthropic_client
    )
    monkeypatch.setattr(run_agent, "AIAgent", agent_cls)

    runtime_kwargs = {
        "provider": "anthropic",
        "api_mode": "anthropic_messages",
        "base_url": "https://api.anthropic.com",
        "api_key": "sk-ant-api03-test-key",
    }
    monkeypatch.setattr(
        gateway_run, "_resolve_runtime_agent_kwargs", lambda: dict(runtime_kwargs)
    )
    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")

    # Bypass __init__ (it would start adapters); set only what _run_agent reads.
    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
    plain_attrs = {
        "adapters": {},
        "_ephemeral_system_prompt": "",
        "_prefill_messages": [],
        "_reasoning_config": None,
        "_provider_routing": {},
        "_fallback_model": None,
        "_running_agents": {},
        "_session_db": None,
    }
    for attr, value in plain_attrs.items():
        setattr(runner, attr, value)
    runner.hooks = MagicMock()
    runner.hooks.emit = AsyncMock()
    runner.hooks.loaded_hooks = []

    source = SessionSource(
        platform=Platform.LOCAL,
        chat_id="cli",
        chat_name="CLI",
        chat_type="dm",
        user_id="test-user-1",
    )

    return asyncio.run(
        runner._run_agent(
            message="hello",
            context_prompt="",
            history=[],
            source=source,
            session_id="test-session",
            session_key="agent:main:local:dm",
        )
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_429_rate_limit_is_retried_and_recovers(monkeypatch):
    """A 429 must be retried with backoff: first call fails, second succeeds."""
    recovering_agent = _make_agent_cls(_RateLimitError, recover_after=1)
    result = _run_with_agent(monkeypatch, recovering_agent)
    assert result["final_response"] == "Recovered"
|
||||
|
||||
|
||||
def test_529_overloaded_is_retried_and_recovers(monkeypatch):
    """A 529 must be retried with backoff: first call fails, second succeeds."""
    recovering_agent = _make_agent_cls(_OverloadedError, recover_after=1)
    result = _run_with_agent(monkeypatch, recovering_agent)
    assert result["final_response"] == "Recovered"
|
||||
|
||||
|
||||
def test_429_exhausts_all_retries_before_raising(monkeypatch):
    """A 429 that never clears must exhaust max_retries, then propagate."""
    always_failing_agent = _make_agent_cls(_RateLimitError)
    with pytest.raises(_RateLimitError):
        _run_with_agent(monkeypatch, always_failing_agent)
|
||||
|
||||
|
||||
def test_400_bad_request_is_non_retryable(monkeypatch):
    """A 400 must fail fast after exactly one API call (regression guard)."""
    result = _run_with_agent(monkeypatch, _make_agent_cls(_BadRequestError))
    assert result["api_calls"] == 1
    # The surfaced error text should carry the status code for the user.
    assert "400" in str(result.get("final_response", ""))
|
||||
|
||||
|
||||
def test_500_server_error_is_retried_and_recovers(monkeypatch):
    """A 500 must be retried with backoff: first call fails, second succeeds."""
    recovering_agent = _make_agent_cls(_ServerError, recover_after=1)
    result = _run_with_agent(monkeypatch, recovering_agent)
    assert result["final_response"] == "Recovered"
|
||||
|
||||
|
||||
def test_401_credential_refresh_recovers(monkeypatch):
    """401 should trigger a credential refresh and then retry successfully.

    Uses _run_with_agent for the gateway setup instead of duplicating the
    ~60 lines of runner/monkeypatch scaffolding inline; only the agent
    subclass (401 once, then success, with a counting refresh hook) is
    test-specific.
    """
    refresh_count = {"n": 0}

    class _Auth401ThenSuccessAgent(run_agent.AIAgent):
        def __init__(self, *args, **kwargs):
            kwargs.setdefault("skip_context_files", True)
            kwargs.setdefault("skip_memory", True)
            kwargs.setdefault("max_iterations", 4)
            super().__init__(*args, **kwargs)
            # Neutralize persistence side effects so the test stays hermetic.
            self._cleanup_task_resources = lambda task_id: None
            self._persist_session = lambda messages, history=None: None
            self._save_trajectory = lambda messages, user_message, completed: None
            self._save_session_log = lambda messages: None

        def _try_refresh_anthropic_client_credentials(self) -> bool:
            refresh_count["n"] += 1
            return True  # Simulate a successful credential refresh.

        def run_conversation(self, user_message, conversation_history=None, task_id=None):
            calls = {"n": 0}

            def _fake_api_call(api_kwargs):
                calls["n"] += 1
                if calls["n"] == 1:
                    raise _UnauthorizedError()
                return _anthropic_response("Auth refreshed")

            self._interruptible_api_call = _fake_api_call
            return super().run_conversation(
                user_message, conversation_history=conversation_history, task_id=task_id
            )

    result = _run_with_agent(monkeypatch, _Auth401ThenSuccessAgent)

    assert result["final_response"] == "Auth refreshed"
    assert refresh_count["n"] == 1
|
||||
|
||||
|
||||
def test_401_refresh_fails_is_non_retryable(monkeypatch):
    """401 with a failed credential refresh should be non-retryable.

    Uses _run_with_agent for the gateway setup instead of duplicating the
    ~60 lines of runner/monkeypatch scaffolding inline; only the agent
    subclass (always 401, refresh always fails) is test-specific.
    """

    class _Auth401AlwaysFailAgent(run_agent.AIAgent):
        def __init__(self, *args, **kwargs):
            kwargs.setdefault("skip_context_files", True)
            kwargs.setdefault("skip_memory", True)
            kwargs.setdefault("max_iterations", 4)
            super().__init__(*args, **kwargs)
            # Neutralize persistence side effects so the test stays hermetic.
            self._cleanup_task_resources = lambda task_id: None
            self._persist_session = lambda messages, history=None: None
            self._save_trajectory = lambda messages, user_message, completed: None
            self._save_session_log = lambda messages: None

        def _try_refresh_anthropic_client_credentials(self) -> bool:
            return False  # Simulate a failed credential refresh.

        def run_conversation(self, user_message, conversation_history=None, task_id=None):
            def _fake_api_call(api_kwargs):
                raise _UnauthorizedError()

            self._interruptible_api_call = _fake_api_call
            return super().run_conversation(
                user_message, conversation_history=conversation_history, task_id=task_id
            )

    result = _run_with_agent(monkeypatch, _Auth401AlwaysFailAgent)

    # 401 after failed refresh → non-retryable (falls through to is_client_error).
    assert result["api_calls"] == 1
    final = str(result.get("final_response", ""))
    assert "401" in final or "unauthorized" in final.lower()
|
||||
|
||||
|
||||
def test_prompt_too_long_triggers_compression(monkeypatch):
    """Anthropic 'prompt is too long' should trigger context compression.

    Uses _run_with_agent for the gateway setup instead of duplicating the
    ~60 lines of runner/monkeypatch scaffolding inline; only the agent
    subclass (prompt-too-long once, compression stub, then success) is
    test-specific.
    """

    class _PromptTooLongThenSuccessAgent(run_agent.AIAgent):
        # Class-level counter so the test can observe compression calls.
        compress_called = 0

        def __init__(self, *args, **kwargs):
            kwargs.setdefault("skip_context_files", True)
            kwargs.setdefault("skip_memory", True)
            kwargs.setdefault("max_iterations", 4)
            super().__init__(*args, **kwargs)
            # Neutralize persistence side effects so the test stays hermetic.
            self._cleanup_task_resources = lambda task_id: None
            self._persist_session = lambda messages, history=None: None
            self._save_trajectory = lambda messages, user_message, completed: None
            self._save_session_log = lambda messages: None

        def _compress_context(self, messages, system_message, approx_tokens=0, task_id=None):
            type(self).compress_called += 1
            # Simulate compression by dropping the oldest non-system message.
            if len(messages) > 2:
                compressed = [messages[0]] + messages[2:]
            else:
                compressed = messages
            return compressed, system_message

        def run_conversation(self, user_message, conversation_history=None, task_id=None):
            calls = {"n": 0}

            def _fake_api_call(api_kwargs):
                calls["n"] += 1
                if calls["n"] == 1:
                    raise _PromptTooLongError()
                return _anthropic_response("Compressed and recovered")

            self._interruptible_api_call = _fake_api_call
            return super().run_conversation(
                user_message, conversation_history=conversation_history, task_id=task_id
            )

    # Reset the counter in case the class object is ever reused.
    _PromptTooLongThenSuccessAgent.compress_called = 0

    result = _run_with_agent(monkeypatch, _PromptTooLongThenSuccessAgent)

    assert result["final_response"] == "Compressed and recovered"
    assert _PromptTooLongThenSuccessAgent.compress_called >= 1
|
||||
Reference in New Issue
Block a user