* refactor: re-architect tests to mirror the codebase
* Update tests.yml
* fix: add missing tool_error imports after registry refactor
* fix(tests): replace patch.dict with monkeypatch to prevent env var leaks under xdist
patch.dict(os.environ) can leak TERMINAL_ENV across xdist workers,
causing test_code_execution tests to hit the Modal remote path.
* fix(tests): fix update_check and telegram xdist failures
- test_update_check: replace patch("hermes_cli.banner.os.getenv") with
monkeypatch.setenv("HERMES_HOME") — banner.py no longer imports os
directly, it uses get_hermes_home() from hermes_constants.
- test_telegram_conflict/approval_buttons: provide real exception classes
for telegram.error mock (NetworkError, TimedOut, BadRequest) so the
except clause in connect() doesn't fail with "catching classes that do
not inherit from BaseException" when xdist pollutes sys.modules.
* fix(tests): accept unavailable_models kwarg in _prompt_model_selection mock
491 lines
18 KiB
Python
491 lines
18 KiB
Python
"""Tests for Anthropic error handling in the agent retry loop.
|
|
|
|
Covers all error paths in run_agent.py's run_conversation() for api_mode=anthropic_messages:
|
|
- 429 rate limit → retried with backoff
|
|
- 529 overloaded → retried with backoff
|
|
- 400 bad request → non-retryable, immediate fail
|
|
- 401 unauthorized → credential refresh + retry
|
|
- 500 server error → retried with backoff
|
|
- "prompt is too long" → context length error triggers compression
|
|
"""
|
|
|
|
import asyncio
|
|
import sys
|
|
import types
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock, AsyncMock
|
|
|
|
import pytest
|
|
|
|
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
|
|
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
|
|
sys.modules.setdefault("fal_client", types.SimpleNamespace())
|
|
|
|
import gateway.run as gateway_run
|
|
import run_agent
|
|
from gateway.config import Platform
|
|
from gateway.session import SessionSource
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _patch_agent_bootstrap(monkeypatch):
|
|
monkeypatch.setattr(
|
|
run_agent,
|
|
"get_tool_definitions",
|
|
lambda **kwargs: [
|
|
{
|
|
"type": "function",
|
|
"function": {
|
|
"name": "terminal",
|
|
"description": "Run shell commands.",
|
|
"parameters": {"type": "object", "properties": {}},
|
|
},
|
|
}
|
|
],
|
|
)
|
|
monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
|
|
|
|
|
|
def _anthropic_response(text: str):
|
|
"""Simulate an Anthropic messages.create() response object."""
|
|
return SimpleNamespace(
|
|
content=[SimpleNamespace(type="text", text=text)],
|
|
stop_reason="end_turn",
|
|
usage=SimpleNamespace(input_tokens=10, output_tokens=5),
|
|
model="claude-sonnet-4-6-20250514",
|
|
)
|
|
|
|
|
|
class _RateLimitError(Exception):
|
|
"""Simulates Anthropic 429 rate limit error."""
|
|
def __init__(self):
|
|
super().__init__("Error code: 429 - Rate limit exceeded. Please retry after 30s.")
|
|
self.status_code = 429
|
|
|
|
|
|
class _OverloadedError(Exception):
|
|
"""Simulates Anthropic 529 overloaded error."""
|
|
def __init__(self):
|
|
super().__init__("Error code: 529 - API is temporarily overloaded.")
|
|
self.status_code = 529
|
|
|
|
|
|
class _BadRequestError(Exception):
|
|
"""Simulates Anthropic 400 bad request error (non-retryable)."""
|
|
def __init__(self):
|
|
super().__init__("Error code: 400 - Invalid model specified.")
|
|
self.status_code = 400
|
|
|
|
|
|
class _UnauthorizedError(Exception):
|
|
"""Simulates Anthropic 401 unauthorized error."""
|
|
def __init__(self):
|
|
super().__init__("Error code: 401 - Unauthorized. Invalid API key.")
|
|
self.status_code = 401
|
|
|
|
|
|
class _ServerError(Exception):
|
|
"""Simulates Anthropic 500 internal server error."""
|
|
def __init__(self):
|
|
super().__init__("Error code: 500 - Internal server error.")
|
|
self.status_code = 500
|
|
|
|
|
|
class _PromptTooLongError(Exception):
|
|
"""Simulates Anthropic prompt-too-long error (triggers context compression)."""
|
|
def __init__(self):
|
|
super().__init__("prompt is too long: 250000 tokens > 200000 maximum")
|
|
self.status_code = 400
|
|
|
|
|
|
class _FakeAnthropicClient:
|
|
def close(self):
|
|
pass
|
|
|
|
|
|
def _fake_build_anthropic_client(key, base_url=None):
|
|
return _FakeAnthropicClient()
|
|
|
|
|
|
def _make_agent_cls(error_cls, recover_after=None):
|
|
"""Create an AIAgent subclass that raises error_cls on API calls.
|
|
|
|
If recover_after is set, the agent succeeds after that many failures.
|
|
"""
|
|
|
|
class _Agent(run_agent.AIAgent):
|
|
def __init__(self, *args, **kwargs):
|
|
kwargs.setdefault("skip_context_files", True)
|
|
kwargs.setdefault("skip_memory", True)
|
|
kwargs.setdefault("max_iterations", 4)
|
|
super().__init__(*args, **kwargs)
|
|
self._cleanup_task_resources = lambda task_id: None
|
|
self._persist_session = lambda messages, history=None: None
|
|
self._save_trajectory = lambda messages, user_message, completed: None
|
|
self._save_session_log = lambda messages: None
|
|
|
|
def run_conversation(self, user_message, conversation_history=None, task_id=None):
|
|
calls = {"n": 0}
|
|
|
|
def _fake_api_call(api_kwargs):
|
|
calls["n"] += 1
|
|
if recover_after is not None and calls["n"] > recover_after:
|
|
return _anthropic_response("Recovered")
|
|
raise error_cls()
|
|
|
|
self._interruptible_api_call = _fake_api_call
|
|
return super().run_conversation(
|
|
user_message, conversation_history=conversation_history, task_id=task_id
|
|
)
|
|
|
|
return _Agent
|
|
|
|
|
|
def _run_with_agent(monkeypatch, agent_cls):
|
|
"""Run _run_agent through the gateway with the given agent class."""
|
|
_patch_agent_bootstrap(monkeypatch)
|
|
monkeypatch.setattr(
|
|
"agent.anthropic_adapter.build_anthropic_client", _fake_build_anthropic_client
|
|
)
|
|
monkeypatch.setattr(run_agent, "AIAgent", agent_cls)
|
|
monkeypatch.setattr(
|
|
gateway_run,
|
|
"_resolve_runtime_agent_kwargs",
|
|
lambda: {
|
|
"provider": "anthropic",
|
|
"api_mode": "anthropic_messages",
|
|
"base_url": "https://api.anthropic.com",
|
|
"api_key": "sk-ant-api03-test-key",
|
|
},
|
|
)
|
|
monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
|
|
|
|
runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
|
|
runner.adapters = {}
|
|
runner._ephemeral_system_prompt = ""
|
|
runner._prefill_messages = []
|
|
runner._reasoning_config = None
|
|
runner._provider_routing = {}
|
|
runner._fallback_model = None
|
|
runner._running_agents = {}
|
|
runner.hooks = MagicMock()
|
|
runner.hooks.emit = AsyncMock()
|
|
runner.hooks.loaded_hooks = []
|
|
runner._session_db = None
|
|
|
|
source = SessionSource(
|
|
platform=Platform.LOCAL,
|
|
chat_id="cli",
|
|
chat_name="CLI",
|
|
chat_type="dm",
|
|
user_id="test-user-1",
|
|
)
|
|
|
|
return asyncio.run(
|
|
runner._run_agent(
|
|
message="hello",
|
|
context_prompt="",
|
|
history=[],
|
|
source=source,
|
|
session_id="test-session",
|
|
session_key="agent:main:local:dm",
|
|
)
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_429_rate_limit_is_retried_and_recovers(monkeypatch):
|
|
"""429 should be retried with backoff. First call fails, second succeeds."""
|
|
agent_cls = _make_agent_cls(_RateLimitError, recover_after=1)
|
|
result = _run_with_agent(monkeypatch, agent_cls)
|
|
assert result["final_response"] == "Recovered"
|
|
|
|
|
|
def test_529_overloaded_is_retried_and_recovers(monkeypatch):
|
|
"""529 should be retried with backoff. First call fails, second succeeds."""
|
|
agent_cls = _make_agent_cls(_OverloadedError, recover_after=1)
|
|
result = _run_with_agent(monkeypatch, agent_cls)
|
|
assert result["final_response"] == "Recovered"
|
|
|
|
|
|
def test_429_exhausts_all_retries_before_raising(monkeypatch):
|
|
"""429 must retry max_retries times, then return a failed result.
|
|
|
|
The agent no longer re-raises after exhausting retries — it returns a
|
|
result dict with the error in final_response. This changed when the
|
|
fallback-provider feature was added (the agent tries a fallback before
|
|
giving up, and returns a result dict either way).
|
|
"""
|
|
agent_cls = _make_agent_cls(_RateLimitError) # always fails
|
|
result = _run_with_agent(monkeypatch, agent_cls)
|
|
resp = str(result.get("final_response", ""))
|
|
assert "429" in resp or "retries" in resp.lower()
|
|
|
|
|
|
def test_400_bad_request_is_non_retryable(monkeypatch):
|
|
"""400 should fail immediately with only 1 API call (regression guard)."""
|
|
agent_cls = _make_agent_cls(_BadRequestError)
|
|
result = _run_with_agent(monkeypatch, agent_cls)
|
|
assert result["api_calls"] == 1
|
|
assert "400" in str(result.get("final_response", ""))
|
|
|
|
|
|
def test_500_server_error_is_retried_and_recovers(monkeypatch):
|
|
"""500 should be retried with backoff. First call fails, second succeeds."""
|
|
agent_cls = _make_agent_cls(_ServerError, recover_after=1)
|
|
result = _run_with_agent(monkeypatch, agent_cls)
|
|
assert result["final_response"] == "Recovered"
|
|
|
|
|
|
def test_401_credential_refresh_recovers(monkeypatch):
|
|
"""401 should trigger credential refresh and retry once."""
|
|
_patch_agent_bootstrap(monkeypatch)
|
|
monkeypatch.setattr(
|
|
"agent.anthropic_adapter.build_anthropic_client", _fake_build_anthropic_client
|
|
)
|
|
monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
|
|
|
|
refresh_count = {"n": 0}
|
|
|
|
class _Auth401ThenSuccessAgent(run_agent.AIAgent):
|
|
def __init__(self, *args, **kwargs):
|
|
kwargs.setdefault("skip_context_files", True)
|
|
kwargs.setdefault("skip_memory", True)
|
|
kwargs.setdefault("max_iterations", 4)
|
|
super().__init__(*args, **kwargs)
|
|
self._cleanup_task_resources = lambda task_id: None
|
|
self._persist_session = lambda messages, history=None: None
|
|
self._save_trajectory = lambda messages, user_message, completed: None
|
|
self._save_session_log = lambda messages: None
|
|
|
|
def _try_refresh_anthropic_client_credentials(self) -> bool:
|
|
refresh_count["n"] += 1
|
|
return True # Simulate successful credential refresh
|
|
|
|
def run_conversation(self, user_message, conversation_history=None, task_id=None):
|
|
calls = {"n": 0}
|
|
|
|
def _fake_api_call(api_kwargs):
|
|
calls["n"] += 1
|
|
if calls["n"] == 1:
|
|
raise _UnauthorizedError()
|
|
return _anthropic_response("Auth refreshed")
|
|
|
|
self._interruptible_api_call = _fake_api_call
|
|
# Also patch streaming path — run_conversation now prefers
|
|
# streaming for health checking even without stream consumers.
|
|
self._interruptible_streaming_api_call = lambda api_kwargs, **kw: _fake_api_call(api_kwargs)
|
|
return super().run_conversation(
|
|
user_message, conversation_history=conversation_history, task_id=task_id
|
|
)
|
|
|
|
monkeypatch.setattr(run_agent, "AIAgent", _Auth401ThenSuccessAgent)
|
|
monkeypatch.setattr(
|
|
gateway_run,
|
|
"_resolve_runtime_agent_kwargs",
|
|
lambda: {
|
|
"provider": "anthropic",
|
|
"api_mode": "anthropic_messages",
|
|
"base_url": "https://api.anthropic.com",
|
|
"api_key": "sk-ant-api03-test-key",
|
|
},
|
|
)
|
|
|
|
runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
|
|
runner.adapters = {}
|
|
runner._ephemeral_system_prompt = ""
|
|
runner._prefill_messages = []
|
|
runner._reasoning_config = None
|
|
runner._provider_routing = {}
|
|
runner._fallback_model = None
|
|
runner._running_agents = {}
|
|
runner.hooks = MagicMock()
|
|
runner.hooks.emit = AsyncMock()
|
|
runner.hooks.loaded_hooks = []
|
|
runner._session_db = None
|
|
|
|
source = SessionSource(
|
|
platform=Platform.LOCAL, chat_id="cli", chat_name="CLI",
|
|
chat_type="dm", user_id="test-user-1",
|
|
)
|
|
|
|
result = asyncio.run(
|
|
runner._run_agent(
|
|
message="hello", context_prompt="", history=[],
|
|
source=source, session_id="session-401",
|
|
session_key="agent:main:local:dm",
|
|
)
|
|
)
|
|
|
|
assert result["final_response"] == "Auth refreshed"
|
|
assert refresh_count["n"] == 1
|
|
|
|
|
|
def test_401_refresh_fails_is_non_retryable(monkeypatch):
|
|
"""401 with failed credential refresh should be treated as non-retryable."""
|
|
_patch_agent_bootstrap(monkeypatch)
|
|
monkeypatch.setattr(
|
|
"agent.anthropic_adapter.build_anthropic_client", _fake_build_anthropic_client
|
|
)
|
|
monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
|
|
|
|
class _Auth401AlwaysFailAgent(run_agent.AIAgent):
|
|
def __init__(self, *args, **kwargs):
|
|
kwargs.setdefault("skip_context_files", True)
|
|
kwargs.setdefault("skip_memory", True)
|
|
kwargs.setdefault("max_iterations", 4)
|
|
super().__init__(*args, **kwargs)
|
|
self._cleanup_task_resources = lambda task_id: None
|
|
self._persist_session = lambda messages, history=None: None
|
|
self._save_trajectory = lambda messages, user_message, completed: None
|
|
self._save_session_log = lambda messages: None
|
|
|
|
def _try_refresh_anthropic_client_credentials(self) -> bool:
|
|
return False # Simulate failed credential refresh
|
|
|
|
def run_conversation(self, user_message, conversation_history=None, task_id=None):
|
|
def _fake_api_call(api_kwargs):
|
|
raise _UnauthorizedError()
|
|
|
|
self._interruptible_api_call = _fake_api_call
|
|
return super().run_conversation(
|
|
user_message, conversation_history=conversation_history, task_id=task_id
|
|
)
|
|
|
|
monkeypatch.setattr(run_agent, "AIAgent", _Auth401AlwaysFailAgent)
|
|
monkeypatch.setattr(
|
|
gateway_run,
|
|
"_resolve_runtime_agent_kwargs",
|
|
lambda: {
|
|
"provider": "anthropic",
|
|
"api_mode": "anthropic_messages",
|
|
"base_url": "https://api.anthropic.com",
|
|
"api_key": "sk-ant-api03-test-key",
|
|
},
|
|
)
|
|
|
|
runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
|
|
runner.adapters = {}
|
|
runner._ephemeral_system_prompt = ""
|
|
runner._prefill_messages = []
|
|
runner._reasoning_config = None
|
|
runner._provider_routing = {}
|
|
runner._fallback_model = None
|
|
runner._running_agents = {}
|
|
runner.hooks = MagicMock()
|
|
runner.hooks.emit = AsyncMock()
|
|
runner.hooks.loaded_hooks = []
|
|
runner._session_db = None
|
|
|
|
source = SessionSource(
|
|
platform=Platform.LOCAL, chat_id="cli", chat_name="CLI",
|
|
chat_type="dm", user_id="test-user-1",
|
|
)
|
|
|
|
result = asyncio.run(
|
|
runner._run_agent(
|
|
message="hello", context_prompt="", history=[],
|
|
source=source, session_id="session-401-fail",
|
|
session_key="agent:main:local:dm",
|
|
)
|
|
)
|
|
|
|
# 401 after failed refresh → non-retryable (falls through to is_client_error)
|
|
assert result["api_calls"] == 1
|
|
assert "401" in str(result.get("final_response", "")) or "unauthorized" in str(result.get("final_response", "")).lower()
|
|
|
|
|
|
def test_prompt_too_long_triggers_compression(monkeypatch):
|
|
"""Anthropic 'prompt is too long' error should trigger context compression, not immediate fail."""
|
|
_patch_agent_bootstrap(monkeypatch)
|
|
monkeypatch.setattr(
|
|
"agent.anthropic_adapter.build_anthropic_client", _fake_build_anthropic_client
|
|
)
|
|
monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
|
|
|
|
class _PromptTooLongThenSuccessAgent(run_agent.AIAgent):
|
|
compress_called = 0
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
kwargs.setdefault("skip_context_files", True)
|
|
kwargs.setdefault("skip_memory", True)
|
|
kwargs.setdefault("max_iterations", 4)
|
|
super().__init__(*args, **kwargs)
|
|
self._cleanup_task_resources = lambda task_id: None
|
|
self._persist_session = lambda messages, history=None: None
|
|
self._save_trajectory = lambda messages, user_message, completed: None
|
|
self._save_session_log = lambda messages: None
|
|
|
|
def _compress_context(self, messages, system_message, approx_tokens=0, task_id=None):
|
|
type(self).compress_called += 1
|
|
# Simulate compression by dropping oldest non-system message
|
|
if len(messages) > 2:
|
|
compressed = [messages[0]] + messages[2:]
|
|
else:
|
|
compressed = messages
|
|
return compressed, system_message
|
|
|
|
def run_conversation(self, user_message, conversation_history=None, task_id=None):
|
|
calls = {"n": 0}
|
|
|
|
def _fake_api_call(api_kwargs):
|
|
calls["n"] += 1
|
|
if calls["n"] == 1:
|
|
raise _PromptTooLongError()
|
|
return _anthropic_response("Compressed and recovered")
|
|
|
|
self._interruptible_api_call = _fake_api_call
|
|
return super().run_conversation(
|
|
user_message, conversation_history=conversation_history, task_id=task_id
|
|
)
|
|
|
|
_PromptTooLongThenSuccessAgent.compress_called = 0
|
|
monkeypatch.setattr(run_agent, "AIAgent", _PromptTooLongThenSuccessAgent)
|
|
monkeypatch.setattr(
|
|
gateway_run,
|
|
"_resolve_runtime_agent_kwargs",
|
|
lambda: {
|
|
"provider": "anthropic",
|
|
"api_mode": "anthropic_messages",
|
|
"base_url": "https://api.anthropic.com",
|
|
"api_key": "sk-ant-api03-test-key",
|
|
},
|
|
)
|
|
|
|
runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
|
|
runner.adapters = {}
|
|
runner._ephemeral_system_prompt = ""
|
|
runner._prefill_messages = []
|
|
runner._reasoning_config = None
|
|
runner._provider_routing = {}
|
|
runner._fallback_model = None
|
|
runner._running_agents = {}
|
|
runner.hooks = MagicMock()
|
|
runner.hooks.emit = AsyncMock()
|
|
runner.hooks.loaded_hooks = []
|
|
runner._session_db = None
|
|
|
|
source = SessionSource(
|
|
platform=Platform.LOCAL, chat_id="cli", chat_name="CLI",
|
|
chat_type="dm", user_id="test-user-1",
|
|
)
|
|
|
|
result = asyncio.run(
|
|
runner._run_agent(
|
|
message="hello", context_prompt="", history=[],
|
|
source=source, session_id="session-prompt-long",
|
|
session_key="agent:main:local:dm",
|
|
)
|
|
)
|
|
|
|
assert result["final_response"] == "Compressed and recovered"
|
|
assert _PromptTooLongThenSuccessAgent.compress_called >= 1
|