* refactor: re-architect tests to mirror the codebase
* Update tests.yml
* fix: add missing tool_error imports after registry refactor
* fix(tests): replace patch.dict with monkeypatch to prevent env var leaks under xdist
patch.dict(os.environ) can leak TERMINAL_ENV across xdist workers,
causing test_code_execution tests to hit the Modal remote path.
* fix(tests): fix update_check and telegram xdist failures
- test_update_check: replace patch("hermes_cli.banner.os.getenv") with
monkeypatch.setenv("HERMES_HOME") — banner.py no longer imports os
directly, it uses get_hermes_home() from hermes_constants.
- test_telegram_conflict/approval_buttons: provide real exception classes
for telegram.error mock (NetworkError, TimedOut, BadRequest) so the
except clause in connect() doesn't fail with "catching classes that do
not inherit from BaseException" when xdist pollutes sys.modules.
* fix(tests): accept unavailable_models kwarg in _prompt_model_selection mock
210 lines
7.1 KiB
Python
210 lines
7.1 KiB
Python
"""Tests for Anthropic Sonnet long-context tier 429 handling.

When Claude Max users without "extra usage" hit the 1M context tier
on Sonnet, Anthropic returns HTTP 429 "Extra usage is required for long
context requests." This is NOT a transient rate limit — the agent should
reduce context_length to 200k and compress instead of retrying.

Only Sonnet is affected — Opus 1M is general access.
"""
|
|
|
|
import pytest
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Detection logic
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestLongContextTierDetection:
    """Verify the detection heuristic matches the Anthropic error."""

    @staticmethod
    def _is_long_context_tier_error(
        status_code: int,
        error_msg: str,
        model: str = "claude-sonnet-4.6",
    ) -> bool:
        """Return True when a response looks like the Sonnet long-context tier 429.

        All four conditions must hold: the status is 429, the message
        contains both "extra usage" and "long context", and the model id
        contains "sonnet". Message and model are compared case-insensitively.

        NOTE(review): this is a local copy of the heuristic under test, not
        an import of the production check — keep the two in sync.
        """
        error_msg = error_msg.lower()
        return (
            status_code == 429
            and "extra usage" in error_msg
            and "long context" in error_msg
            and "sonnet" in model.lower()
        )

    def test_matches_anthropic_error(self):
        """The verbatim Anthropic message with the default Sonnet model matches."""
        assert self._is_long_context_tier_error(
            429,
            "Extra usage is required for long context requests.",
        )

    def test_matches_lowercase(self):
        """An already lower-cased message matches as well."""
        assert self._is_long_context_tier_error(
            429,
            "extra usage is required for long context requests.",
        )

    def test_matches_openrouter_model_id(self):
        """A provider-prefixed Sonnet id still matches."""
        assert self._is_long_context_tier_error(
            429,
            "Extra usage is required for long context requests.",
            model="anthropic/claude-sonnet-4.6",
        )

    def test_matches_nous_model_id(self):
        """A dash-separated Sonnet version id still matches."""
        assert self._is_long_context_tier_error(
            429,
            "Extra usage is required for long context requests.",
            model="claude-sonnet-4-6",
        )

    def test_rejects_opus(self):
        """Opus 1M is general access — should NOT trigger reduction."""
        assert not self._is_long_context_tier_error(
            429,
            "Extra usage is required for long context requests.",
            model="claude-opus-4.6",
        )

    def test_rejects_opus_openrouter(self):
        """A provider-prefixed Opus id is rejected too."""
        assert not self._is_long_context_tier_error(
            429,
            "Extra usage is required for long context requests.",
            model="anthropic/claude-opus-4.6",
        )

    def test_rejects_normal_429(self):
        """An ordinary rate-limit message must not match."""
        assert not self._is_long_context_tier_error(
            429,
            "Rate limit exceeded. Please retry after 30 seconds.",
        )

    def test_rejects_wrong_status(self):
        """The right message with a non-429 status must not match."""
        assert not self._is_long_context_tier_error(
            400,
            "Extra usage is required for long context requests.",
        )

    def test_rejects_partial_match(self):
        """Both 'extra usage' AND 'long context' must be present."""
        assert not self._is_long_context_tier_error(
            429, "extra usage required"
        )
        assert not self._is_long_context_tier_error(
            429, "long context requests not supported"
        )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Context reduction
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestContextReduction:
    """When the long-context tier error fires, context_length should
    drop to 200k and the reduced flag should be set correctly."""

    def _make_compressor(self, context_length=1_000_000, threshold_percent=0.5):
        """Build a stand-in compressor object carrying only the fields used here.

        ``threshold_tokens`` is derived as ``int(context_length *
        threshold_percent)``; both probe flags start out False.
        """
        return SimpleNamespace(
            context_length=context_length,
            threshold_percent=threshold_percent,
            threshold_tokens=int(context_length * threshold_percent),
            _context_probed=False,
            _context_probe_persistable=False,
        )

    @staticmethod
    def _apply_reduction(comp, reduced_ctx=200_000):
        """Shrink *comp* to *reduced_ctx* when its context is larger.

        One shared copy of the reduction steps, so every test in this class
        exercises the same sequence instead of a hand-copied subset.

        Returns True when the compressor was modified, False when its
        context_length was already at or below *reduced_ctx* (in which case
        nothing is touched).

        NOTE(review): this is a local model of the reduction, not a call
        into the production handler — keep the two in sync.
        """
        if comp.context_length <= reduced_ctx:
            return False
        comp.context_length = reduced_ctx
        comp.threshold_tokens = int(reduced_ctx * comp.threshold_percent)
        comp._context_probed = True
        # Must NOT persist — subscription tier, not model capability
        comp._context_probe_persistable = False
        return True

    def test_reduces_1m_to_200k(self):
        """A 1M-context compressor is cut to 200k with its threshold rescaled."""
        comp = self._make_compressor(1_000_000)

        assert self._apply_reduction(comp, 200_000)

        assert comp.context_length == 200_000
        assert comp.threshold_tokens == 100_000
        assert comp._context_probed is True
        # Must NOT persist — subscription tier, not model capability
        assert comp._context_probe_persistable is False

    def test_no_reduction_when_already_200k(self):
        """A compressor already at 200k is left completely untouched."""
        comp = self._make_compressor(200_000)
        original = comp.context_length
        snapshot = dict(vars(comp))

        assert not self._apply_reduction(comp, 200_000)

        assert comp.context_length == original  # unchanged
        # Threshold and probe flags must be untouched as well.
        assert vars(comp) == snapshot

    def test_no_reduction_when_below_200k(self):
        """A compressor below 200k is never raised or otherwise modified."""
        comp = self._make_compressor(128_000)
        original = comp.context_length
        snapshot = dict(vars(comp))

        assert not self._apply_reduction(comp, 200_000)

        assert comp.context_length == original  # unchanged
        # Threshold and probe flags must be untouched as well.
        assert vars(comp) == snapshot
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Integration: agent error handler path
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestAgentErrorPath:
    """Verify the long-context 429 doesn't hit the generic rate-limit
    or client-error handlers."""

    @staticmethod
    def _is_long_context_tier_error(status_code, error_msg, model):
        """Return True when the response matches the long-context tier check.

        Requires status 429, both "extra usage" and "long context" in
        *error_msg*, and "sonnet" in the lower-cased *model*. *error_msg* is
        compared as given; the tests in this class pass lower-cased messages.
        """
        return (
            status_code == 429
            and "extra usage" in error_msg
            and "long context" in error_msg
            and "sonnet" in model.lower()
        )

    @staticmethod
    def _is_rate_limited(status_code, error_msg):
        """Return True when the generic rate-limit check would match.

        Matches on status 429 or on "rate limit" appearing in *error_msg*.
        """
        return status_code == 429 or "rate limit" in error_msg

    def test_long_context_429_not_treated_as_rate_limit(self):
        """The error should be intercepted before the generic
        is_rate_limited check fires a fallback switch."""
        error_msg = "extra usage is required for long context requests."
        status_code = 429
        model = "claude-sonnet-4.6"

        assert self._is_long_context_tier_error(status_code, error_msg, model)
        # The generic check matches this response too, which is why the
        # long-context check has to be evaluated first.
        assert self._is_rate_limited(status_code, error_msg)

    def test_opus_429_falls_through_to_rate_limit(self):
        """Opus should NOT match — falls through to generic rate-limit."""
        error_msg = "extra usage is required for long context requests."
        status_code = 429
        model = "claude-opus-4.6"

        assert not self._is_long_context_tier_error(status_code, error_msg, model)
        # With the long-context check declining, the generic check applies.
        assert self._is_rate_limited(status_code, error_msg)

    def test_normal_429_still_treated_as_rate_limit(self):
        """A normal 429 should NOT match the long-context check."""
        error_msg = "rate limit exceeded"
        status_code = 429
        model = "claude-sonnet-4.6"

        assert not self._is_long_context_tier_error(status_code, error_msg, model)
        assert self._is_rate_limited(status_code, error_msg)
|