"""Tests for Anthropic Sonnet long-context tier 429 handling. When Claude Max users without "extra usage" hit the 1M context tier on Sonnet, Anthropic returns HTTP 429 "Extra usage is required for long context requests." This is NOT a transient rate limit — the agent should reduce context_length to 200k and compress instead of retrying. Only Sonnet is affected — Opus 1M is general access. """ import pytest from types import SimpleNamespace from unittest.mock import MagicMock, patch # --------------------------------------------------------------------------- # Detection logic # --------------------------------------------------------------------------- class TestLongContextTierDetection: """Verify the detection heuristic matches the Anthropic error.""" @staticmethod def _is_long_context_tier_error(status_code, error_msg, model="claude-sonnet-4.6"): error_msg = error_msg.lower() return ( status_code == 429 and "extra usage" in error_msg and "long context" in error_msg and "sonnet" in model.lower() ) def test_matches_anthropic_error(self): assert self._is_long_context_tier_error( 429, "Extra usage is required for long context requests.", ) def test_matches_lowercase(self): assert self._is_long_context_tier_error( 429, "extra usage is required for long context requests.", ) def test_matches_openrouter_model_id(self): assert self._is_long_context_tier_error( 429, "Extra usage is required for long context requests.", model="anthropic/claude-sonnet-4.6", ) def test_matches_nous_model_id(self): assert self._is_long_context_tier_error( 429, "Extra usage is required for long context requests.", model="claude-sonnet-4-6", ) def test_rejects_opus(self): """Opus 1M is general access — should NOT trigger reduction.""" assert not self._is_long_context_tier_error( 429, "Extra usage is required for long context requests.", model="claude-opus-4.6", ) def test_rejects_opus_openrouter(self): assert not self._is_long_context_tier_error( 429, "Extra usage is required for long context requests.", model="anthropic/claude-opus-4.6", ) def test_rejects_normal_429(self): assert not self._is_long_context_tier_error( 429, "Rate limit exceeded. Please retry after 30 seconds.", ) def test_rejects_wrong_status(self): assert not self._is_long_context_tier_error( 400, "Extra usage is required for long context requests.", ) def test_rejects_partial_match(self): """Both 'extra usage' AND 'long context' must be present.""" assert not self._is_long_context_tier_error( 429, "extra usage required" ) assert not self._is_long_context_tier_error( 429, "long context requests not supported" ) # --------------------------------------------------------------------------- # Context reduction # --------------------------------------------------------------------------- class TestContextReduction: """When the long-context tier error fires, context_length should drop to 200k and the reduced flag should be set correctly.""" def _make_compressor(self, context_length=1_000_000, threshold_percent=0.5): c = SimpleNamespace( context_length=context_length, threshold_percent=threshold_percent, threshold_tokens=int(context_length * threshold_percent), _context_probed=False, _context_probe_persistable=False, ) return c def test_reduces_1m_to_200k(self): comp = self._make_compressor(1_000_000) reduced_ctx = 200_000 if comp.context_length > reduced_ctx: comp.context_length = reduced_ctx comp.threshold_tokens = int(reduced_ctx * comp.threshold_percent) comp._context_probed = True comp._context_probe_persistable = False assert comp.context_length == 200_000 assert comp.threshold_tokens == 100_000 assert comp._context_probed is True # Must NOT persist — subscription tier, not model capability assert comp._context_probe_persistable is False def test_no_reduction_when_already_200k(self): comp = self._make_compressor(200_000) reduced_ctx = 200_000 original = comp.context_length if comp.context_length > reduced_ctx: comp.context_length = reduced_ctx assert comp.context_length == original # unchanged def test_no_reduction_when_below_200k(self): comp = self._make_compressor(128_000) reduced_ctx = 200_000 original = comp.context_length if comp.context_length > reduced_ctx: comp.context_length = reduced_ctx assert comp.context_length == original # unchanged # --------------------------------------------------------------------------- # Integration: agent error handler path # --------------------------------------------------------------------------- class TestAgentErrorPath: """Verify the long-context 429 doesn't hit the generic rate-limit or client-error handlers.""" def test_long_context_429_not_treated_as_rate_limit(self): """The error should be intercepted before the generic is_rate_limited check fires a fallback switch.""" error_msg = "extra usage is required for long context requests." status_code = 429 model = "claude-sonnet-4.6" _is_long_context_tier_error = ( status_code == 429 and "extra usage" in error_msg and "long context" in error_msg and "sonnet" in model.lower() ) assert _is_long_context_tier_error def test_opus_429_falls_through_to_rate_limit(self): """Opus should NOT match — falls through to generic rate-limit.""" error_msg = "extra usage is required for long context requests." status_code = 429 model = "claude-opus-4.6" _is_long_context_tier_error = ( status_code == 429 and "extra usage" in error_msg and "long context" in error_msg and "sonnet" in model.lower() ) assert not _is_long_context_tier_error def test_normal_429_still_treated_as_rate_limit(self): """A normal 429 should NOT match the long-context check.""" error_msg = "rate limit exceeded" status_code = 429 model = "claude-sonnet-4.6" _is_long_context_tier_error = ( status_code == 429 and "extra usage" in error_msg and "long context" in error_msg and "sonnet" in model.lower() ) assert not _is_long_context_tier_error is_rate_limited = ( status_code == 429 or "rate limit" in error_msg ) assert is_rate_limited