fix: handle Anthropic long-context tier 429 by reducing to 200k
Anthropic returns HTTP 429 'Extra usage is required for long context requests' when a Claude Max subscription doesn't include the 1M context tier. This is NOT a transient rate limit — retrying won't help. Detect this specific error before the generic rate-limit handler and: 1. Reduce context_length from 1M to 200k (the standard tier) 2. Trigger context compression to fit 3. Retry with the reduced context The reduction is session-scoped (not persisted) so it auto-recovers if the user later enables extra usage on their subscription. Fixes: Sonnet 4.6 instant rate limits on Claude Max without extra usage
This commit is contained in:
163
tests/test_long_context_tier_429.py
Normal file
163
tests/test_long_context_tier_429.py
Normal file
@@ -0,0 +1,163 @@
|
||||
"""Tests for Anthropic long-context tier 429 handling.
|
||||
|
||||
When Claude Max users without "extra usage" hit the 1M context tier,
|
||||
Anthropic returns HTTP 429 "Extra usage is required for long context
|
||||
requests." This is NOT a transient rate limit — the agent should
|
||||
reduce context_length to 200k and compress instead of retrying.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Detection logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLongContextTierDetection:
|
||||
"""Verify the detection heuristic matches the Anthropic error."""
|
||||
|
||||
@staticmethod
|
||||
def _is_long_context_tier_error(status_code, error_msg):
|
||||
error_msg = error_msg.lower()
|
||||
return (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
)
|
||||
|
||||
def test_matches_anthropic_error(self):
|
||||
assert self._is_long_context_tier_error(
|
||||
429,
|
||||
"Extra usage is required for long context requests.",
|
||||
)
|
||||
|
||||
def test_matches_lowercase(self):
|
||||
assert self._is_long_context_tier_error(
|
||||
429,
|
||||
"extra usage is required for long context requests.",
|
||||
)
|
||||
|
||||
def test_rejects_normal_429(self):
|
||||
assert not self._is_long_context_tier_error(
|
||||
429,
|
||||
"Rate limit exceeded. Please retry after 30 seconds.",
|
||||
)
|
||||
|
||||
def test_rejects_wrong_status(self):
|
||||
assert not self._is_long_context_tier_error(
|
||||
400,
|
||||
"Extra usage is required for long context requests.",
|
||||
)
|
||||
|
||||
def test_rejects_partial_match(self):
|
||||
"""Both 'extra usage' AND 'long context' must be present."""
|
||||
assert not self._is_long_context_tier_error(
|
||||
429, "extra usage required"
|
||||
)
|
||||
assert not self._is_long_context_tier_error(
|
||||
429, "long context requests not supported"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Context reduction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestContextReduction:
|
||||
"""When the long-context tier error fires, context_length should
|
||||
drop to 200k and the reduced flag should be set correctly."""
|
||||
|
||||
def _make_compressor(self, context_length=1_000_000, threshold_percent=0.5):
|
||||
c = SimpleNamespace(
|
||||
context_length=context_length,
|
||||
threshold_percent=threshold_percent,
|
||||
threshold_tokens=int(context_length * threshold_percent),
|
||||
_context_probed=False,
|
||||
_context_probe_persistable=False,
|
||||
)
|
||||
return c
|
||||
|
||||
def test_reduces_1m_to_200k(self):
|
||||
comp = self._make_compressor(1_000_000)
|
||||
reduced_ctx = 200_000
|
||||
|
||||
if comp.context_length > reduced_ctx:
|
||||
comp.context_length = reduced_ctx
|
||||
comp.threshold_tokens = int(reduced_ctx * comp.threshold_percent)
|
||||
comp._context_probed = True
|
||||
comp._context_probe_persistable = False
|
||||
|
||||
assert comp.context_length == 200_000
|
||||
assert comp.threshold_tokens == 100_000
|
||||
assert comp._context_probed is True
|
||||
# Must NOT persist — subscription tier, not model capability
|
||||
assert comp._context_probe_persistable is False
|
||||
|
||||
def test_no_reduction_when_already_200k(self):
|
||||
comp = self._make_compressor(200_000)
|
||||
reduced_ctx = 200_000
|
||||
|
||||
original = comp.context_length
|
||||
if comp.context_length > reduced_ctx:
|
||||
comp.context_length = reduced_ctx
|
||||
|
||||
assert comp.context_length == original # unchanged
|
||||
|
||||
def test_no_reduction_when_below_200k(self):
|
||||
comp = self._make_compressor(128_000)
|
||||
reduced_ctx = 200_000
|
||||
|
||||
original = comp.context_length
|
||||
if comp.context_length > reduced_ctx:
|
||||
comp.context_length = reduced_ctx
|
||||
|
||||
assert comp.context_length == original # unchanged
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration: agent error handler path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAgentErrorPath:
|
||||
"""Verify the long-context 429 doesn't hit the generic rate-limit
|
||||
or client-error handlers."""
|
||||
|
||||
def test_long_context_429_not_treated_as_rate_limit(self):
|
||||
"""The error should be intercepted before the generic
|
||||
is_rate_limited check fires a fallback switch."""
|
||||
error_msg = "extra usage is required for long context requests."
|
||||
status_code = 429
|
||||
|
||||
# The long-context check fires first
|
||||
_is_long_context_tier_error = (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
)
|
||||
assert _is_long_context_tier_error
|
||||
|
||||
# So we never reach the generic rate-limit path
|
||||
# (in the real code, `break` exits the retry loop)
|
||||
|
||||
def test_normal_429_still_treated_as_rate_limit(self):
|
||||
"""A normal 429 should NOT match the long-context check."""
|
||||
error_msg = "rate limit exceeded"
|
||||
status_code = 429
|
||||
|
||||
_is_long_context_tier_error = (
|
||||
status_code == 429
|
||||
and "extra usage" in error_msg
|
||||
and "long context" in error_msg
|
||||
)
|
||||
assert not _is_long_context_tier_error
|
||||
|
||||
is_rate_limited = (
|
||||
status_code == 429
|
||||
or "rate limit" in error_msg
|
||||
)
|
||||
assert is_rate_limited
|
||||
Reference in New Issue
Block a user