Files
hermes-agent/tests/test_long_context_tier_429.py
Teknium 8fd9fafc84 fix: handle Anthropic Sonnet long-context tier 429 by reducing to 200k (#4747)
Anthropic returns HTTP 429 'Extra usage is required for long context
requests' when a Claude Max subscription doesn't include the 1M context
tier. This is NOT a transient rate limit — retrying won't help.

Only applies to Sonnet models (Opus 1M is general access). Detects
this specific error before the generic rate-limit handler and:
1. Reduces context_length from 1M to 200k (the standard tier)
2. Triggers context compression to fit
3. Retries with the reduced context

The reduction is session-scoped (not persisted) so it auto-recovers
if the user later enables extra usage on their subscription.

Fixes: Sonnet 4.6 instant rate limits on Claude Max without extra usage
2026-04-03 02:05:02 -07:00

210 lines
7.1 KiB
Python

"""Tests for Anthropic Sonnet long-context tier 429 handling.
When Claude Max users without "extra usage" hit the 1M context tier
on Sonnet, Anthropic returns HTTP 429 "Extra usage is required for long
context requests." This is NOT a transient rate limit — the agent should
reduce context_length to 200k and compress instead of retrying.
Only Sonnet is affected — Opus 1M is general access.
"""
import pytest
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
# ---------------------------------------------------------------------------
# Detection logic
# ---------------------------------------------------------------------------
class TestLongContextTierDetection:
"""Verify the detection heuristic matches the Anthropic error."""
@staticmethod
def _is_long_context_tier_error(status_code, error_msg, model="claude-sonnet-4.6"):
error_msg = error_msg.lower()
return (
status_code == 429
and "extra usage" in error_msg
and "long context" in error_msg
and "sonnet" in model.lower()
)
def test_matches_anthropic_error(self):
assert self._is_long_context_tier_error(
429,
"Extra usage is required for long context requests.",
)
def test_matches_lowercase(self):
assert self._is_long_context_tier_error(
429,
"extra usage is required for long context requests.",
)
def test_matches_openrouter_model_id(self):
assert self._is_long_context_tier_error(
429,
"Extra usage is required for long context requests.",
model="anthropic/claude-sonnet-4.6",
)
def test_matches_nous_model_id(self):
assert self._is_long_context_tier_error(
429,
"Extra usage is required for long context requests.",
model="claude-sonnet-4-6",
)
def test_rejects_opus(self):
"""Opus 1M is general access — should NOT trigger reduction."""
assert not self._is_long_context_tier_error(
429,
"Extra usage is required for long context requests.",
model="claude-opus-4.6",
)
def test_rejects_opus_openrouter(self):
assert not self._is_long_context_tier_error(
429,
"Extra usage is required for long context requests.",
model="anthropic/claude-opus-4.6",
)
def test_rejects_normal_429(self):
assert not self._is_long_context_tier_error(
429,
"Rate limit exceeded. Please retry after 30 seconds.",
)
def test_rejects_wrong_status(self):
assert not self._is_long_context_tier_error(
400,
"Extra usage is required for long context requests.",
)
def test_rejects_partial_match(self):
"""Both 'extra usage' AND 'long context' must be present."""
assert not self._is_long_context_tier_error(
429, "extra usage required"
)
assert not self._is_long_context_tier_error(
429, "long context requests not supported"
)
# ---------------------------------------------------------------------------
# Context reduction
# ---------------------------------------------------------------------------
class TestContextReduction:
"""When the long-context tier error fires, context_length should
drop to 200k and the reduced flag should be set correctly."""
def _make_compressor(self, context_length=1_000_000, threshold_percent=0.5):
c = SimpleNamespace(
context_length=context_length,
threshold_percent=threshold_percent,
threshold_tokens=int(context_length * threshold_percent),
_context_probed=False,
_context_probe_persistable=False,
)
return c
def test_reduces_1m_to_200k(self):
comp = self._make_compressor(1_000_000)
reduced_ctx = 200_000
if comp.context_length > reduced_ctx:
comp.context_length = reduced_ctx
comp.threshold_tokens = int(reduced_ctx * comp.threshold_percent)
comp._context_probed = True
comp._context_probe_persistable = False
assert comp.context_length == 200_000
assert comp.threshold_tokens == 100_000
assert comp._context_probed is True
# Must NOT persist — subscription tier, not model capability
assert comp._context_probe_persistable is False
def test_no_reduction_when_already_200k(self):
comp = self._make_compressor(200_000)
reduced_ctx = 200_000
original = comp.context_length
if comp.context_length > reduced_ctx:
comp.context_length = reduced_ctx
assert comp.context_length == original # unchanged
def test_no_reduction_when_below_200k(self):
comp = self._make_compressor(128_000)
reduced_ctx = 200_000
original = comp.context_length
if comp.context_length > reduced_ctx:
comp.context_length = reduced_ctx
assert comp.context_length == original # unchanged
# ---------------------------------------------------------------------------
# Integration: agent error handler path
# ---------------------------------------------------------------------------
class TestAgentErrorPath:
"""Verify the long-context 429 doesn't hit the generic rate-limit
or client-error handlers."""
def test_long_context_429_not_treated_as_rate_limit(self):
"""The error should be intercepted before the generic
is_rate_limited check fires a fallback switch."""
error_msg = "extra usage is required for long context requests."
status_code = 429
model = "claude-sonnet-4.6"
_is_long_context_tier_error = (
status_code == 429
and "extra usage" in error_msg
and "long context" in error_msg
and "sonnet" in model.lower()
)
assert _is_long_context_tier_error
def test_opus_429_falls_through_to_rate_limit(self):
"""Opus should NOT match — falls through to generic rate-limit."""
error_msg = "extra usage is required for long context requests."
status_code = 429
model = "claude-opus-4.6"
_is_long_context_tier_error = (
status_code == 429
and "extra usage" in error_msg
and "long context" in error_msg
and "sonnet" in model.lower()
)
assert not _is_long_context_tier_error
def test_normal_429_still_treated_as_rate_limit(self):
"""A normal 429 should NOT match the long-context check."""
error_msg = "rate limit exceeded"
status_code = 429
model = "claude-sonnet-4.6"
_is_long_context_tier_error = (
status_code == 429
and "extra usage" in error_msg
and "long context" in error_msg
and "sonnet" in model.lower()
)
assert not _is_long_context_tier_error
is_rate_limited = (
status_code == 429
or "rate limit" in error_msg
)
assert is_rate_limited