# hermes-agent/tests/run_agent/test_long_context_tier_429.py

"""Tests for Anthropic Sonnet long-context tier 429 handling.

When Claude Max users without "extra usage" hit the 1M context tier
on Sonnet, Anthropic returns HTTP 429 "Extra usage is required for long
context requests."  This is NOT a transient rate limit — the agent should
reduce context_length to 200k and compress instead of retrying.

Only Sonnet is affected — Opus 1M is general access.
"""

import pytest
from types import SimpleNamespace
from unittest.mock import MagicMock, patch


# ---------------------------------------------------------------------------
# Detection logic
# ---------------------------------------------------------------------------


class TestLongContextTierDetection:
    """Check which (status, message, model) triples the detection predicate accepts.

    The predicate under test is the local ``_is_long_context_tier_error``
    helper below; nothing is imported from the agent itself.
    """

    # Wording of the tier error as quoted in the module docstring.
    _TIER_MESSAGE = "Extra usage is required for long context requests."

    @staticmethod
    def _is_long_context_tier_error(status_code, error_msg, model="claude-sonnet-4.6"):
        """Return True for a Sonnet 429 whose text contains both marker phrases.

        The message and the model id are both compared case-insensitively.
        """
        # Lowercase up front, before any early exit.
        lowered = error_msg.lower()
        if status_code != 429:
            return False
        for phrase in ("extra usage", "long context"):
            if phrase not in lowered:
                return False
        return "sonnet" in model.lower()

    def test_matches_anthropic_error(self):
        matched = self._is_long_context_tier_error(429, self._TIER_MESSAGE)
        assert matched

    def test_matches_lowercase(self):
        message = "extra usage is required for long context requests."
        assert self._is_long_context_tier_error(429, message)

    def test_matches_openrouter_model_id(self):
        matched = self._is_long_context_tier_error(
            429, self._TIER_MESSAGE, model="anthropic/claude-sonnet-4.6"
        )
        assert matched

    def test_matches_nous_model_id(self):
        matched = self._is_long_context_tier_error(
            429, self._TIER_MESSAGE, model="claude-sonnet-4-6"
        )
        assert matched

    def test_rejects_opus(self):
        """Opus has general 1M access, so it must never trigger the reduction."""
        matched = self._is_long_context_tier_error(
            429, self._TIER_MESSAGE, model="claude-opus-4.6"
        )
        assert not matched

    def test_rejects_opus_openrouter(self):
        matched = self._is_long_context_tier_error(
            429, self._TIER_MESSAGE, model="anthropic/claude-opus-4.6"
        )
        assert not matched

    def test_rejects_normal_429(self):
        ordinary = "Rate limit exceeded. Please retry after 30 seconds."
        assert not self._is_long_context_tier_error(429, ordinary)

    def test_rejects_wrong_status(self):
        assert not self._is_long_context_tier_error(400, self._TIER_MESSAGE)

    def test_rejects_partial_match(self):
        """A message carrying only one of the two marker phrases is rejected."""
        only_extra_usage = "extra usage required"
        only_long_context = "long context requests not supported"
        assert not self._is_long_context_tier_error(429, only_extra_usage)
        assert not self._is_long_context_tier_error(429, only_long_context)


# ---------------------------------------------------------------------------
# Context reduction
# ---------------------------------------------------------------------------


class TestContextReduction:
    """Pin the context-window reduction applied after a long-context tier 429.

    The tests run against a ``SimpleNamespace`` stand-in for the compressor
    and a local copy of the reduction rule (``_apply_tier_reduction``).
    Nothing is imported from the agent, so these tests describe the intended
    rule rather than exercising the production code path.
    """

    # Window the tests expect the agent to fall back to.
    _REDUCED_CTX = 200_000

    def _make_compressor(self, context_length=1_000_000, threshold_percent=0.5):
        """Build a compressor stand-in exposing only the fields the rule touches.

        ``threshold_tokens`` is derived as ``int(context_length * threshold_percent)``;
        both probe flags start out False.
        """
        return SimpleNamespace(
            context_length=context_length,
            threshold_percent=threshold_percent,
            threshold_tokens=int(context_length * threshold_percent),
            _context_probed=False,
            _context_probe_persistable=False,
        )

    @staticmethod
    def _apply_tier_reduction(comp, reduced_ctx=_REDUCED_CTX):
        """Shrink ``comp`` to ``reduced_ctx`` if its window is larger.

        Returns True when the compressor was modified and False when it was
        already at or below ``reduced_ctx`` (in which case nothing is touched).
        """
        if comp.context_length <= reduced_ctx:
            return False
        comp.context_length = reduced_ctx
        comp.threshold_tokens = int(reduced_ctx * comp.threshold_percent)
        comp._context_probed = True
        # Must NOT persist — subscription tier, not model capability
        comp._context_probe_persistable = False
        return True

    def test_reduces_1m_to_200k(self):
        comp = self._make_compressor(1_000_000)

        assert self._apply_tier_reduction(comp) is True

        assert comp.context_length == 200_000
        assert comp.threshold_tokens == 100_000
        assert comp._context_probed is True
        assert comp._context_probe_persistable is False

    def test_no_reduction_when_already_200k(self):
        comp = self._make_compressor(200_000)
        before = dict(vars(comp))

        assert self._apply_tier_reduction(comp) is False

        # Every field stays as built, not just context_length.
        assert vars(comp) == before
        assert comp.context_length == 200_000

    def test_no_reduction_when_below_200k(self):
        comp = self._make_compressor(128_000)
        before = dict(vars(comp))

        assert self._apply_tier_reduction(comp) is False

        # Every field stays as built, not just context_length.
        assert vars(comp) == before
        assert comp.context_length == 128_000


# ---------------------------------------------------------------------------
# Integration: agent error handler path
# ---------------------------------------------------------------------------


class TestAgentErrorPath:
    """Check how the long-context 429 relates to the generic rate-limit check.

    Both predicates are local copies defined on this class; the agent's real
    error handler is not imported, so these tests pin the intended
    classification rather than the production control flow.
    """

    @staticmethod
    def _is_long_context_tier_error(status_code, error_msg, model):
        """Return True for a Sonnet 429 whose text has both marker phrases.

        Message and model id are compared case-insensitively, so the result
        does not depend on the caller having lowercased them first.
        """
        lowered = error_msg.lower()
        return (
            status_code == 429
            and "extra usage" in lowered
            and "long context" in lowered
            and "sonnet" in model.lower()
        )

    @staticmethod
    def _is_rate_limited(status_code, error_msg):
        """Return True for any 429, or any message mentioning "rate limit"."""
        return status_code == 429 or "rate limit" in error_msg.lower()

    def test_long_context_429_not_treated_as_rate_limit(self):
        """The error should be intercepted before the generic
        is_rate_limited check fires a fallback switch."""
        error_msg = "extra usage is required for long context requests."
        status_code = 429
        model = "claude-sonnet-4.6"

        assert self._is_long_context_tier_error(status_code, error_msg, model)
        # The generic check matches this error too (it is still a 429), which
        # is why the specific check has to take precedence.
        assert self._is_rate_limited(status_code, error_msg)

    def test_opus_429_falls_through_to_rate_limit(self):
        """Opus should NOT match — falls through to generic rate-limit."""
        error_msg = "extra usage is required for long context requests."
        status_code = 429
        model = "claude-opus-4.6"

        assert not self._is_long_context_tier_error(status_code, error_msg, model)

    def test_normal_429_still_treated_as_rate_limit(self):
        """A normal 429 should NOT match the long-context check."""
        error_msg = "rate limit exceeded"
        status_code = 429
        model = "claude-sonnet-4.6"

        assert not self._is_long_context_tier_error(status_code, error_msg, model)
        assert self._is_rate_limited(status_code, error_msg)