Files
hermes-agent/tests/run_agent/test_413_compression.py
Siddharth Balyan f3006ebef9 refactor(tests): re-architect tests + fix CI failures (#5946)
* refactor: re-architect tests to mirror the codebase

* Update tests.yml

* fix: add missing tool_error imports after registry refactor

* fix(tests): replace patch.dict with monkeypatch to prevent env var leaks under xdist

patch.dict(os.environ) can leak TERMINAL_ENV across xdist workers,
causing test_code_execution tests to hit the Modal remote path.

* fix(tests): fix update_check and telegram xdist failures

- test_update_check: replace patch("hermes_cli.banner.os.getenv") with
  monkeypatch.setenv("HERMES_HOME") — banner.py no longer imports os
  directly, it uses get_hermes_home() from hermes_constants.

- test_telegram_conflict/approval_buttons: provide real exception classes
  for telegram.error mock (NetworkError, TimedOut, BadRequest) so the
  except clause in connect() doesn't fail with "catching classes that do
  not inherit from BaseException" when xdist pollutes sys.modules.

* fix(tests): accept unavailable_models kwarg in _prompt_model_selection mock
2026-04-07 17:19:07 -07:00

476 lines
19 KiB
Python

"""Tests for payload/context-length → compression retry logic in AIAgent.
Verifies that:
- HTTP 413 errors trigger history compression and retry
- HTTP 400 context-length errors trigger compression (not generic 4xx abort)
- Preflight compression proactively compresses oversized sessions before API calls
"""
import pytest
#pytestmark = pytest.mark.skip(reason="Hangs in non-interactive environments")
import uuid
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from agent.context_compressor import SUMMARY_PREFIX
from run_agent import AIAgent
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_tool_defs(*names: str) -> list:
return [
{
"type": "function",
"function": {
"name": n,
"description": f"{n} tool",
"parameters": {"type": "object", "properties": {}},
},
}
for n in names
]
def _mock_response(content="Hello", finish_reason="stop", tool_calls=None, usage=None):
msg = SimpleNamespace(
content=content,
tool_calls=tool_calls,
reasoning_content=None,
reasoning=None,
)
choice = SimpleNamespace(message=msg, finish_reason=finish_reason)
resp = SimpleNamespace(choices=[choice], model="test/model")
resp.usage = SimpleNamespace(**usage) if usage else None
return resp
def _make_413_error(*, use_status_code=True, message="Request entity too large"):
"""Create an exception that mimics a 413 HTTP error."""
err = Exception(message)
if use_status_code:
err.status_code = 413
return err
@pytest.fixture()
def agent():
with (
patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
patch("run_agent.check_toolset_requirements", return_value={}),
patch("run_agent.OpenAI"),
):
a = AIAgent(
api_key="test-key-1234567890",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
a.client = MagicMock()
a._cached_system_prompt = "You are helpful."
a._use_prompt_caching = False
a.tool_delay = 0
a.compression_enabled = False
a.save_trajectories = False
return a
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestHTTP413Compression:
"""413 errors should trigger compression, not abort as generic 4xx."""
def test_413_triggers_compression(self, agent):
"""A 413 error should call _compress_context and retry, not abort."""
# First call raises 413; second call succeeds after compression.
err_413 = _make_413_error()
ok_resp = _mock_response(content="Success after compression", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [err_413, ok_resp]
# Prefill so there are multiple messages for compression to reduce
prefill = [
{"role": "user", "content": "previous question"},
{"role": "assistant", "content": "previous answer"},
]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
# Compression reduces 3 messages down to 1
mock_compress.return_value = (
[{"role": "user", "content": "hello"}],
"compressed prompt",
)
result = agent.run_conversation("hello", conversation_history=prefill)
mock_compress.assert_called_once()
assert result["completed"] is True
assert result["final_response"] == "Success after compression"
def test_413_not_treated_as_generic_4xx(self, agent):
"""413 must NOT hit the generic 4xx abort path; it should attempt compression."""
err_413 = _make_413_error()
ok_resp = _mock_response(content="Recovered", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [err_413, ok_resp]
prefill = [
{"role": "user", "content": "previous question"},
{"role": "assistant", "content": "previous answer"},
]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
mock_compress.return_value = (
[{"role": "user", "content": "hello"}],
"compressed",
)
result = agent.run_conversation("hello", conversation_history=prefill)
# If 413 were treated as generic 4xx, result would have "failed": True
assert result.get("failed") is not True
assert result["completed"] is True
def test_413_error_message_detection(self, agent):
"""413 detected via error message string (no status_code attr)."""
err = _make_413_error(use_status_code=False, message="error code: 413")
ok_resp = _mock_response(content="OK", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [err, ok_resp]
prefill = [
{"role": "user", "content": "previous question"},
{"role": "assistant", "content": "previous answer"},
]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
mock_compress.return_value = (
[{"role": "user", "content": "hello"}],
"compressed",
)
result = agent.run_conversation("hello", conversation_history=prefill)
mock_compress.assert_called_once()
assert result["completed"] is True
def test_400_context_length_triggers_compression(self, agent):
"""A 400 with 'maximum context length' should trigger compression, not abort as generic 4xx.
OpenRouter returns HTTP 400 (not 413) for context-length errors. Before
the fix, this was caught by the generic 4xx handler which aborted
immediately — now it correctly triggers compression+retry.
"""
err_400 = Exception(
"Error code: 400 - {'error': {'message': "
"\"This endpoint's maximum context length is 204800 tokens. "
"However, you requested about 270460 tokens.\", 'code': 400}}"
)
err_400.status_code = 400
ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
prefill = [
{"role": "user", "content": "previous question"},
{"role": "assistant", "content": "previous answer"},
]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
mock_compress.return_value = (
[{"role": "user", "content": "hello"}],
"compressed prompt",
)
result = agent.run_conversation("hello", conversation_history=prefill)
mock_compress.assert_called_once()
# Must NOT have "failed": True (which would mean the generic 4xx handler caught it)
assert result.get("failed") is not True
assert result["completed"] is True
assert result["final_response"] == "Recovered after compression"
def test_400_reduce_length_triggers_compression(self, agent):
"""A 400 with 'reduce the length' should trigger compression."""
err_400 = Exception(
"Error code: 400 - Please reduce the length of the messages"
)
err_400.status_code = 400
ok_resp = _mock_response(content="OK", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
prefill = [
{"role": "user", "content": "previous question"},
{"role": "assistant", "content": "previous answer"},
]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
mock_compress.return_value = (
[{"role": "user", "content": "hello"}],
"compressed",
)
result = agent.run_conversation("hello", conversation_history=prefill)
mock_compress.assert_called_once()
assert result["completed"] is True
def test_context_length_retry_rebuilds_request_after_compression(self, agent):
"""Retry must send the compressed transcript, not the stale oversized payload."""
err_400 = Exception(
"Error code: 400 - {'error': {'message': "
"\"This endpoint's maximum context length is 128000 tokens. "
"Please reduce the length of the messages.\"}}"
)
err_400.status_code = 400
ok_resp = _mock_response(content="Recovered after real compression", finish_reason="stop")
request_payloads = []
def _side_effect(**kwargs):
request_payloads.append(kwargs)
if len(request_payloads) == 1:
raise err_400
return ok_resp
agent.client.chat.completions.create.side_effect = _side_effect
prefill = [
{"role": "user", "content": "previous question"},
{"role": "assistant", "content": "previous answer"},
]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
mock_compress.return_value = (
[{"role": "user", "content": "compressed summary"}],
"compressed prompt",
)
result = agent.run_conversation("hello", conversation_history=prefill)
assert result["completed"] is True
assert len(request_payloads) == 2
assert len(request_payloads[1]["messages"]) < len(request_payloads[0]["messages"])
assert request_payloads[1]["messages"][0] == {
"role": "system",
"content": "compressed prompt",
}
assert request_payloads[1]["messages"][1] == {
"role": "user",
"content": "compressed summary",
}
def test_413_cannot_compress_further(self, agent):
"""When compression can't reduce messages, return partial result."""
err_413 = _make_413_error()
agent.client.chat.completions.create.side_effect = [err_413]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
# Compression returns same number of messages → can't compress further
mock_compress.return_value = (
[{"role": "user", "content": "hello"}],
"same prompt",
)
result = agent.run_conversation("hello")
assert result["completed"] is False
assert result.get("partial") is True
assert "413" in result["error"]
class TestPreflightCompression:
"""Preflight compression should compress history before the first API call."""
def test_preflight_compresses_oversized_history(self, agent):
"""When loaded history exceeds the model's context threshold, compress before API call."""
agent.compression_enabled = True
# Set a small context so the history is "oversized", but large enough
# that the compressed result (2 short messages) fits in a single pass.
agent.context_compressor.context_length = 2000
agent.context_compressor.threshold_tokens = 200
# Build a history that will be large enough to trigger preflight
# (each message ~50 chars ≈ 13 tokens, 40 messages ≈ 520 tokens > 200 threshold)
big_history = []
for i in range(20):
big_history.append({"role": "user", "content": f"Message number {i} with some extra text padding"})
big_history.append({"role": "assistant", "content": f"Response number {i} with extra padding here"})
ok_resp = _mock_response(content="After preflight", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [ok_resp]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
# Simulate compression reducing messages to a small set that fits
mock_compress.return_value = (
[
{"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
{"role": "user", "content": "hello"},
],
"new system prompt",
)
result = agent.run_conversation("hello", conversation_history=big_history)
# Preflight compression should have been called BEFORE the API call
mock_compress.assert_called_once()
assert result["completed"] is True
assert result["final_response"] == "After preflight"
def test_no_preflight_when_under_threshold(self, agent):
"""When history fits within context, no preflight compression needed."""
agent.compression_enabled = True
# Large context — history easily fits
agent.context_compressor.context_length = 1000000
agent.context_compressor.threshold_tokens = 850000
small_history = [
{"role": "user", "content": "hi"},
{"role": "assistant", "content": "hello"},
]
ok_resp = _mock_response(content="No compression needed", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [ok_resp]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
result = agent.run_conversation("hello", conversation_history=small_history)
mock_compress.assert_not_called()
assert result["completed"] is True
def test_no_preflight_when_compression_disabled(self, agent):
"""Preflight should not run when compression is disabled."""
agent.compression_enabled = False
agent.context_compressor.context_length = 100
agent.context_compressor.threshold_tokens = 85
big_history = [
{"role": "user", "content": "x" * 1000},
{"role": "assistant", "content": "y" * 1000},
] * 10
ok_resp = _mock_response(content="OK", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [ok_resp]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
result = agent.run_conversation("hello", conversation_history=big_history)
mock_compress.assert_not_called()
class TestToolResultPreflightCompression:
"""Compression should trigger when tool results push context past the threshold."""
def test_large_tool_results_trigger_compression(self, agent):
"""When tool results push estimated tokens past threshold, compress before next call."""
agent.compression_enabled = True
agent.context_compressor.context_length = 200_000
agent.context_compressor.threshold_tokens = 130_000 # below the 135k reported usage
agent.context_compressor.last_prompt_tokens = 130_000
agent.context_compressor.last_completion_tokens = 5_000
tc = SimpleNamespace(
id="tc1", type="function",
function=SimpleNamespace(name="web_search", arguments='{"query":"test"}'),
)
tool_resp = _mock_response(
content=None, finish_reason="stop", tool_calls=[tc],
usage={"prompt_tokens": 130_000, "completion_tokens": 5_000, "total_tokens": 135_000},
)
ok_resp = _mock_response(
content="Done after compression", finish_reason="stop",
usage={"prompt_tokens": 50_000, "completion_tokens": 100, "total_tokens": 50_100},
)
agent.client.chat.completions.create.side_effect = [tool_resp, ok_resp]
large_result = "x" * 100_000
with (
patch("run_agent.handle_function_call", return_value=large_result),
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
mock_compress.return_value = (
[{"role": "user", "content": "hello"}], "compressed prompt",
)
result = agent.run_conversation("hello")
mock_compress.assert_called_once()
assert result["completed"] is True
def test_anthropic_prompt_too_long_safety_net(self, agent):
"""Anthropic 'prompt is too long' error triggers compression as safety net."""
err_400 = Exception(
"Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', "
"'message': 'prompt is too long: 233153 tokens > 200000 maximum'}}"
)
err_400.status_code = 400
ok_resp = _mock_response(content="Recovered", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
prefill = [
{"role": "user", "content": "previous"},
{"role": "assistant", "content": "answer"},
]
with (
patch.object(agent, "_compress_context") as mock_compress,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
mock_compress.return_value = (
[{"role": "user", "content": "hello"}], "compressed",
)
result = agent.run_conversation("hello", conversation_history=prefill)
mock_compress.assert_called_once()
assert result["completed"] is True