# Listing metadata: 188 lines, 6.8 KiB, Python.
"""Tests for 413 payload-too-large → compression retry logic in AIAgent.

Verifies that HTTP 413 errors trigger history compression and retry,
rather than being treated as non-retryable generic 4xx errors.
"""
|
|
|
|
import uuid
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from run_agent import AIAgent
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
|
|
|
|
def _make_tool_defs(*names: str) -> list:
|
|
return [
|
|
{
|
|
"type": "function",
|
|
"function": {
|
|
"name": n,
|
|
"description": f"{n} tool",
|
|
"parameters": {"type": "object", "properties": {}},
|
|
},
|
|
}
|
|
for n in names
|
|
]
|
|
|
|
|
|
def _mock_response(content="Hello", finish_reason="stop", tool_calls=None, usage=None):
|
|
msg = SimpleNamespace(
|
|
content=content,
|
|
tool_calls=tool_calls,
|
|
reasoning_content=None,
|
|
reasoning=None,
|
|
)
|
|
choice = SimpleNamespace(message=msg, finish_reason=finish_reason)
|
|
resp = SimpleNamespace(choices=[choice], model="test/model")
|
|
resp.usage = SimpleNamespace(**usage) if usage else None
|
|
return resp
|
|
|
|
|
|
def _make_413_error(*, use_status_code=True, message="Request entity too large"):
|
|
"""Create an exception that mimics a 413 HTTP error."""
|
|
err = Exception(message)
|
|
if use_status_code:
|
|
err.status_code = 413
|
|
return err
|
|
|
|
|
|
@pytest.fixture()
def agent():
    """Provide an ``AIAgent`` built with its external dependencies patched.

    While the agent is constructed, ``run_agent.get_tool_definitions`` returns
    a single ``web_search`` tool definition, ``run_agent.check_toolset_requirements``
    returns an empty dict and ``run_agent.OpenAI`` is replaced by a mock.
    The agent's client is then swapped for a ``MagicMock`` so each test can
    script ``client.chat.completions.create``.
    """
    with (
        patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
        patch("run_agent.check_toolset_requirements", return_value={}),
        patch("run_agent.OpenAI"),
    ):
        a = AIAgent(
            api_key="test-key-1234567890",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
        # Overrides applied after construction; the tests script a.client directly.
        a.client = MagicMock()
        a._cached_system_prompt = "You are helpful."
        a._use_prompt_caching = False
        a.tool_delay = 0
        a.compression_enabled = False
        a.save_trajectories = False
        return a
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
|
|
|
|
class TestHTTP413Compression:
    """413 errors should trigger compression, not abort as generic 4xx.

    Every test scripts ``agent.client.chat.completions.create`` via
    ``side_effect`` and patches ``_compress_context`` plus the
    persistence/cleanup hooks on the agent, then inspects the dict returned
    by ``run_conversation``.
    """

    def test_413_triggers_compression(self, agent):
        """A 413 error should call _compress_context and retry, not abort."""
        # First call raises 413; second call succeeds after compression.
        err_413 = _make_413_error()
        ok_resp = _mock_response(content="Success after compression", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]

        # Prefill so there are multiple messages for compression to reduce
        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # Compression reduces 3 messages down to 1
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed prompt",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_called_once()
        assert result["completed"] is True
        assert result["final_response"] == "Success after compression"

    def test_413_not_treated_as_generic_4xx(self, agent):
        """413 must NOT hit the generic 4xx abort path; it should attempt compression."""
        err_413 = _make_413_error()
        ok_resp = _mock_response(content="Recovered", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]

        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        # If 413 were treated as generic 4xx, result would have "failed": True
        assert result.get("failed") is not True
        assert result["completed"] is True

    def test_413_error_message_detection(self, agent):
        """413 detected via error message string (no status_code attr)."""
        err = _make_413_error(use_status_code=False, message="error code: 413")
        ok_resp = _mock_response(content="OK", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err, ok_resp]

        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_called_once()
        assert result["completed"] is True

    def test_413_cannot_compress_further(self, agent):
        """When compression can't reduce messages, return partial result."""
        err_413 = _make_413_error()
        # Only one scripted outcome: the single API call fails with 413.
        agent.client.chat.completions.create.side_effect = [err_413]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # Compression returns same number of messages → can't compress further
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "same prompt",
            )
            result = agent.run_conversation("hello")

        assert result["completed"] is False
        assert result.get("partial") is True
        assert "413" in result["error"]
|