From 79bd65034c9254bdb49d90d7177bc1fa5b706a45 Mon Sep 17 00:00:00 2001 From: tekelala Date: Fri, 27 Feb 2026 12:21:27 -0500 Subject: [PATCH] fix(agent): handle 413 payload-too-large via compression instead of aborting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 413 "Request Entity Too Large" error from the LLM API was caught by the generic 4xx handler which aborts immediately. This is wrong for 413 — it's a payload-size issue that can be resolved by compressing conversation history. - Intercept 413 before the generic 4xx block and route to _compress_context - Exclude 413 from generic is_client_error detection - Add 'request entity too large' to context-length phrases as safety net - Add tests for 413 compression behavior Co-Authored-By: Claude Opus 4.6 --- run_agent.py | 44 ++++++++- tests/test_413_compression.py | 171 ++++++++++++++++++++++++++++++++++ 2 files changed, 210 insertions(+), 5 deletions(-) create mode 100644 tests/test_413_compression.py diff --git a/run_agent.py b/run_agent.py index 1cf3808e..49131ff7 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2092,11 +2092,44 @@ class AIAgent: "interrupted": True, } + # Check for 413 payload-too-large BEFORE generic 4xx handler. + # A 413 is a payload-size error — the correct response is to + # compress history and retry, not abort immediately. 
+ status_code = getattr(api_error, "status_code", None) + is_payload_too_large = ( + status_code == 413 + or 'request entity too large' in error_msg + or 'error code: 413' in error_msg + ) + + if is_payload_too_large: + print(f"{self.log_prefix}⚠️ Request payload too large (413) - attempting compression...") + + original_len = len(messages) + messages, active_system_prompt = self._compress_context( + messages, system_message, approx_tokens=approx_tokens + ) + + if len(messages) < original_len: + print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...") + continue # Retry with compressed messages + else: + print(f"{self.log_prefix}❌ Payload too large and cannot compress further.") + logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.") + self._persist_session(messages, conversation_history) + return { + "messages": messages, + "completed": False, + "api_calls": api_call_count, + "error": "Request payload too large (413). Cannot compress further.", + "partial": True + } + # Check for non-retryable client errors (4xx HTTP status codes). # These indicate a problem with the request itself (bad model ID, # invalid API key, forbidden, etc.) and will never succeed on retry. - status_code = getattr(api_error, "status_code", None) - is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 + # Note: 413 is excluded — it's handled above via compression. 
+ is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413 is_client_error = is_client_status_error or any(phrase in error_msg for phrase in [ 'error code: 400', 'error code: 401', 'error code: 403', 'error code: 404', 'error code: 422', @@ -2104,7 +2137,7 @@ class AIAgent: 'invalid api key', 'invalid_api_key', 'authentication', 'unauthorized', 'forbidden', 'not found', ]) - + if is_client_error: self._dump_api_request_debug( api_kwargs, reason="non_retryable_client_error", error=api_error, @@ -2124,8 +2157,9 @@ class AIAgent: # Check for non-retryable errors (context length exceeded) is_context_length_error = any(phrase in error_msg for phrase in [ - 'context length', 'maximum context', 'token limit', - 'too many tokens', 'reduce the length', 'exceeds the limit' + 'context length', 'maximum context', 'token limit', + 'too many tokens', 'reduce the length', 'exceeds the limit', + 'request entity too large', # OpenRouter/Nous 413 safety net ]) if is_context_length_error: diff --git a/tests/test_413_compression.py b/tests/test_413_compression.py new file mode 100644 index 00000000..f6274ebf --- /dev/null +++ b/tests/test_413_compression.py @@ -0,0 +1,171 @@ +"""Tests for 413 payload-too-large → compression retry logic in AIAgent. + +Verifies that HTTP 413 errors trigger history compression and retry, +rather than being treated as non-retryable generic 4xx errors. 
+"""
+
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from run_agent import AIAgent
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_tool_defs(*names: str) -> list:
+    """Build one minimal function-tool definition per name."""
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": n,
+                "description": f"{n} tool",
+                "parameters": {"type": "object", "properties": {}},
+            },
+        }
+        for n in names
+    ]
+
+
+def _mock_response(content="Hello", finish_reason="stop", tool_calls=None, usage=None):
+    """Build a stand-in response: one choice, model name, optional usage."""
+    msg = SimpleNamespace(
+        content=content,
+        tool_calls=tool_calls,
+        reasoning_content=None,
+        reasoning=None,
+    )
+    choice = SimpleNamespace(message=msg, finish_reason=finish_reason)
+    resp = SimpleNamespace(choices=[choice], model="test/model")
+    resp.usage = SimpleNamespace(**usage) if usage else None
+    return resp
+
+
+def _make_413_error(*, use_status_code=True, message="Request entity too large"):
+    """Create an exception that mimics a 413 HTTP error."""
+    err = Exception(message)
+    if use_status_code:
+        err.status_code = 413
+    return err
+
+
+@pytest.fixture()
+def agent():
+    with (
+        patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+        patch("run_agent.check_toolset_requirements", return_value={}),
+        patch("run_agent.OpenAI"),
+    ):
+        a = AIAgent(
+            api_key="test-key-1234567890",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        a.client = MagicMock()
+        a._cached_system_prompt = "You are helpful."
+        a._use_prompt_caching = False
+        a.tool_delay = 0
+        a.compression_enabled = False
+        a.save_trajectories = False
+        return a
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestHTTP413Compression:
+    """413 errors should trigger compression, not abort as generic 4xx."""
+
+    # Prior turns so compression can shrink the list (assumes conversation_history kwarg).
+    HISTORY = [{"role": "user", "content": "q1"}, {"role": "assistant", "content": "a1"}]
+
+    def test_413_triggers_compression(self, agent):
+        """A 413 error should call _compress_context and retry, not abort."""
+        # First call raises 413; second call succeeds after compression.
+        err_413 = _make_413_error()
+        ok_resp = _mock_response(content="Success after compression", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            # Compression removes messages (history + prompt → 1), enabling retry
+            mock_compress.return_value = (
+                [{"role": "user", "content": "hello"}],
+                "compressed prompt",
+            )
+            result = agent.run_conversation("hello", conversation_history=list(self.HISTORY))
+
+        mock_compress.assert_called_once()
+        assert result["completed"] is True
+        assert result["final_response"] == "Success after compression"
+
+    def test_413_not_treated_as_generic_4xx(self, agent):
+        """413 must NOT hit the generic 4xx abort path; it should attempt compression."""
+        err_413 = _make_413_error()
+        ok_resp = _mock_response(content="Recovered", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_413, ok_resp]
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "hello"}],
+                "compressed",
+            )
+            result = agent.run_conversation("hello", conversation_history=list(self.HISTORY))
+
+        # If 413 were treated as generic 4xx, result would have "failed": True
+        assert result.get("failed") is not True
+        assert result["completed"] is True
+
+    def test_413_error_message_detection(self, agent):
+        """413 detected via error message string (no status_code attr)."""
+        err = _make_413_error(use_status_code=False, message="error code: 413")
+        ok_resp = _mock_response(content="OK", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err, ok_resp]
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "hello"}],
+                "compressed",
+            )
+            result = agent.run_conversation("hello", conversation_history=list(self.HISTORY))
+
+        mock_compress.assert_called_once()
+        assert result["completed"] is True
+
+    def test_413_cannot_compress_further(self, agent):
+        """When compression hands messages back unchanged, return partial result."""
+        err_413 = _make_413_error()
+        agent.client.chat.completions.create.side_effect = [err_413]
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.side_effect = lambda msgs, *_a, **_kw: (msgs, "same prompt")
+            result = agent.run_conversation("hello")
+
+        assert result["completed"] is False
+        assert result.get("partial") is True
+        assert "413" in result["error"]