diff --git a/run_agent.py b/run_agent.py
index 7543d1d0c..e98863f5e 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3872,6 +3872,7 @@ class AIAgent:
                             'token limit', 'too many tokens', 'reduce the length',
                             'exceeds the limit', 'context window',
                             'request entity too large',  # OpenRouter/Nous 413 safety net
+                            'prompt is too long',  # Anthropic: "prompt is too long: N tokens > M maximum"
                         ])
 
                         if is_context_length_error:
@@ -4256,6 +4257,7 @@ class AIAgent:
 
                     messages.append(assistant_msg)
 
+                    _msg_count_before_tools = len(messages)
                     self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
 
                     # Refund the iteration if the ONLY tool(s) called were
@@ -4265,7 +4267,20 @@ class AIAgent:
                     if _tc_names == {"execute_code"}:
                         self.iteration_budget.refund()
 
-                    if self.compression_enabled and self.context_compressor.should_compress():
+                    # Estimate the next prompt size: the compressor's last
+                    # prompt + completion token counts, plus a chars // 3
+                    # guess for every message appended to `messages` since
+                    # _msg_count_before_tools was taken. This catches tool
+                    # results (e.g. large file reads, web extractions) that
+                    # last_prompt_tokens alone would miss.
+                    _compressor = self.context_compressor
+                    _new_tool_msgs = messages[_msg_count_before_tools:]
+                    _new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs)
+                    _estimated_next_prompt = (
+                        _compressor.last_prompt_tokens
+                        + _compressor.last_completion_tokens
+                        + _new_chars // 3  # deliberately high: assumes ~3 chars/token for JSON-heavy tool results
+                    )
+                    if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
                         messages, active_system_prompt = self._compress_context(
                             messages, system_message,
                             approx_tokens=self.context_compressor.last_prompt_tokens,
diff --git a/tests/test_413_compression.py b/tests/test_413_compression.py
index 62fee8b8e..1736bbde5 100644
--- a/tests/test_413_compression.py
+++ b/tests/test_413_compression.py
@@ -396,3 +396,73 @@ class TestPreflightCompression:
             result = agent.run_conversation("hello", conversation_history=big_history)
 
         mock_compress.assert_not_called()
+
+
+class TestToolResultPreflightCompression:
+    """Compression should trigger when tool results push context past the threshold."""
+
+    def test_large_tool_results_trigger_compression(self, agent):
+        """When tool results push estimated tokens past threshold (130k + 5k + 100_000 chars // 3 vs. threshold_tokens=140k), compress before next call."""
+        agent.compression_enabled = True
+        agent.context_compressor.context_length = 200_000
+        agent.context_compressor.threshold_tokens = 140_000
+        agent.context_compressor.last_prompt_tokens = 130_000
+        agent.context_compressor.last_completion_tokens = 5_000
+
+        tc = SimpleNamespace(
+            id="tc1", type="function",
+            function=SimpleNamespace(name="web_search", arguments='{"query":"test"}'),
+        )
+        tool_resp = _mock_response(
+            content=None, finish_reason="stop", tool_calls=[tc],
+            usage={"prompt_tokens": 130_000, "completion_tokens": 5_000, "total_tokens": 135_000},
+        )
+        ok_resp = _mock_response(
+            content="Done after compression", finish_reason="stop",
+            usage={"prompt_tokens": 50_000, "completion_tokens": 100, "total_tokens": 50_100},
+        )
+        agent.client.chat.completions.create.side_effect = [tool_resp, ok_resp]
+        large_result = "x" * 100_000
+
+        with (
+            patch("run_agent.handle_function_call", return_value=large_result),
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "hello"}], "compressed prompt",
+            )
+            result = agent.run_conversation("hello")
+
+        mock_compress.assert_called_once()
+        assert result["completed"] is True
+
+    def test_anthropic_prompt_too_long_safety_net(self, agent):
+        """An Anthropic-style 400 'prompt is too long' error on the first call triggers exactly one compression; the next call then completes."""
+        err_400 = Exception(
+            "Error code: 400 - {'type': 'error', 'error': {'type': 'invalid_request_error', "
+            "'message': 'prompt is too long: 233153 tokens > 200000 maximum'}}"
+        )
+        err_400.status_code = 400
+        ok_resp = _mock_response(content="Recovered", finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
+        prefill = [
+            {"role": "user", "content": "previous"},
+            {"role": "assistant", "content": "answer"},
+        ]
+
+        with (
+            patch.object(agent, "_compress_context") as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            mock_compress.return_value = (
+                [{"role": "user", "content": "hello"}], "compressed",
+            )
+            result = agent.run_conversation("hello", conversation_history=prefill)
+
+        mock_compress.assert_called_once()
+        assert result["completed"] is True