diff --git a/run_agent.py b/run_agent.py
index 8ac36159..fa2a930b 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3233,6 +3233,8 @@ class AIAgent:
         final_response = None
         interrupted = False
         codex_ack_continuations = 0
+        length_continue_retries = 0
+        truncated_response_prefix = ""
 
         # Clear any stale interrupt state at start
         self.clear_interrupt()
@@ -3375,6 +3377,7 @@ class AIAgent:
             codex_auth_retry_attempted = False
             nous_auth_retry_attempted = False
             restart_with_compressed_messages = False
+            restart_with_length_continuation = False
             finish_reason = "stop"
             response = None  # Guard against UnboundLocalError if all retries fail
 
@@ -3525,19 +3528,60 @@ class AIAgent:
                         finish_reason = "stop"
                     else:
                         finish_reason = response.choices[0].finish_reason
-
-                    # Handle "length" finish_reason - response was truncated
+
                     if finish_reason == "length":
                         print(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens")
-
+
+                        if self.api_mode == "chat_completions":
+                            assistant_message = response.choices[0].message
+                            if not assistant_message.tool_calls:
+                                length_continue_retries += 1
+                                interim_msg = self._build_assistant_message(assistant_message, finish_reason)
+                                messages.append(interim_msg)
+                                self._log_msg_to_db(interim_msg)
+                                if assistant_message.content:
+                                    truncated_response_prefix += assistant_message.content
+
+                                if length_continue_retries < 3:
+                                    print(
+                                        f"{self.log_prefix}↻ Requesting continuation "
+                                        f"({length_continue_retries}/3)..."
+                                    )
+                                    continue_msg = {
+                                        "role": "user",
+                                        "content": (
+                                            "[System: Your previous response was truncated by the output "
+                                            "length limit. Continue exactly where you left off. Do not "
+                                            "restart or repeat prior text. Finish the answer directly.]"
+                                        ),
+                                    }
+                                    messages.append(continue_msg)
+                                    self._log_msg_to_db(continue_msg)
+                                    self._session_messages = messages
+                                    self._save_session_log(messages)
+                                    restart_with_length_continuation = True
+                                    break
+
+                                partial_response = self._strip_think_blocks(truncated_response_prefix).strip()
+                                self._cleanup_task_resources(effective_task_id)
+                                self._persist_session(messages, conversation_history)
+                                return {
+                                    "final_response": partial_response or None,
+                                    "messages": messages,
+                                    "api_calls": api_call_count,
+                                    "completed": False,
+                                    "partial": True,
+                                    "error": "Response remained truncated after 3 continuation attempts",
+                                }
+
                         # If we have prior messages, roll back to last complete state
                         if len(messages) > 1:
                             print(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")
                             rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
-
+
                             self._cleanup_task_resources(effective_task_id)
                             self._persist_session(messages, conversation_history)
-
+
                             return {
                                 "final_response": None,
                                 "messages": rolled_back_messages,
@@ -3870,6 +3914,9 @@ class AIAgent:
                     self.iteration_budget.refund()
                     continue
 
+                if restart_with_length_continuation:
+                    continue
+
                 # Guard: if all retries exhausted without a successful response
                 # (e.g. repeated context-length errors that exhausted retry_count),
                 # the `response` variable is still None. Break out cleanly.
@@ -4260,6 +4307,9 @@ class AIAgent:
                     continue
                 codex_ack_continuations = 0
+
+                if truncated_response_prefix:
+                    final_response = truncated_response_prefix + final_response
 
                 # Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
                 final_response = self._strip_think_blocks(final_response).strip()
 
diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
index 64de980d..2d420dd0 100644
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -829,6 +829,36 @@ class TestRunConversation:
         assert result["final_response"] == "All done"
         assert result["completed"] is True
 
+    @pytest.mark.parametrize(
+        ("first_content", "second_content", "expected_final"),
+        [
+            ("Part 1 ", "Part 2", "Part 1 Part 2"),
+            ("<think>internal reasoning</think>", "Recovered final answer", "Recovered final answer"),
+        ],
+    )
+    def test_length_finish_reason_requests_continuation(
+        self, agent, first_content, second_content, expected_final
+    ):
+        self._setup_agent(agent)
+        first = _mock_response(content=first_content, finish_reason="length")
+        second = _mock_response(content=second_content, finish_reason="stop")
+        agent.client.chat.completions.create.side_effect = [first, second]
+
+        with (
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("hello")
+
+        assert result["completed"] is True
+        assert result["api_calls"] == 2
+        assert result["final_response"] == expected_final
+
+        second_call_messages = agent.client.chat.completions.create.call_args_list[1].kwargs["messages"]
+        assert second_call_messages[-1]["role"] == "user"
+        assert "truncated by the output length limit" in second_call_messages[-1]["content"]
+
 
 class TestRetryExhaustion:
     """Regression: retry_count > max_retries was dead code (off-by-one).