diff --git a/run_agent.py b/run_agent.py index 4d91c5cd1..19f7c23f0 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5741,6 +5741,7 @@ class AIAgent: api_msg.pop("reasoning", None) api_msg.pop("finish_reason", None) api_msg.pop("_flush_sentinel", None) + api_msg.pop("_thinking_prefill", None) if _needs_sanitize: self._sanitize_tool_calls_for_strict_api(api_msg) api_messages.append(api_msg) @@ -6664,7 +6665,7 @@ class AIAgent: api_messages = [] for msg in messages: api_msg = msg.copy() - for internal_field in ("reasoning", "finish_reason"): + for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"): api_msg.pop(internal_field, None) if _needs_sanitize: self._sanitize_tool_calls_for_strict_api(api_msg) @@ -6856,6 +6857,7 @@ class AIAgent: self._empty_content_retries = 0 self._incomplete_scratchpad_retries = 0 self._codex_incomplete_retries = 0 + self._thinking_prefill_retries = 0 self._last_content_with_tools = None self._mute_post_response = False self._surrogate_sanitized = False @@ -7201,6 +7203,8 @@ class AIAgent: # Remove finish_reason - not accepted by strict APIs (e.g. Mistral) if "finish_reason" in api_msg: api_msg.pop("finish_reason") + # Strip internal thinking-prefill marker + api_msg.pop("_thinking_prefill", None) # Strip Codex Responses API fields (call_id, response_item_id) for # strict providers like Mistral, Fireworks, etc. that reject unknown fields. # Uses new dicts so the internal messages list retains the fields @@ -8735,6 +8739,15 @@ class AIAgent: if clean: self._vprint(f" ┊ 💬 {clean}") + # Pop thinking-only prefill message(s) before appending + # (tool-call path — same rationale as the final-response path). 
+ while ( + messages + and isinstance(messages[-1], dict) + and messages[-1].get("_thinking_prefill") + ): + messages.pop() + messages.append(assistant_msg) # Close any open streaming display (response box, reasoning @@ -8848,11 +8861,36 @@ class AIAgent: self._response_was_previewed = True break - # Reasoning-only response: the model produced thinking - # but no visible content. This is a valid response — - # keep reasoning in its own field and set content to - # "(empty)" so every provider accepts the message. - # No retries needed. + # ── Thinking-only prefill continuation ────────── + # The model produced structured reasoning (via API + # fields) but no visible text content. Rather than + # giving up, append the assistant message as-is and + # continue — the model will see its own reasoning + # on the next turn and produce the text portion. + # Inspired by clawdbot's "incomplete-text" recovery. + _has_structured = bool( + getattr(assistant_message, "reasoning", None) + or getattr(assistant_message, "reasoning_content", None) + or getattr(assistant_message, "reasoning_details", None) + ) + if _has_structured and self._thinking_prefill_retries < 2: + self._thinking_prefill_retries += 1 + self._vprint( + f"{self.log_prefix}↻ Thinking-only response — " + f"prefilling to continue " + f"({self._thinking_prefill_retries}/2)" + ) + interim_msg = self._build_assistant_message( + assistant_message, "incomplete" + ) + interim_msg["_thinking_prefill"] = True + messages.append(interim_msg) + self._session_messages = messages + self._save_session_log(messages) + continue + + # Exhausted prefill attempts or no structured + # reasoning — fall through to "(empty)" terminal. 
reasoning_text = self._extract_reasoning(assistant_message) assistant_msg = self._build_assistant_message(assistant_message, finish_reason) assistant_msg["content"] = "(empty)" @@ -8871,6 +8909,7 @@ class AIAgent: if hasattr(self, '_empty_content_retries'): self._empty_content_retries = 0 self._last_empty_content_signature = None + self._thinking_prefill_retries = 0 if ( self.api_mode == "codex_responses" @@ -8909,7 +8948,18 @@ class AIAgent: final_response = self._strip_think_blocks(final_response).strip() final_msg = self._build_assistant_message(assistant_message, finish_reason) - + + # Pop thinking-only prefill message(s) before appending + # the final response. This avoids consecutive assistant + # messages which break strict-alternation providers + # (Anthropic Messages API) and keeps history clean. + while ( + messages + and isinstance(messages[-1], dict) + and messages[-1].get("_thinking_prefill") + ): + messages.pop() + messages.append(final_msg) if not self.quiet_mode: diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 281945492..7f6ab4c30 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1547,7 +1547,7 @@ class TestRunConversation: assert any(m.get("reasoning") for m in assistant_msgs) def test_reasoning_only_local_resumed_no_compression_triggered(self, agent): - """Reasoning-only responses no longer trigger compression — accepted immediately.""" + """Reasoning-only responses no longer trigger compression — prefill then accepted.""" self._setup_agent(agent) agent.base_url = "http://127.0.0.1:1234/v1" agent.compression_enabled = True @@ -1561,8 +1561,9 @@ class TestRunConversation: {"role": "assistant", "content": "old answer"}, ] + # 3 responses: original + 2 prefill continuations (structured reasoning triggers prefill) with ( - patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp]), + patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, empty_resp, empty_resp]), 
patch.object(agent, "_compress_context") as mock_compress, patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -1573,17 +1574,18 @@ mock_compress.assert_not_called() # no compression triggered assert result["completed"] is True assert result["final_response"] == "(empty)" - assert result["api_calls"] == 1 + assert result["api_calls"] == 3 # 1 original + 2 prefill continuations - def test_reasoning_only_response_accepted_without_retry(self, agent): - """Reasoning-only response should be accepted with (empty) content, no retries.""" + def test_reasoning_only_response_prefill_then_empty(self, agent): + """Structured reasoning-only triggers prefill continuation (up to 2), then falls through to (empty).""" self._setup_agent(agent) empty_resp = _mock_response( content=None, finish_reason="stop", reasoning_content="structured reasoning answer", ) - agent.client.chat.completions.create.side_effect = [empty_resp] + # 3 responses: original + 2 prefill continuations, all reasoning-only + agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp] with ( patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -1592,7 +1594,35 @@ result = agent.run_conversation("answer me") assert result["completed"] is True assert result["final_response"] == "(empty)" - assert result["api_calls"] == 1 # no retries + assert result["api_calls"] == 3 # 1 original + 2 prefill continuations + + def test_reasoning_only_prefill_succeeds_on_continuation(self, agent): + """When prefill continuation produces content, it becomes the final response.""" + self._setup_agent(agent) + empty_resp = _mock_response( + content=None, + finish_reason="stop", + reasoning_content="structured reasoning answer", + ) + content_resp = _mock_response( + content="Here is the actual answer.", + finish_reason="stop", + ) + agent.client.chat.completions.create.side_effect = [empty_resp, content_resp] + with ( + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("answer me") + assert result["completed"] is True + assert result["final_response"] == "Here is the actual answer." + assert result["api_calls"] == 2 # 1 original + 1 prefill continuation + # Prefill message should be cleaned up — no consecutive assistant messages + roles = [m.get("role") for m in result["messages"]] + for i in range(len(roles) - 1): + if roles[i] == "assistant" and roles[i + 1] == "assistant": + raise AssertionError("Consecutive assistant messages found in history") def test_truly_empty_response_accepted_without_retry(self, agent): """Truly empty response (no content, no reasoning) should still complete with (empty)."""