2026-04-07 14:03:16 +00:00
2 changed files with 38 additions and 160 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -8620,140 +8620,24 @@ class AIAgent:
                            self._response_was_previewed = True
                            break

-                        # No fallback available — classify the empty response before
-                        # blindly spending retries. Some local/custom backends surface
-                        # implicit context pressure as reasoning-only output rather than
-                        # an explicit overflow error.
-                        if not hasattr(self, '_empty_content_retries'):
-                            self._empty_content_retries = 0
-                        self._empty_content_retries += 1
+                        # Reasoning-only or fully empty response: the model produced
+                        # no visible content (with or without thinking).  Treat it as
+                        # a valid final turn — keep any reasoning in its own field and
+                        # set content to the "(empty)" placeholder so every provider
+                        # accepts the message.  No retries or compression needed.
+                        reasoning_text = self._extract_reasoning(assistant_message)
+                        assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
+                        assistant_msg["content"] = "(empty)"
+                        messages.append(assistant_msg)

-                        empty_response_info = self._classify_empty_content_response(
-                            assistant_message,
-                            finish_reason=finish_reason,
-                            approx_tokens=approx_tokens,
-                            api_messages=api_messages,
-                            conversation_history=conversation_history,
-                        )
-                        reasoning_text = empty_response_info["reasoning_text"]
-                        self._vprint(f"{self.log_prefix}⚠️  Response only contains think block with no content after it")
                        if reasoning_text:
                            reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
-                            self._vprint(f"{self.log_prefix}   Reasoning: {reasoning_preview}")
+                            self._vprint(f"{self.log_prefix}ℹ️  Reasoning-only response (no visible content). Reasoning: {reasoning_preview}")
                        else:
-                            content_preview = final_response[:80] + "..." if len(final_response) > 80 else final_response
-                            self._vprint(f"{self.log_prefix}   Content: '{content_preview}'")
+                            self._vprint(f"{self.log_prefix}ℹ️  Empty response (no content or reasoning).")

-                        if empty_response_info["should_compress"]:
-                            compression_attempts += 1
-                            if compression_attempts > max_compression_attempts:
-                                self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
-                                self._vprint(f"{self.log_prefix}   💡 Local/custom backend returned reasoning-only output with no visible content. This often means the resumed/large session exceeds the runtime context window. Try /new or lower model.context_length to the actual runtime limit.", force=True)
-                            else:
-                                self._vprint(f"{self.log_prefix}🗜️  Reasoning-only response looks like implicit context pressure — attempting compression ({compression_attempts}/{max_compression_attempts})...", force=True)
-                                original_len = len(messages)
-                                messages, active_system_prompt = self._compress_context(
-                                    messages, system_message, approx_tokens=approx_tokens,
-                                    task_id=effective_task_id,
-                                )
-                                if len(messages) < original_len:
-                                    conversation_history = None
-                                    self._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages after reasoning-only response, retrying...")
-                                    time.sleep(2)
-                                    api_call_count -= 1
-                                    self.iteration_budget.refund()
-                                    retry_count += 1
-                                    continue
-                                self._vprint(f"{self.log_prefix}   Compression could not shrink the session; falling back to retry/salvage logic.")
-
-                        if (
-                            reasoning_text
-                            and empty_response_info["repeated_signature"]
-                            and empty_response_info["has_structured_reasoning"]
-                        ):
-                            self._vprint(f"{self.log_prefix}ℹ️  Structured reasoning-only response repeated unchanged — using reasoning text directly.", force=True)
-                            self._empty_content_retries = 0
-                            final_response = reasoning_text
-                            empty_msg = {
-                                "role": "assistant",
-                                "content": final_response,
-                                "reasoning": reasoning_text,
-                                "finish_reason": finish_reason,
-                            }
-                            messages.append(empty_msg)
-                            break
-                        
-                        if self._empty_content_retries < 3:
-                            self._vprint(f"{self.log_prefix}🔄 Retrying API call ({self._empty_content_retries}/3)...")
-                            continue
-                        else:
-                            self._vprint(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.", force=True)
-                            self._empty_content_retries = 0
-                            
-                            # If a prior tool_calls turn had real content, salvage it:
-                            # rewrite that turn's content to a brief tool description,
-                            # and use the original content as the final response here.
-                            fallback = getattr(self, '_last_content_with_tools', None)
-                            if fallback:
-                                self._last_content_with_tools = None
-                                # Find the last assistant message with tool_calls and rewrite it
-                                for i in range(len(messages) - 1, -1, -1):
-                                    msg = messages[i]
-                                    if msg.get("role") == "assistant" and msg.get("tool_calls"):
-                                        tool_names = []
-                                        for tc in msg["tool_calls"]:
-                                            if not tc or not isinstance(tc, dict): continue
-                                            fn = tc.get("function", {})
-                                            tool_names.append(fn.get("name", "unknown"))
-                                        msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
-                                        break
-                                # Strip <think> blocks from fallback content for user display
-                                final_response = self._strip_think_blocks(fallback).strip()
-                                self._response_was_previewed = True
-                                break
-                            
-                            # No fallback -- if reasoning_text exists, the model put its
-                            # entire response inside <think> tags; use that as the content.
-                            if reasoning_text:
-                                self._vprint(f"{self.log_prefix}Using reasoning as response content (model wrapped entire response in think tags).", force=True)
-                                final_response = reasoning_text
-                                empty_msg = {
-                                    "role": "assistant",
-                                    "content": final_response,
-                                    "reasoning": reasoning_text,
-                                    "finish_reason": finish_reason,
-                                }
-                                messages.append(empty_msg)
-                                break
-
-                            # Truly empty -- no reasoning and no content
-                            empty_msg = {
-                                "role": "assistant",
-                                "content": final_response,
-                                "reasoning": reasoning_text,
-                                "finish_reason": finish_reason,
-                            }
-                            messages.append(empty_msg)
-
-                            self._cleanup_task_resources(effective_task_id)
-                            self._persist_session(messages, conversation_history)
-
-                            error_message = "Model generated only think blocks with no actual response after 3 retries"
-                            if empty_response_info["is_local_custom"]:
-                                error_message = (
-                                    "Local/custom backend returned reasoning-only output with no visible response after 3 retries. "
-                                    "Likely causes: wrong /v1 endpoint, runtime context window smaller than Hermes expects, "
-                                    "or a resumed/large session exceeding the backend's actual context limit."
-                                )
-
-                            return {
-                                "final_response": final_response or None,
-                                "messages": messages,
-                                "api_calls": api_call_count,
-                                "completed": False,
-                                "partial": True,
-                                "error": error_message
-                            }
+                        final_response = "(empty)"
+                        break
                    
                    # Reset retry counter/signature on successful content
                    if hasattr(self, '_empty_content_retries'):
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -1488,19 +1488,14 @@ class TestRunConversation:
        assert result["completed"] is True
        assert result["api_calls"] == 2

-    def test_empty_content_retry_uses_inline_reasoning_as_response(self, agent):
-        """Reasoning-only payloads should recover the inline reasoning text."""
+    def test_inline_think_blocks_reasoning_only_accepted(self, agent):
+        """Inline <think> reasoning-only responses accepted with (empty) content, no retries."""
        self._setup_agent(agent)
        empty_resp = _mock_response(
            content="<think>internal reasoning</think>",
            finish_reason="stop",
        )
-        # Return empty 3 times to exhaust retries
-        agent.client.chat.completions.create.side_effect = [
-            empty_resp,
-            empty_resp,
-            empty_resp,
-        ]
+        agent.client.chat.completions.create.side_effect = [empty_resp]
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
@@ -1508,10 +1503,14 @@ class TestRunConversation:
        ):
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
-        assert result["final_response"] == "internal reasoning"
+        assert result["final_response"] == "(empty)"
+        assert result["api_calls"] == 1  # no retries
+        # Reasoning should be preserved in the assistant message
+        assistant_msgs = [m for m in result["messages"] if m.get("role") == "assistant"]
+        assert any(m.get("reasoning") for m in assistant_msgs)

-    def test_empty_content_local_resumed_session_triggers_compression(self, agent):
-        """Local resumed reasoning-only responses should compress before burning retries."""
+    def test_reasoning_only_local_resumed_no_compression_triggered(self, agent):
+        """Reasoning-only responses no longer trigger compression — accepted immediately."""
        self._setup_agent(agent)
        agent.base_url = "http://127.0.0.1:1234/v1"
        agent.compression_enabled = True
@@ -1520,39 +1519,34 @@ class TestRunConversation:
            finish_reason="stop",
            reasoning_content="reasoning only",
        )
-        ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop")
        prefill = [
            {"role": "user", "content": "old question"},
            {"role": "assistant", "content": "old answer"},
        ]

        with (
-            patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, ok_resp]),
+            patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp]),
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
-            mock_compress.return_value = (
-                [{"role": "user", "content": "compressed user message"}],
-                "compressed system prompt",
-            )
            result = agent.run_conversation("hello", conversation_history=prefill)

-        mock_compress.assert_called_once()
+        mock_compress.assert_not_called()  # no compression triggered
        assert result["completed"] is True
-        assert result["final_response"] == "Recovered after compression"
-        assert result["api_calls"] == 1  # compression retry is refunded, same as explicit overflow path
+        assert result["final_response"] == "(empty)"
+        assert result["api_calls"] == 1

-    def test_empty_content_repeated_structured_reasoning_salvages_early(self, agent):
-        """Repeated identical structured reasoning-only responses should stop retrying early."""
+    def test_reasoning_only_response_accepted_without_retry(self, agent):
+        """Reasoning-only response should be accepted with (empty) content, no retries."""
        self._setup_agent(agent)
        empty_resp = _mock_response(
            content=None,
            finish_reason="stop",
            reasoning_content="structured reasoning answer",
        )
-        agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp]
+        agent.client.chat.completions.create.side_effect = [empty_resp]
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
@@ -1560,24 +1554,24 @@ class TestRunConversation:
        ):
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
-        assert result["final_response"] == "structured reasoning answer"
-        assert result["api_calls"] == 2
+        assert result["final_response"] == "(empty)"
+        assert result["api_calls"] == 1  # no retries

-    def test_empty_content_local_custom_error_is_actionable(self, agent):
-        """Local/custom retries should return a diagnostic tailored to context/endpoint mismatch."""
+    def test_truly_empty_response_accepted_without_retry(self, agent):
+        """Truly empty response (no content, no reasoning) should still complete with (empty)."""
        self._setup_agent(agent)
        agent.base_url = "http://127.0.0.1:1234/v1"
        empty_resp = _mock_response(content=None, finish_reason="stop")
-        agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp]
+        agent.client.chat.completions.create.side_effect = [empty_resp]
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("answer me")
-        assert result["completed"] is False
-        assert "Local/custom backend returned reasoning-only output" in result["error"]
-        assert "wrong /v1 endpoint" in result["error"]
+        assert result["completed"] is True
+        assert result["final_response"] == "(empty)"
+        assert result["api_calls"] == 1  # no retries

    def test_nous_401_refreshes_after_remint_and_retries(self, agent):
        self._setup_agent(agent)