fix: prevent reasoning box from rendering 3x during tool-calling loops (#3405)

Two independent bugs caused the reasoning box to appear three times when the model produced reasoning + tool_calls. Bug A: _build_assistant_message() re-fired reasoning_callback with the full reasoning text even when streaming had already displayed it. The original guard only checked structured reasoning_content deltas, but reasoning also arrives via content tag extraction (<REASONING_SCRATCHPAD>/<think> tags in delta.content), which went through _fire_stream_delta, not _fire_reasoning_delta. Fix: skip the callback entirely when streaming is active — both paths display reasoning during the stream, and any reasoning not shown during streaming is caught by the CLI post-response fallback. Bug B: the post-response reasoning display checked _reasoning_stream_started, but that flag was reset by _reset_stream_state() at intermediate turn boundaries (when stream_delta_callback(None) fires between tool calls). Fix: introduce a _reasoning_shown_this_turn flag that persists across the tool loop and is reset only at the start of each user turn. Live-tested in a PTY: reasoning now shows exactly once per API call, with no duplicates across tool-calling loops.
2026-03-27 09:57:50 -07:00
parent e0dbbdb2c9
commit 8ecd7aed2c
3 changed files with 183 additions and 7 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -539,6 +539,7 @@ class AIAgent:
        self.tool_progress_callback = tool_progress_callback
        self.thinking_callback = thinking_callback
        self.reasoning_callback = reasoning_callback
+        self._reasoning_deltas_fired = False  # Set by _fire_reasoning_delta, reset per API call
        self.clarify_callback = clarify_callback
        self.step_callback = step_callback
        self.stream_delta_callback = stream_delta_callback
@@ -3415,6 +3416,7 @@ class AIAgent:
        max_stream_retries = 1
        has_tool_calls = False
        first_delta_fired = False
+        self._reasoning_deltas_fired = False
        for attempt in range(max_stream_retries + 1):
            try:
                with active_client.responses.stream(**api_kwargs) as stream:
@@ -3691,6 +3693,7 @@ class AIAgent:

    def _fire_reasoning_delta(self, text: str) -> None:
        """Fire reasoning callback if registered."""
+        self._reasoning_deltas_fired = True
        cb = self.reasoning_callback
        if cb is not None:
            try:
@@ -3798,6 +3801,9 @@ class AIAgent:
            role = "assistant"
            reasoning_parts: list = []
            usage_obj = None
+            # Reset per-call reasoning tracking so _build_assistant_message
+            # knows whether reasoning was already displayed during streaming.
+            self._reasoning_deltas_fired = False

            for chunk in stream:
                last_chunk_time["t"] = time.time()
@@ -3917,6 +3923,7 @@ class AIAgent:
            works unchanged.
            """
            has_tool_use = False
+            self._reasoning_deltas_fired = False

            # Reset stale-stream timer for this attempt
            last_chunk_time["t"] = time.time()
@@ -4630,11 +4637,15 @@ class AIAgent:
            logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {reasoning_text}")

        if reasoning_text and self.reasoning_callback:
-            # Skip callback for <think>-extracted reasoning when streaming is active.
-            # _stream_delta() already displayed <think> blocks during streaming;
-            # firing the callback again would cause duplicate display.
-            # Structured reasoning (from reasoning_content field) always fires.
-            if _from_structured or not self.stream_delta_callback:
+            # Skip callback when streaming is active — reasoning was already
+            # displayed during the stream via one of two paths:
+            #   (a) _fire_reasoning_delta (structured reasoning_content deltas)
+            #   (b) _stream_delta tag extraction (<think>/<REASONING_SCRATCHPAD>)
+            # When streaming is NOT active, always fire so non-streaming modes
+            # (gateway, batch, quiet) still get reasoning.
+            # Any reasoning that wasn't shown during streaming is caught by the
+            # CLI post-response display fallback (cli.py _reasoning_shown_this_turn).
+            if not self.stream_delta_callback:
                try:
                    self.reasoning_callback(reasoning_text)
                except Exception: