From 8ecd7aed2c3b2d00048814cc79e6aac60dcaf497 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 27 Mar 2026 09:57:50 -0700 Subject: [PATCH] fix: prevent reasoning box from rendering 3x during tool-calling loops (#3405) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two independent bugs caused the reasoning box to appear three times when the model produced reasoning + tool_calls: Bug A: _build_assistant_message() re-fired reasoning_callback with the full reasoning text even when streaming had already displayed it. The original guard only checked structured reasoning_content deltas, but reasoning also arrives via content tag extraction (/ tags in delta.content), which went through _fire_stream_delta not _fire_reasoning_delta. Fix: skip the callback entirely when streaming is active — both paths display reasoning during the stream. Any reasoning not shown during streaming is caught by the CLI post-response fallback. Bug B: The post-response reasoning display checked _reasoning_stream_started, but that flag was reset by _reset_stream_state() during intermediate turn boundaries (when stream_delta_callback(None) fires between tool calls). Introduced _reasoning_shown_this_turn flag that persists across the tool loop and is only reset at the start of each user turn. Live-tested in PTY: reasoning now shows exactly once per API call, no duplicates across tool-calling loops. --- cli.py | 14 ++- run_agent.py | 21 +++-- tests/test_reasoning_command.py | 155 ++++++++++++++++++++++++++++++++ 3 files changed, 183 insertions(+), 7 deletions(-) diff --git a/cli.py b/cli.py index bd7c26c48..c8aeaad8a 100644 --- a/cli.py +++ b/cli.py @@ -1625,6 +1625,7 @@ class HermesCLI: if not text: return self._reasoning_stream_started = True + self._reasoning_shown_this_turn = True if getattr(self, "_stream_box_opened", False): return @@ -5545,6 +5546,10 @@ class HermesCLI: # Reset streaming display state for this turn self._reset_stream_state() + # Separate from _reset_stream_state because this must persist + # across intermediate turn boundaries (tool-calling loops) — only + # reset at the start of each user turn. + self._reasoning_shown_this_turn = False # --- Streaming TTS setup --- # When ElevenLabs is the TTS provider and sounddevice is available, @@ -5759,8 +5764,13 @@ class HermesCLI: response_previewed = result.get("response_previewed", False) if result else False # Display reasoning (thinking) box if enabled and available. - # Skip when streaming already showed reasoning live. - if self.show_reasoning and result and not self._reasoning_stream_started: + # Skip when streaming already showed reasoning live. Use the + # turn-persistent flag (_reasoning_shown_this_turn) instead of + # _reasoning_stream_started — the latter gets reset during + # intermediate turn boundaries (tool-calling loops), which caused + # the reasoning box to re-render after the final response. + _reasoning_already_shown = getattr(self, '_reasoning_shown_this_turn', False) + if self.show_reasoning and result and not _reasoning_already_shown: reasoning = result.get("last_reasoning") if reasoning: w = shutil.get_terminal_size().columns diff --git a/run_agent.py b/run_agent.py index e220fa992..457479f6e 100644 --- a/run_agent.py +++ b/run_agent.py @@ -539,6 +539,7 @@ class AIAgent: self.tool_progress_callback = tool_progress_callback self.thinking_callback = thinking_callback self.reasoning_callback = reasoning_callback + self._reasoning_deltas_fired = False # Set by _fire_reasoning_delta, reset per API call self.clarify_callback = clarify_callback self.step_callback = step_callback self.stream_delta_callback = stream_delta_callback @@ -3415,6 +3416,7 @@ class AIAgent: max_stream_retries = 1 has_tool_calls = False first_delta_fired = False + self._reasoning_deltas_fired = False for attempt in range(max_stream_retries + 1): try: with active_client.responses.stream(**api_kwargs) as stream: @@ -3691,6 +3693,7 @@ class AIAgent: def _fire_reasoning_delta(self, text: str) -> None: """Fire reasoning callback if registered.""" + self._reasoning_deltas_fired = True cb = self.reasoning_callback if cb is not None: try: @@ -3798,6 +3801,9 @@ class AIAgent: role = "assistant" reasoning_parts: list = [] usage_obj = None + # Reset per-call reasoning tracking so _build_assistant_message + # knows whether reasoning was already displayed during streaming. + self._reasoning_deltas_fired = False for chunk in stream: last_chunk_time["t"] = time.time() @@ -3917,6 +3923,7 @@ class AIAgent: works unchanged. """ has_tool_use = False + self._reasoning_deltas_fired = False # Reset stale-stream timer for this attempt last_chunk_time["t"] = time.time() @@ -4630,11 +4637,15 @@ class AIAgent: logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {reasoning_text}") if reasoning_text and self.reasoning_callback: - # Skip callback for -extracted reasoning when streaming is active. - # _stream_delta() already displayed blocks during streaming; - # firing the callback again would cause duplicate display. - # Structured reasoning (from reasoning_content field) always fires. - if _from_structured or not self.stream_delta_callback: + # Skip callback when streaming is active — reasoning was already + # displayed during the stream via one of two paths: + # (a) _fire_reasoning_delta (structured reasoning_content deltas) + # (b) _stream_delta tag extraction (/) + # When streaming is NOT active, always fire so non-streaming modes + # (gateway, batch, quiet) still get reasoning. + # Any reasoning that wasn't shown during streaming is caught by the + # CLI post-response display fallback (cli.py _reasoning_shown_this_turn). + if not self.stream_delta_callback: try: self.reasoning_callback(reasoning_text) except Exception: diff --git a/tests/test_reasoning_command.py b/tests/test_reasoning_command.py index 81d452a27..4270d630d 100644 --- a/tests/test_reasoning_command.py +++ b/tests/test_reasoning_command.py @@ -472,6 +472,7 @@ class TestInlineThinkBlockExtraction(unittest.TestCase): agent._extract_reasoning = AIAgent._extract_reasoning.__get__(agent) agent.verbose_logging = False agent.reasoning_callback = None + agent.stream_delta_callback = None # non-streaming by default return agent def test_single_think_block_extracted(self): @@ -605,5 +606,159 @@ class TestEndToEndPipeline(unittest.TestCase): self.assertIsNone(result["last_reasoning"]) +# --------------------------------------------------------------------------- +# Duplicate reasoning box prevention (Bug fix: 3 boxes for 1 reasoning) +# --------------------------------------------------------------------------- + +class TestReasoningDeltasFiredFlag(unittest.TestCase): + """_build_assistant_message should not re-fire reasoning_callback when + reasoning was already streamed via _fire_reasoning_delta.""" + + def _make_agent(self): + from run_agent import AIAgent + agent = AIAgent.__new__(AIAgent) + agent.reasoning_callback = None + agent.stream_delta_callback = None + agent._reasoning_deltas_fired = False + agent.verbose_logging = False + return agent + + def test_fire_reasoning_delta_sets_flag(self): + agent = self._make_agent() + captured = [] + agent.reasoning_callback = lambda t: captured.append(t) + self.assertFalse(agent._reasoning_deltas_fired) + agent._fire_reasoning_delta("thinking...") + self.assertTrue(agent._reasoning_deltas_fired) + self.assertEqual(captured, ["thinking..."]) + + def test_build_assistant_message_skips_callback_when_already_streamed(self): + """When streaming already fired reasoning deltas, the post-stream + _build_assistant_message should NOT re-fire the callback.""" + agent = self._make_agent() + captured = [] + agent.reasoning_callback = lambda t: captured.append(t) + agent.stream_delta_callback = lambda t: None # streaming is active + + # Simulate streaming having fired reasoning + agent._reasoning_deltas_fired = True + + msg = SimpleNamespace( + content="I'll merge that.", + tool_calls=None, + reasoning_content="Let me merge the PR.", + reasoning=None, + reasoning_details=None, + ) + agent._build_assistant_message(msg, "stop") + + # Callback should NOT have been fired again + self.assertEqual(captured, []) + + def test_build_assistant_message_skips_callback_when_streaming_active(self): + """When streaming is active, callback should NEVER fire from + _build_assistant_message — reasoning was already displayed during the + stream (either via reasoning_content deltas or content tag extraction). + Any missed reasoning is caught by the CLI post-response fallback.""" + agent = self._make_agent() + captured = [] + agent.reasoning_callback = lambda t: captured.append(t) + agent.stream_delta_callback = lambda t: None # streaming active + + # Even though _reasoning_deltas_fired is False (reasoning came through + # content tags, not reasoning_content deltas), callback should not fire + agent._reasoning_deltas_fired = False + + msg = SimpleNamespace( + content="I'll merge that.", + tool_calls=None, + reasoning_content="Let me merge the PR.", + reasoning=None, + reasoning_details=None, + ) + agent._build_assistant_message(msg, "stop") + + # Callback should NOT fire — streaming is active + self.assertEqual(captured, []) + + def test_build_assistant_message_fires_callback_without_streaming(self): + """When no streaming is active, callback always fires for structured + reasoning.""" + agent = self._make_agent() + captured = [] + agent.reasoning_callback = lambda t: captured.append(t) + # No streaming + agent.stream_delta_callback = None + agent._reasoning_deltas_fired = False + + msg = SimpleNamespace( + content="I'll merge that.", + tool_calls=None, + reasoning_content="Let me merge the PR.", + reasoning=None, + reasoning_details=None, + ) + agent._build_assistant_message(msg, "stop") + + self.assertEqual(captured, ["Let me merge the PR."]) + + +class TestReasoningShownThisTurnFlag(unittest.TestCase): + """Post-response reasoning display should be suppressed when reasoning + was already shown during streaming in a tool-calling loop.""" + + def _make_cli(self): + from cli import HermesCLI + cli = HermesCLI.__new__(HermesCLI) + cli.show_reasoning = True + cli.streaming_enabled = True + cli._stream_box_opened = False + cli._reasoning_box_opened = False + cli._reasoning_stream_started = False + cli._reasoning_shown_this_turn = False + cli._reasoning_buf = "" + cli._stream_buf = "" + cli._stream_started = False + cli._stream_text_ansi = "" + cli._stream_prefilt = "" + cli._in_reasoning_block = False + cli._reasoning_preview_buf = "" + return cli + + @patch("cli._cprint") + def test_streaming_reasoning_sets_turn_flag(self, mock_cprint): + cli = self._make_cli() + self.assertFalse(cli._reasoning_shown_this_turn) + cli._stream_reasoning_delta("Thinking about it...") + self.assertTrue(cli._reasoning_shown_this_turn) + + @patch("cli._cprint") + def test_turn_flag_survives_reset_stream_state(self, mock_cprint): + """_reasoning_shown_this_turn must NOT be cleared by + _reset_stream_state (called at intermediate turn boundaries).""" + cli = self._make_cli() + cli._stream_reasoning_delta("Thinking...") + self.assertTrue(cli._reasoning_shown_this_turn) + + # Simulate intermediate turn boundary (tool call) + cli._reset_stream_state() + + # Flag must persist + self.assertTrue(cli._reasoning_shown_this_turn) + + @patch("cli._cprint") + def test_turn_flag_cleared_before_new_turn(self, mock_cprint): + """The turn flag should be reset at the start of a new user turn. + This happens outside _reset_stream_state, at the call site.""" + cli = self._make_cli() + cli._reasoning_shown_this_turn = True + + # Simulate new user turn setup + cli._reset_stream_state() + cli._reasoning_shown_this_turn = False # done by process_input + + self.assertFalse(cli._reasoning_shown_this_turn) + + if __name__ == "__main__": unittest.main()