From 942950f5b9aaba74a60c7d400b839f70807d3696 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Mon, 16 Mar 2026 10:29:55 -0700
Subject: [PATCH] =?UTF-8?q?feat(cli):=20live=20reasoning=20token=20streami?=
 =?UTF-8?q?ng=20=E2=80=94=20dim=20box=20above=20response?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When both display.streaming and display.show_reasoning are enabled,
reasoning tokens stream in real time into a dim bordered box. When
content tokens start arriving, the reasoning box closes and the
response box opens, giving a smooth visual transition.

- _stream_reasoning_delta(): line-buffered rendering in dim text
- _close_reasoning_box(): flush + close, called on first content token
  (and from _flush_stream() in case no content tokens arrived)
- Reasoning callback routes to streaming version when both flags set
- Skips static post-response reasoning display when streamed live
- State reset per turn via _reset_stream_state()

Works with reasoning_content deltas (OpenRouter reasoning mode) and
thinking_delta events (Anthropic extended thinking).
---
 cli.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 53 insertions(+), 12 deletions(-)

diff --git a/cli.py b/cli.py
index b02221ec8..c24b67e07 100755
--- a/cli.py
+++ b/cli.py
@@ -1413,15 +1413,43 @@ class HermesCLI:
         self._invalidate()
 
     # ── Streaming display ────────────────────────────────────────────────
-    #
-    # Future: When display.show_reasoning is also enabled, stream reasoning
-    # tokens into a dim box above the response (like the existing static
-    # reasoning display, but live). The infrastructure exists — reasoning
-    # deltas fire via _fire_reasoning_delta() during streaming. The display
-    # layer needs: a dim reasoning box that opens on first reasoning token,
-    # accumulates live, then transitions to the response box when content
-    # tokens start arriving. See PR #1214 (raulvidis) for gateway-side
-    # reasoning visibility modes as a reference implementation.
+
+    def _stream_reasoning_delta(self, text: str) -> None:
+        """Stream reasoning/thinking tokens into a dim box above the response.
+
+        Opens the box on the first non-empty delta and prints completed lines
+        dimmed; a trailing partial line stays buffered. _close_reasoning_box()
+        ends the box when content arrives (_stream_delta → _emit_stream_text).
+        """
+        if not text:
+            return
+
+        # Box not open yet: draw the header rule, sized to the terminal width
+        if not getattr(self, "_reasoning_box_opened", False):
+            self._reasoning_box_opened = True
+            w = shutil.get_terminal_size().columns
+            r_label = " Reasoning "
+            r_fill = w - 2 - len(r_label)  # columns left after "┌─" and the label
+            _cprint(f"\n{_DIM}┌─{r_label}{'─' * max(r_fill - 1, 0)}┐{_RST}")
+
+        self._reasoning_buf = getattr(self, "_reasoning_buf", "") + text
+
+        # Print each newline-terminated line; the unterminated tail stays buffered
+        while "\n" in self._reasoning_buf:
+            line, self._reasoning_buf = self._reasoning_buf.split("\n", 1)
+            _cprint(f"{_DIM}{line}{_RST}")
+
+    def _close_reasoning_box(self) -> None:
+        """Flush any partial line and close the live reasoning box, if open."""
+        if getattr(self, "_reasoning_box_opened", False):
+            # Print the buffered tail that never received its terminating newline
+            buf = getattr(self, "_reasoning_buf", "")
+            if buf:
+                _cprint(f"{_DIM}{buf}{_RST}")
+                self._reasoning_buf = ""
+            w = shutil.get_terminal_size().columns  # re-read here, not reused from the header
+            _cprint(f"{_DIM}└{'─' * (w - 2)}┘{_RST}")
+            self._reasoning_box_opened = False  # a later reasoning delta opens a new box
 
     def _stream_delta(self, text: str) -> None:
         """Line-buffered streaming callback for real-time token rendering.
@@ -1504,6 +1532,9 @@ class HermesCLI:
         if not text:
             return
 
+        # Close the live reasoning box before opening the response box
+        self._close_reasoning_box()
+
         # Open the response box header on the very first visible text
         if not self._stream_box_opened:
             # Strip leading whitespace/newlines before first visible content
@@ -1530,6 +1561,9 @@ class HermesCLI:
 
     def _flush_stream(self) -> None:
         """Emit any remaining partial line from the stream buffer and close the box."""
+        # Close reasoning box if still open (in case no content tokens arrived)
+        self._close_reasoning_box()
+
         if self._stream_buf:
             _cprint(self._stream_buf)
             self._stream_buf = ""
@@ -1546,6 +1580,8 @@ class HermesCLI:
         self._stream_box_opened = False
         self._stream_prefilt = ""
         self._in_reasoning_block = False
+        self._reasoning_box_opened = False
+        self._reasoning_buf = ""
 
     def _slow_command_status(self, command: str) -> str:
         """Return a user-facing status message for slower slash commands."""
@@ -1724,7 +1760,11 @@ class HermesCLI:
                 platform="cli",
                 session_db=self._session_db,
                 clarify_callback=self._clarify_callback,
-                reasoning_callback=self._on_reasoning if (self.show_reasoning or self.verbose) else None,
+                reasoning_callback=(
+                    self._stream_reasoning_delta if (self.streaming_enabled and self.show_reasoning)
+                    else self._on_reasoning if (self.show_reasoning or self.verbose)
+                    else None
+                ),
                 honcho_session_key=None,  # resolved by run_agent via config sessions map / title
                 fallback_model=self._fallback_model,
                 thinking_callback=self._on_thinking,
@@ -4935,8 +4975,9 @@ class HermesCLI:
 
             response_previewed = result.get("response_previewed", False) if result else False
 
-            # Display reasoning (thinking) box if enabled and available
-            if self.show_reasoning and result:
+            # Display reasoning (thinking) box if enabled and available.
+            # Skip when streaming already showed reasoning live.
+            if self.show_reasoning and result and not self._stream_started:
                 reasoning = result.get("last_reasoning")
                 if reasoning:
                     w = shutil.get_terminal_size().columns