diff --git a/cli.py b/cli.py
index 230d1e9ff..3221cbb79 100755
--- a/cli.py
+++ b/cli.py
@@ -4099,6 +4099,7 @@ class HermesCLI:
             # we stream audio sentence-by-sentence as the agent generates tokens
             # instead of waiting for the full response.
             use_streaming_tts = False
+            _streaming_box_opened = False
             text_queue = None
             tts_thread = None
             stream_callback = None
@@ -4123,9 +4124,21 @@ class HermesCLI:
                 text_queue = queue.Queue()
                 stop_event = threading.Event()
 
+                def display_callback(sentence: str):
+                    """Called by TTS consumer per sentence: print it, opening the response box on first call."""
+                    nonlocal _streaming_box_opened
+                    if not _streaming_box_opened:
+                        _streaming_box_opened = True  # read later to decide whether to draw the closing border
+                        w = self.console.width
+                        label = " ⚕ Hermes "
+                        fill = w - 2 - len(label)  # columns left after "╭─" and the label; one of them goes to "╮"
+                        _cprint(f"\n{_GOLD}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
+                    _cprint(sentence.rstrip())  # raw sentence text, trailing whitespace removed
+
                 tts_thread = threading.Thread(
                     target=stream_tts_to_speaker,
                     args=(text_queue, stop_event, self._voice_tts_done),
+                    kwargs={"display_callback": display_callback},
                     daemon=True,
                 )
                 tts_thread.start()
@@ -4244,8 +4257,7 @@ class HermesCLI:
                     _cprint(f"\n{r_top}\n{_DIM}{display_reasoning}{_RST}\n{r_bot}")
 
             if response and not response_previewed:
-                # Use a Rich Panel for the response box — adapts to terminal
-                # width at render time instead of hard-coding border length.
+                # Use skin engine for label/color with fallback
                 try:
                     from hermes_cli.skin_engine import get_active_skin
                     _skin = get_active_skin()
@@ -4257,17 +4269,22 @@ class HermesCLI:
                     _resp_color = "#CD7F32"
                     _resp_text = "#FFF8DC"
 
-                _chat_console = ChatConsole()
-                _chat_console.print(Panel(
-                    _rich_text_from_ansi(response),
-                    title=f"[{_resp_color} bold]{label}[/]",
-                    title_align="left",
-                    border_style=_resp_color,
-                    style=_resp_text,
-                    box=rich_box.HORIZONTALS,
-                    padding=(1, 2),
-                ))
-
+                is_error_response = result and (result.get("failed") or result.get("partial"))
+                if use_streaming_tts and _streaming_box_opened and not is_error_response:
+                    # Text was already printed sentence-by-sentence; just close the box. NOTE(review): the else branch appears to leave an opened box unclosed.
+                    w = shutil.get_terminal_size().columns
+                    _cprint(f"\n{_GOLD}╰{'─' * (w - 2)}╯{_RST}")
+                else:
+                    _chat_console = ChatConsole()
+                    _chat_console.print(Panel(
+                        _rich_text_from_ansi(response),
+                        title=f"[{_resp_color} bold]{label}[/]",
+                        title_align="left",
+                        border_style=_resp_color,
+                        style=_resp_text,
+                        box=rich_box.HORIZONTALS,
+                        padding=(1, 2),
+                    ))
 
 
             # Play terminal bell when agent finishes (if enabled).
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 31c57ce01..3b8773d49 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -32,7 +32,7 @@ import subprocess
 import tempfile
 import threading
 from pathlib import Path
-from typing import Dict, Any, Optional
+from typing import Callable, Dict, Any, Optional
 
 logger = logging.getLogger(__name__)
 
@@ -469,6 +469,7 @@ def stream_tts_to_speaker(
     text_queue: queue.Queue,
     stop_event: threading.Event,
     tts_done_event: threading.Event,
+    display_callback: Optional[Callable[[str], None]] = None,
 ):
     """Consume text deltas from *text_queue*, buffer them into sentences,
     and stream each sentence through ElevenLabs TTS to the speaker in
@@ -484,34 +485,38 @@ def stream_tts_to_speaker(
     tts_done_event.clear()
 
     try:
+        # --- TTS client setup (optional -- display_callback works without it) ---
+        client = None
+        output_stream = None
+        voice_id = DEFAULT_ELEVENLABS_VOICE_ID
+        model_id = DEFAULT_ELEVENLABS_STREAMING_MODEL_ID
+
         tts_config = _load_tts_config()
         el_config = tts_config.get("elevenlabs", {})
-        voice_id = el_config.get("voice_id", DEFAULT_ELEVENLABS_VOICE_ID)
+        voice_id = el_config.get("voice_id", voice_id)
         model_id = el_config.get("streaming_model_id",
-                                 el_config.get("model_id", DEFAULT_ELEVENLABS_STREAMING_MODEL_ID))
+                                 el_config.get("model_id", model_id))
 
         api_key = os.getenv("ELEVENLABS_API_KEY", "")
         if not api_key:
-            logger.warning("ELEVENLABS_API_KEY not set; streaming TTS disabled")
-            return
+            logger.warning("ELEVENLABS_API_KEY not set; streaming TTS audio disabled")
+        elif _HAS_ELEVENLABS:
+            client = ElevenLabs(api_key=api_key)
 
-        client = ElevenLabs(api_key=api_key)
-
-        # Open a single sounddevice output stream for the lifetime of
-        # this function.  ElevenLabs pcm_24000 produces signed 16-bit
-        # little-endian mono PCM at 24 kHz.
-        use_sd = _HAS_AUDIO and sd is not None
-        output_stream = None
-        if use_sd:
-            try:
-                import numpy as _np
-                output_stream = sd.OutputStream(
-                    samplerate=24000, channels=1, dtype="int16",
-                )
-                output_stream.start()
-            except Exception as exc:
-                logger.warning("sounddevice OutputStream failed: %s", exc)
-                output_stream = None
+            # Open a single sounddevice output stream for the lifetime of
+            # this function.  ElevenLabs pcm_24000 produces signed 16-bit
+            # little-endian mono PCM at 24 kHz.
+            use_sd = _HAS_AUDIO and sd is not None
+            if use_sd:
+                try:
+                    import numpy as _np
+                    output_stream = sd.OutputStream(
+                        samplerate=24000, channels=1, dtype="int16",
+                    )
+                    output_stream.start()
+                except Exception as exc:
+                    logger.warning("sounddevice OutputStream failed: %s", exc)
+                    output_stream = None
 
         sentence_buf = ""
         in_think = False  # track <think>...</think> blocks
@@ -520,12 +525,18 @@ def stream_tts_to_speaker(
         queue_timeout = 0.5
 
         def _speak_sentence(sentence: str):
-            """Generate and play audio for a single sentence."""
+            """Display sentence and optionally generate + play audio."""
             if stop_event.is_set():
                 return
             cleaned = _strip_markdown_for_tts(sentence).strip()
             if not cleaned:
                 return
+            # Display raw sentence on screen before TTS processing. NOTE(review): input whose cleaned form is empty returned above and is never displayed.
+            if display_callback is not None:
+                display_callback(sentence)
+            # Skip audio generation if no TTS client available
+            if client is None:
+                return
             # Truncate very long sentences
             if len(cleaned) > MAX_TEXT_LENGTH:
                 cleaned = cleaned[:MAX_TEXT_LENGTH]