fix: prevent Ctrl+B key handler from blocking prompt_toolkit event loop

The handle_voice_record key binding runs in prompt_toolkit's event-loop thread. When silence auto-stopped recording, _voice_recording was False but recorder.stop() still held AudioRecorder._lock. A concurrent Ctrl+B press entered the START path and blocked on that lock, freezing all keyboard input. Three changes: - Set _voice_processing atomically with _voice_recording=False in _voice_stop_and_transcribe to close the race window - Add _voice_processing guard in the START path to prevent starting while stop/transcribe is still running - Dispatch _voice_start_recording to a daemon thread so play_beep (sd.wait) and AudioRecorder.start (lock acquire) never block the event loop
2026-03-10 12:59:30 +03:00
parent ecc3dd7c63
commit d0e3b39e69
2 changed files with 133 additions and 19 deletions
--- a/cli.py
+++ b/cli.py
@@ -3617,11 +3617,14 @@ class HermesCLI:

    def _voice_stop_and_transcribe(self):
        """Stop recording, transcribe via STT, and queue the transcript as input."""
-        # Atomic guard: only one thread can enter stop-and-transcribe
+        # Atomic guard: only one thread can enter stop-and-transcribe.
+        # Set _voice_processing immediately so concurrent Ctrl+B presses
+        # don't race into the START path while recorder.stop() holds its lock.
        with self._voice_lock:
            if not self._voice_recording:
                return
            self._voice_recording = False
+            self._voice_processing = True

        submitted = False
        wav_path = None
@@ -3642,8 +3645,7 @@ class HermesCLI:
                _cprint(f"{_DIM}No speech detected.{_RST}")
                return

-            with self._voice_lock:
-                self._voice_processing = True
+            # _voice_processing is already True (set atomically above)
            if hasattr(self, '_app') and self._app:
                self._app.invalidate()
            _cprint(f"{_DIM}Transcribing...{_RST}")
@@ -4864,7 +4866,12 @@ class HermesCLI:

        @kb.add(_voice_key)
        def handle_voice_record(event):
-            """Toggle voice recording when voice mode is active."""
+            """Toggle voice recording when voice mode is active.
+
+            IMPORTANT: This handler runs in prompt_toolkit's event-loop thread.
+            Any blocking call here (locks, sd.wait, disk I/O) freezes the
+            entire UI.  All heavy work is dispatched to daemon threads.
+            """
            if not cli_ref._voice_mode:
                return
            # Always allow STOPPING a recording (even when agent is running)
@@ -4884,21 +4891,38 @@ class HermesCLI:
                    return
                if cli_ref._clarify_state or cli_ref._sudo_state or cli_ref._approval_state:
                    return
-                try:
-                    # Interrupt TTS if playing, so user can start talking
-                    if not cli_ref._voice_tts_done.is_set():
-                        try:
-                            from tools.voice_mode import stop_playback
-                            stop_playback()
-                            cli_ref._voice_tts_done.set()
-                        except Exception:
-                            pass
-                    with cli_ref._voice_lock:
-                        cli_ref._voice_continuous = True
-                    cli_ref._voice_start_recording()
-                    event.app.invalidate()
-                except Exception as e:
-                    _cprint(f"\n{_DIM}Voice recording failed: {e}{_RST}")
+                # Guard: don't start while a previous stop/transcribe cycle is
+                # still running — recorder.stop() holds AudioRecorder._lock and
+                # start() would block the event-loop thread waiting for it.
+                if cli_ref._voice_processing:
+                    return
+
+                # Interrupt TTS if playing, so user can start talking.
+                # stop_playback() is fast (just terminates a subprocess).
+                if not cli_ref._voice_tts_done.is_set():
+                    try:
+                        from tools.voice_mode import stop_playback
+                        stop_playback()
+                        cli_ref._voice_tts_done.set()
+                    except Exception:
+                        pass
+
+                with cli_ref._voice_lock:
+                    cli_ref._voice_continuous = True
+
+                # Dispatch to a daemon thread so play_beep(sd.wait),
+                # AudioRecorder.start(lock acquire), and config I/O
+                # never block the prompt_toolkit event loop.
+                def _start_recording():
+                    try:
+                        cli_ref._voice_start_recording()
+                        if hasattr(cli_ref, '_app') and cli_ref._app:
+                            cli_ref._app.invalidate()
+                    except Exception as e:
+                        _cprint(f"\n{_DIM}Voice recording failed: {e}{_RST}")
+
+                threading.Thread(target=_start_recording, daemon=True).start()
+                event.app.invalidate()
        from prompt_toolkit.keys import Keys

        @kb.add(Keys.BracketedPaste, eager=True)
--- a/tests/tools/test_voice_cli_integration.py
+++ b/tests/tools/test_voice_cli_integration.py
@@ -732,6 +732,96 @@ class TestBrowserToolSignalHandlerRemoved:
            )


+class TestKeyHandlerNeverBlocks:
+    """The Ctrl+B key handler runs in prompt_toolkit's event-loop thread.
+    Any blocking call freezes the entire UI.  Verify that:
+    1. _voice_start_recording is NOT called directly (must be in daemon thread)
+    2. _voice_processing guard prevents starting while stop/transcribe runs
+    3. _voice_processing is set atomically with _voice_recording in stop_and_transcribe
+    """
+
+    def test_start_recording_not_called_directly_in_handler(self):
+        """AST check: handle_voice_record must NOT call _voice_start_recording()
+        directly — it must wrap it in a Thread to avoid blocking the UI."""
+        import ast as _ast
+
+        with open("cli.py") as f:
+            tree = _ast.parse(f.read())
+
+        for node in _ast.walk(tree):
+            if isinstance(node, _ast.FunctionDef) and node.name == "handle_voice_record":
+                # Collect all direct calls to _voice_start_recording in this function.
+                # They should ONLY appear inside a nested def (the _start_recording wrapper).
+                for child in _ast.iter_child_nodes(node):
+                    # Direct statements in the handler body (not nested defs)
+                    if isinstance(child, _ast.Expr) and isinstance(child.value, _ast.Call):
+                        call_src = _ast.dump(child.value)
+                        assert "_voice_start_recording" not in call_src, (
+                            "handle_voice_record calls _voice_start_recording directly "
+                            "— must dispatch to a daemon thread"
+                        )
+                break
+
+    def test_processing_guard_in_start_path(self):
+        """Source check: key handler must check _voice_processing before
+        starting a new recording."""
+        with open("cli.py") as f:
+            source = f.read()
+
+        lines = source.split("\n")
+        in_handler = False
+        in_else = False
+        found_guard = False
+        for line in lines:
+            if "def handle_voice_record" in line:
+                in_handler = True
+            elif in_handler and line.strip().startswith("def ") and "_start_recording" not in line:
+                break
+            elif in_handler and "else:" in line:
+                in_else = True
+            elif in_else and "_voice_processing" in line:
+                found_guard = True
+                break
+
+        assert found_guard, (
+            "Key handler START path must guard against _voice_processing "
+            "to prevent blocking on AudioRecorder._lock"
+        )
+
+    def test_processing_set_atomically_with_recording_false(self):
+        """Source check: _voice_stop_and_transcribe must set _voice_processing = True
+        in the same lock block where it sets _voice_recording = False."""
+        with open("cli.py") as f:
+            source = f.read()
+
+        lines = source.split("\n")
+        in_method = False
+        in_first_lock = False
+        found_recording_false = False
+        found_processing_true = False
+        for line in lines:
+            if "def _voice_stop_and_transcribe" in line:
+                in_method = True
+            elif in_method and "with self._voice_lock:" in line and not in_first_lock:
+                in_first_lock = True
+            elif in_first_lock:
+                stripped = line.strip()
+                if not stripped or stripped.startswith("#"):
+                    continue
+                if "_voice_recording = False" in stripped:
+                    found_recording_false = True
+                if "_voice_processing = True" in stripped:
+                    found_processing_true = True
+                # End of with block (dedent)
+                if stripped and not line.startswith("            ") and not line.startswith("\t\t\t"):
+                    break
+
+        assert found_recording_false and found_processing_true, (
+            "_voice_stop_and_transcribe must set _voice_processing = True "
+            "atomically (same lock block) with _voice_recording = False"
+        )
+
+
 # ============================================================================
 # Real behavior tests — CLI voice methods via _make_voice_cli()
 # ============================================================================