fix: prevent Ctrl+B key handler from blocking prompt_toolkit event loop

The handle_voice_record key binding runs in prompt_toolkit's event-loop
thread. When silence auto-stopped recording, _voice_recording was False
but recorder.stop() still held AudioRecorder._lock. A concurrent Ctrl+B
press entered the START path and blocked on that lock, freezing all
keyboard input.

Three changes:
- Set _voice_processing atomically with _voice_recording=False in
  _voice_stop_and_transcribe to close the race window
- Add _voice_processing guard in the START path to prevent starting
  while stop/transcribe is still running
- Dispatch _voice_start_recording to a daemon thread so play_beep
  (sd.wait) and AudioRecorder.start (lock acquire) never block the
  event loop
This commit is contained in:
0xbyt4
2026-03-10 12:59:30 +03:00
parent ecc3dd7c63
commit d0e3b39e69
2 changed files with 133 additions and 19 deletions

62
cli.py
View File

@@ -3617,11 +3617,14 @@ class HermesCLI:
def _voice_stop_and_transcribe(self):
"""Stop recording, transcribe via STT, and queue the transcript as input."""
# Atomic guard: only one thread can enter stop-and-transcribe
# Atomic guard: only one thread can enter stop-and-transcribe.
# Set _voice_processing immediately so concurrent Ctrl+B presses
# don't race into the START path while recorder.stop() holds its lock.
with self._voice_lock:
if not self._voice_recording:
return
self._voice_recording = False
self._voice_processing = True
submitted = False
wav_path = None
@@ -3642,8 +3645,7 @@ class HermesCLI:
_cprint(f"{_DIM}No speech detected.{_RST}")
return
with self._voice_lock:
self._voice_processing = True
# _voice_processing is already True (set atomically above)
if hasattr(self, '_app') and self._app:
self._app.invalidate()
_cprint(f"{_DIM}Transcribing...{_RST}")
@@ -4864,7 +4866,12 @@ class HermesCLI:
@kb.add(_voice_key)
def handle_voice_record(event):
"""Toggle voice recording when voice mode is active."""
"""Toggle voice recording when voice mode is active.
IMPORTANT: This handler runs in prompt_toolkit's event-loop thread.
Any blocking call here (locks, sd.wait, disk I/O) freezes the
entire UI. All heavy work is dispatched to daemon threads.
"""
if not cli_ref._voice_mode:
return
# Always allow STOPPING a recording (even when agent is running)
@@ -4884,21 +4891,38 @@ class HermesCLI:
return
if cli_ref._clarify_state or cli_ref._sudo_state or cli_ref._approval_state:
return
try:
# Interrupt TTS if playing, so user can start talking
if not cli_ref._voice_tts_done.is_set():
try:
from tools.voice_mode import stop_playback
stop_playback()
cli_ref._voice_tts_done.set()
except Exception:
pass
with cli_ref._voice_lock:
cli_ref._voice_continuous = True
cli_ref._voice_start_recording()
event.app.invalidate()
except Exception as e:
_cprint(f"\n{_DIM}Voice recording failed: {e}{_RST}")
# Guard: don't start while a previous stop/transcribe cycle is
# still running — recorder.stop() holds AudioRecorder._lock and
# start() would block the event-loop thread waiting for it.
if cli_ref._voice_processing:
return
# Interrupt TTS if playing, so user can start talking.
# stop_playback() is fast (just terminates a subprocess).
if not cli_ref._voice_tts_done.is_set():
try:
from tools.voice_mode import stop_playback
stop_playback()
cli_ref._voice_tts_done.set()
except Exception:
pass
with cli_ref._voice_lock:
cli_ref._voice_continuous = True
# Dispatch to a daemon thread so play_beep(sd.wait),
# AudioRecorder.start(lock acquire), and config I/O
# never block the prompt_toolkit event loop.
def _start_recording():
try:
cli_ref._voice_start_recording()
if hasattr(cli_ref, '_app') and cli_ref._app:
cli_ref._app.invalidate()
except Exception as e:
_cprint(f"\n{_DIM}Voice recording failed: {e}{_RST}")
threading.Thread(target=_start_recording, daemon=True).start()
event.app.invalidate()
from prompt_toolkit.keys import Keys
@kb.add(Keys.BracketedPaste, eager=True)

View File

@@ -732,6 +732,96 @@ class TestBrowserToolSignalHandlerRemoved:
)
class TestKeyHandlerNeverBlocks:
"""The Ctrl+B key handler runs in prompt_toolkit's event-loop thread.
Any blocking call freezes the entire UI. Verify that:
1. _voice_start_recording is NOT called directly (must be in daemon thread)
2. _voice_processing guard prevents starting while stop/transcribe runs
3. _voice_processing is set atomically with _voice_recording in stop_and_transcribe
"""
def test_start_recording_not_called_directly_in_handler(self):
"""AST check: handle_voice_record must NOT call _voice_start_recording()
directly — it must wrap it in a Thread to avoid blocking the UI."""
import ast as _ast
with open("cli.py") as f:
tree = _ast.parse(f.read())
for node in _ast.walk(tree):
if isinstance(node, _ast.FunctionDef) and node.name == "handle_voice_record":
# Collect all direct calls to _voice_start_recording in this function.
# They should ONLY appear inside a nested def (the _start_recording wrapper).
for child in _ast.iter_child_nodes(node):
# Direct statements in the handler body (not nested defs)
if isinstance(child, _ast.Expr) and isinstance(child.value, _ast.Call):
call_src = _ast.dump(child.value)
assert "_voice_start_recording" not in call_src, (
"handle_voice_record calls _voice_start_recording directly "
"— must dispatch to a daemon thread"
)
break
def test_processing_guard_in_start_path(self):
"""Source check: key handler must check _voice_processing before
starting a new recording."""
with open("cli.py") as f:
source = f.read()
lines = source.split("\n")
in_handler = False
in_else = False
found_guard = False
for line in lines:
if "def handle_voice_record" in line:
in_handler = True
elif in_handler and line.strip().startswith("def ") and "_start_recording" not in line:
break
elif in_handler and "else:" in line:
in_else = True
elif in_else and "_voice_processing" in line:
found_guard = True
break
assert found_guard, (
"Key handler START path must guard against _voice_processing "
"to prevent blocking on AudioRecorder._lock"
)
def test_processing_set_atomically_with_recording_false(self):
"""Source check: _voice_stop_and_transcribe must set _voice_processing = True
in the same lock block where it sets _voice_recording = False."""
with open("cli.py") as f:
source = f.read()
lines = source.split("\n")
in_method = False
in_first_lock = False
found_recording_false = False
found_processing_true = False
for line in lines:
if "def _voice_stop_and_transcribe" in line:
in_method = True
elif in_method and "with self._voice_lock:" in line and not in_first_lock:
in_first_lock = True
elif in_first_lock:
stripped = line.strip()
if not stripped or stripped.startswith("#"):
continue
if "_voice_recording = False" in stripped:
found_recording_false = True
if "_voice_processing = True" in stripped:
found_processing_true = True
# End of with block (dedent)
if stripped and not line.startswith(" ") and not line.startswith("\t\t\t"):
break
assert found_recording_false and found_processing_true, (
"_voice_stop_and_transcribe must set _voice_processing = True "
"atomically (same lock block) with _voice_recording = False"
)
# ============================================================================
# Real behavior tests — CLI voice methods via _make_voice_cli()
# ============================================================================