fix: resolve 6 voice mode bugs found during audit

- edge_tts NameError: _generate_edge_tts now calls _import_edge_tts()
  instead of referencing bare module name (tts_tool.py)
- TTS thread leak: chat() finally block sends sentinel to text_queue,
  sets stop_event, and joins tts_thread on exception paths (cli.py)
- output_stream leak: moved close() into finally block so audio device
  is released even on exception (tts_tool.py)
- Ctrl+C continuous mode: cancel handler now resets _voice_continuous
  to prevent auto-restart after user cancels recording (cli.py)
- _disable_voice_mode: now calls stop_playback() and sets
  _voice_tts_done so TTS stops when voice mode is turned off (cli.py)
- _show_voice_status: reads record key from config instead of
  hardcoding Ctrl+B (cli.py)
This commit is contained in:
0xbyt4
2026-03-10 12:33:53 +03:00
parent a78249230c
commit ddfd6e0c59
3 changed files with 240 additions and 8 deletions

29
cli.py
View File

@@ -3830,7 +3830,7 @@ class HermesCLI:
_cprint(f" {_DIM}/voice off to disable voice mode{_RST}")
def _disable_voice_mode(self):
"""Disable voice mode and cancel any active recording."""
"""Disable voice mode, cancel any active recording, and stop TTS."""
with self._voice_lock:
if self._voice_recording and self._voice_recorder:
self._voice_recorder.cancel()
@@ -3839,6 +3839,14 @@ class HermesCLI:
self._voice_tts = False
self._voice_continuous = False
# Stop any active TTS playback
try:
from tools.voice_mode import stop_playback
stop_playback()
except Exception:
pass
self._voice_tts_done.set()
_cprint(f"\n{_DIM}Voice mode disabled.{_RST}")
def _toggle_voice_tts(self):
@@ -3868,7 +3876,9 @@ class HermesCLI:
_cprint(f" Mode: {'ON' if self._voice_mode else 'OFF'}")
_cprint(f" TTS: {'ON' if self._voice_tts else 'OFF'}")
_cprint(f" Recording: {'YES' if self._voice_recording else 'no'}")
_cprint(f" Record key: Ctrl+B")
_raw_key = load_config().get("voice", {}).get("record_key", "ctrl+b")
_display_key = _raw_key.replace("ctrl+", "Ctrl+").upper() if "ctrl+" in _raw_key.lower() else _raw_key
_cprint(f" Record key: {_display_key}")
_cprint(f"\n {_BOLD}Requirements:{_RST}")
for line in reqs["details"].split("\n"):
_cprint(f" {line}")
@@ -4368,6 +4378,20 @@ class HermesCLI:
except Exception as e:
print(f"Error: {e}")
return None
finally:
# Ensure streaming TTS resources are cleaned up even on error.
# Normal path sends the sentinel at line ~3568; this is a safety
# net for exception paths that skip it. Duplicate sentinels are
# harmless — stream_tts_to_speaker exits on the first None.
if text_queue is not None:
try:
text_queue.put_nowait(None)
except Exception:
pass
if stop_event is not None:
stop_event.set()
if tts_thread is not None and tts_thread.is_alive():
tts_thread.join(timeout=5)
def _print_exit_summary(self):
"""Print session resume info on exit, similar to Claude Code."""
@@ -4763,6 +4787,7 @@ class HermesCLI:
if cli_ref._voice_recording and cli_ref._voice_recorder:
cli_ref._voice_recorder.cancel()
cli_ref._voice_recording = False
cli_ref._voice_continuous = False
_cprint(f"\n{_DIM}Recording cancelled.{_RST}")
event.app.invalidate()
return

View File

@@ -469,3 +469,208 @@ class TestVprintForceParameter:
"Expected at least one _vprint with force=True for error messages"
assert unforced_error_count == 0, \
f"Found {unforced_error_count} critical error _vprint calls without force=True"
# ============================================================================
# Bug fix regression tests
# ============================================================================
class TestEdgeTTSLazyImport:
"""Bug #3: _generate_edge_tts must use lazy import, not bare module name."""
def test_generate_edge_tts_calls_lazy_import(self):
"""AST check: _generate_edge_tts must call _import_edge_tts(), not
reference bare 'edge_tts' module name."""
import ast as _ast
with open("tools/tts_tool.py") as f:
tree = _ast.parse(f.read())
for node in _ast.walk(tree):
if isinstance(node, _ast.AsyncFunctionDef) and node.name == "_generate_edge_tts":
# Collect all Name references (bare identifiers)
bare_refs = [
n.id for n in _ast.walk(node)
if isinstance(n, _ast.Name) and n.id == "edge_tts"
]
assert bare_refs == [], (
f"_generate_edge_tts uses bare 'edge_tts' name — "
f"should use _import_edge_tts() lazy helper"
)
# Must have a call to _import_edge_tts
lazy_calls = [
n for n in _ast.walk(node)
if isinstance(n, _ast.Call)
and isinstance(n.func, _ast.Name)
and n.func.id == "_import_edge_tts"
]
assert len(lazy_calls) >= 1, (
"_generate_edge_tts must call _import_edge_tts()"
)
break
else:
pytest.fail("_generate_edge_tts not found in tts_tool.py")
class TestStreamingTTSOutputStreamCleanup:
"""Bug #7: output_stream must be closed in finally block."""
def test_output_stream_closed_in_finally(self):
"""AST check: stream_tts_to_speaker's finally block must close
output_stream even on exception."""
import ast as _ast
with open("tools/tts_tool.py") as f:
tree = _ast.parse(f.read())
for node in _ast.walk(tree):
if isinstance(node, _ast.FunctionDef) and node.name == "stream_tts_to_speaker":
# Find the outermost try that has a finally with tts_done_event.set()
for child in _ast.walk(node):
if isinstance(child, _ast.Try) and child.finalbody:
finally_text = "\n".join(
_ast.dump(n) for n in child.finalbody
)
if "tts_done_event" in finally_text:
assert "output_stream" in finally_text, (
"finally block must close output_stream"
)
return
pytest.fail("No finally block with tts_done_event found")
class TestCtrlCResetsContinuousMode:
"""Bug #4: Ctrl+C cancel must reset _voice_continuous."""
def test_ctrl_c_handler_resets_voice_continuous(self):
"""Source check: Ctrl+C voice cancel block must set
_voice_continuous = False."""
with open("cli.py") as f:
source = f.read()
# Find the Ctrl+C handler's voice cancel block
lines = source.split("\n")
in_cancel_block = False
found_continuous_reset = False
for i, line in enumerate(lines):
if "Cancel active voice recording" in line:
in_cancel_block = True
if in_cancel_block:
if "_voice_continuous = False" in line:
found_continuous_reset = True
break
# Block ends at next comment section or return
if "return" in line and in_cancel_block:
break
assert found_continuous_reset, (
"Ctrl+C voice cancel block must set _voice_continuous = False"
)
class TestDisableVoiceModeStopsTTS:
"""Bug #5: _disable_voice_mode must stop active TTS playback."""
def test_disable_voice_mode_calls_stop_playback(self):
"""Source check: _disable_voice_mode must call stop_playback()."""
with open("cli.py") as f:
source = f.read()
# Extract _disable_voice_mode method body
lines = source.split("\n")
in_method = False
method_lines = []
for line in lines:
if "def _disable_voice_mode" in line:
in_method = True
elif in_method:
if line.strip() and not line.startswith(" ") and not line.startswith("\t"):
break
if line.strip().startswith("def "):
break
method_lines.append(line)
method_body = "\n".join(method_lines)
assert "stop_playback" in method_body, (
"_disable_voice_mode must call stop_playback()"
)
assert "_voice_tts_done.set()" in method_body, (
"_disable_voice_mode must set _voice_tts_done"
)
class TestVoiceStatusUsesConfigKey:
"""Bug #8: _show_voice_status must read record key from config."""
def test_show_voice_status_not_hardcoded(self):
"""Source check: _show_voice_status must not hardcode Ctrl+B."""
with open("cli.py") as f:
source = f.read()
lines = source.split("\n")
in_method = False
for line in lines:
if "def _show_voice_status" in line:
in_method = True
elif in_method and line.strip().startswith("def "):
break
elif in_method:
assert 'Record key: Ctrl+B"' not in line, (
"_show_voice_status hardcodes 'Ctrl+B'"
"should read from config"
)
def test_show_voice_status_reads_config(self):
"""Source check: _show_voice_status must use load_config()."""
with open("cli.py") as f:
source = f.read()
lines = source.split("\n")
in_method = False
method_lines = []
for line in lines:
if "def _show_voice_status" in line:
in_method = True
elif in_method and line.strip().startswith("def "):
break
elif in_method:
method_lines.append(line)
method_body = "\n".join(method_lines)
assert "load_config" in method_body or "record_key" in method_body, (
"_show_voice_status should read record_key from config"
)
class TestChatTTSCleanupOnException:
"""Bug #2: chat() must clean up streaming TTS resources on exception."""
def test_chat_has_finally_for_tts_cleanup(self):
"""AST check: chat() method must have a finally block that cleans up
text_queue, stop_event, and tts_thread."""
import ast as _ast
with open("cli.py") as f:
tree = _ast.parse(f.read())
for node in _ast.walk(tree):
if isinstance(node, _ast.FunctionDef) and node.name == "chat":
# Find Try nodes with finally blocks
for child in _ast.walk(node):
if isinstance(child, _ast.Try) and child.finalbody:
finally_text = "\n".join(
_ast.dump(n) for n in child.finalbody
)
if "text_queue" in finally_text:
assert "stop_event" in finally_text, (
"finally must also handle stop_event"
)
assert "tts_thread" in finally_text, (
"finally must also handle tts_thread"
)
return
pytest.fail(
"chat() must have a finally block cleaning up "
"text_queue/stop_event/tts_thread"
)

View File

@@ -161,10 +161,11 @@ async def _generate_edge_tts(text: str, output_path: str, tts_config: Dict[str,
Returns:
Path to the saved audio file.
"""
_edge_tts = _import_edge_tts()
edge_config = tts_config.get("edge", {})
voice = edge_config.get("voice", DEFAULT_EDGE_VOICE)
communicate = edge_tts.Communicate(text, voice)
communicate = _edge_tts.Communicate(text, voice)
await communicate.save(output_path)
return output_path
@@ -667,17 +668,18 @@ def stream_tts_to_speaker(
except queue.Empty:
break
# Close the audio output stream
# output_stream is closed in the finally block below
except Exception as exc:
logger.warning("Streaming TTS pipeline error: %s", exc)
finally:
# Always close the audio output stream to avoid locking the device
if output_stream is not None:
try:
output_stream.stop()
output_stream.close()
except Exception:
pass
except Exception as exc:
logger.warning("Streaming TTS pipeline error: %s", exc)
finally:
tts_done_event.set()