fix: persistent audio stream and silence detection improvements

- Keep InputStream alive across recordings to avoid CoreAudio hang on
  repeated open/close cycles on macOS. New _ensure_stream() creates the
  stream once; start()/stop()/cancel() only toggle frame collection.
- Add _close_stream_with_timeout() with daemon thread to prevent
  stream.stop()/close() from blocking indefinitely.
- Add generation counter to detect stale stream-open completions after
  cancel or restart.
- Run recorder.cancel() in background thread from Ctrl+C handler to keep
  the event loop responsive.
- Add shutdown() method called on /voice off to release audio resources.
- Fix silence timer reset during active speech: use dip tolerance for
  _resume_start tracker so natural speech pauses (< 0.3s) don't prevent
  the silence timer from being reset.
- Update tests to match persistent stream behavior.
2026-03-10 20:37:17 +03:00
parent eec04d180a
commit eb79dda04b
4 changed files with 221 additions and 132 deletions
--- a/cli.py
+++ b/cli.py
@@ -3848,14 +3848,26 @@ class HermesCLI:

    def _disable_voice_mode(self):
        """Disable voice mode, cancel any active recording, and stop TTS."""
+        recorder = None
        with self._voice_lock:
            if self._voice_recording and self._voice_recorder:
                self._voice_recorder.cancel()
                self._voice_recording = False
+            recorder = self._voice_recorder
            self._voice_mode = False
            self._voice_tts = False
            self._voice_continuous = False

+        # Shut down the persistent audio stream in background
+        if recorder is not None:
+            def _bg_shutdown(rec=recorder):
+                try:
+                    rec.shutdown()
+                except Exception:
+                    pass
+            threading.Thread(target=_bg_shutdown, daemon=True).start()
+            self._voice_recorder = None
+
        # Stop any active TTS playback
        try:
            from tools.voice_mode import stop_playback
@@ -4799,15 +4811,24 @@ class HermesCLI:
            import time as _time
            now = _time.time()

-            # Cancel active voice recording
+            # Cancel active voice recording.
+            # Run cancel() in a background thread to prevent blocking the
+            # event loop if AudioRecorder._lock or CoreAudio takes time.
+            _should_cancel_voice = False
+            _recorder_ref = None
            with cli_ref._voice_lock:
                if cli_ref._voice_recording and cli_ref._voice_recorder:
-                    cli_ref._voice_recorder.cancel()
+                    _recorder_ref = cli_ref._voice_recorder
                    cli_ref._voice_recording = False
                    cli_ref._voice_continuous = False
-                    _cprint(f"\n{_DIM}Recording cancelled.{_RST}")
-                    event.app.invalidate()
-                    return
+                    _should_cancel_voice = True
+            if _should_cancel_voice:
+                _cprint(f"\n{_DIM}Recording cancelled.{_RST}")
+                threading.Thread(
+                    target=_recorder_ref.cancel, daemon=True
+                ).start()
+                event.app.invalidate()
+                return

            # Cancel sudo prompt
            if self._sudo_state: