diff --git a/src/timmy/voice_loop.py b/src/timmy/voice_loop.py index 24e632b..6058e42 100644 --- a/src/timmy/voice_loop.py +++ b/src/timmy/voice_loop.py @@ -369,15 +369,33 @@ class VoiceLoop: # ── Main Loop ─────────────────────────────────────────────────────── - def run(self) -> None: - """Run the voice loop. Blocks until Ctrl-C.""" - self._ensure_piper() + # Whisper hallucinates these on silence/noise — skip them. + _WHISPER_HALLUCINATIONS = frozenset( + { + "you", + "thanks.", + "thank you.", + "bye.", + "", + "thanks for watching!", + "thank you for watching!", + } + ) - # Suppress MCP / Agno stderr noise during voice mode. - _suppress_mcp_noise() - # Suppress MCP async-generator teardown tracebacks on exit. - _install_quiet_asyncgen_hooks() + # Spoken phrases that end the voice session. + _EXIT_COMMANDS = frozenset( + { + "goodbye", + "exit", + "quit", + "stop", + "goodbye timmy", + "stop listening", + } + ) + def _log_banner(self) -> None: + """Log the startup banner with STT/TTS/LLM configuration.""" tts_label = ( "macOS say" if self.config.use_say_fallback @@ -393,52 +411,50 @@ class VoiceLoop: " Press Ctrl-C to exit.\n" + "=" * 60 ) + def _is_hallucination(self, text: str) -> bool: + """Return True if *text* is a known Whisper hallucination.""" + return not text or text.lower() in self._WHISPER_HALLUCINATIONS + + def _is_exit_command(self, text: str) -> bool: + """Return True if the user asked to stop the voice session.""" + return text.lower().strip().rstrip(".!") in self._EXIT_COMMANDS + + def _process_turn(self, text: str) -> None: + """Handle a single listen-think-speak turn after transcription.""" + sys.stdout.write(f"\n 👤 You: {text}\n") + sys.stdout.flush() + + response = self._think(text) + sys.stdout.write(f" 🤖 Timmy: {response}\n") + sys.stdout.flush() + + self._speak(response) + + def run(self) -> None: + """Run the voice loop. Blocks until Ctrl-C.""" + self._ensure_piper() + _suppress_mcp_noise() + _install_quiet_asyncgen_hooks() + self._log_banner() + self._running = True try: while self._running: - # 1. LISTEN — record until silence audio = self._record_utterance() if audio is None: continue - # 2. TRANSCRIBE — Whisper STT text = self._transcribe(audio) - if not text or text.lower() in ( - "you", - "thanks.", - "thank you.", - "bye.", - "", - "thanks for watching!", - "thank you for watching!", - ): - # Whisper hallucinations on silence/noise + if self._is_hallucination(text): logger.debug("Ignoring likely Whisper hallucination: '%s'", text) continue - sys.stdout.write(f"\n 👤 You: {text}\n") - sys.stdout.flush() - - # Exit commands - if text.lower().strip().rstrip(".!") in ( - "goodbye", - "exit", - "quit", - "stop", - "goodbye timmy", - "stop listening", - ): + if self._is_exit_command(text): logger.info("👋 Goodbye!") break - # 3. THINK — send to Timmy - response = self._think(text) - sys.stdout.write(f" 🤖 Timmy: {response}\n") - sys.stdout.flush() - - # 4. SPEAK — TTS output - self._speak(response) + self._process_turn(text) except KeyboardInterrupt: logger.info("👋 Voice loop stopped.") diff --git a/tests/timmy/test_voice_loop.py b/tests/timmy/test_voice_loop.py index e888699..809bd15 100644 --- a/tests/timmy/test_voice_loop.py +++ b/tests/timmy/test_voice_loop.py @@ -236,6 +236,7 @@ class TestHallucinationFilter: """Whisper tends to hallucinate on silence/noise. The loop should filter these.""" def test_known_hallucinations_filtered(self): + loop = VoiceLoop() hallucinations = [ "you", "thanks.", @@ -243,33 +244,35 @@ class TestHallucinationFilter: "Bye.", "Thanks for watching!", "Thank you for watching!", + "", ] for text in hallucinations: - assert text.lower() in ( - "you", - "thanks.", - "thank you.", - "bye.", - "", - "thanks for watching!", - "thank you for watching!", - ), f"'{text}' should be filtered" + assert loop._is_hallucination(text), f"'{text}' should be filtered" + + def test_real_speech_not_filtered(self): + loop = VoiceLoop() + assert not loop._is_hallucination("Hello Timmy") + assert not loop._is_hallucination("What time is it?") class TestExitCommands: """Voice loop should recognize exit commands.""" def test_exit_commands(self): + loop = VoiceLoop() exits = ["goodbye", "exit", "quit", "stop", "goodbye timmy", "stop listening"] for cmd in exits: - assert cmd.lower().strip().rstrip(".!") in ( - "goodbye", - "exit", - "quit", - "stop", - "goodbye timmy", - "stop listening", - ), f"'{cmd}' should be an exit command" + assert loop._is_exit_command(cmd), f"'{cmd}' should be an exit command" + + def test_exit_with_punctuation(self): + loop = VoiceLoop() + assert loop._is_exit_command("goodbye!") + assert loop._is_exit_command("stop.") + + def test_non_exit_commands(self): + loop = VoiceLoop() + assert not loop._is_exit_command("hello") + assert not loop._is_exit_command("what time is it") class TestPlayAudio: