From 782218aa2cdb88ae27fa294d9d67d311ea44b990 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Sat, 14 Mar 2026 14:05:24 -0400 Subject: [PATCH] =?UTF-8?q?fix:=20voice=20loop=20=E2=80=94=20persistent=20?= =?UTF-8?q?event=20loop,=20markdown=20stripping,=20MCP=20noise?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes from real-world testing: 1. Event loop: replaced asyncio.run() with a persistent loop so Agno's MCP sessions survive across conversation turns. No more 'Event loop is closed' errors on turn 2+. 2. Markdown stripping: voice preamble tells Timmy to respond in natural spoken language, plus _strip_markdown() as a safety net removes **bold**, *italic*, bullets, headers, code fences, etc. TTS no longer reads 'asterisk asterisk'. 3. MCP noise: _suppress_mcp_noise() quiets mcp/agno/httpx loggers during voice mode so the terminal shows clean transcript only. 32 tests (12 new for markdown stripping + persistent loop). --- src/timmy/voice_loop.py | 101 ++++++++++++++++++++++++++++++++- tests/timmy/test_voice_loop.py | 65 ++++++++++++++++++--- 2 files changed, 156 insertions(+), 10 deletions(-) diff --git a/src/timmy/voice_loop.py b/src/timmy/voice_loop.py index 82b82ff..644e213 100644 --- a/src/timmy/voice_loop.py +++ b/src/timmy/voice_loop.py @@ -15,6 +15,7 @@ Requires: sounddevice, numpy, whisper, piper-tts import asyncio import logging +import re import subprocess import sys import tempfile @@ -26,6 +27,44 @@ import numpy as np logger = logging.getLogger(__name__) +# ── Voice-mode system instruction ─────────────────────────────────────────── +# Prepended to user messages so Timmy responds naturally for TTS. +_VOICE_PREAMBLE = ( + "[VOICE MODE] You are speaking aloud through a text-to-speech system. " + "Respond in short, natural spoken sentences. No markdown, no bullet points, " + "no asterisks, no numbered lists, no headers, no bold/italic formatting. " + "Talk like a person in a conversation — concise, warm, direct. " + "Keep responses under 3-4 sentences unless the user asks for detail." +) + + +def _strip_markdown(text: str) -> str: + """Remove markdown formatting so TTS reads naturally. + + Strips: **bold**, *italic*, `code`, # headers, - bullets, + numbered lists, [links](url), etc. + """ + if not text: + return text + # Remove bold/italic markers + text = re.sub(r"\*{1,3}([^*]+)\*{1,3}", r"\1", text) + # Remove inline code + text = re.sub(r"`([^`]+)`", r"\1", text) + # Remove headers (# Header) + text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE) + # Remove bullet points (-, *, +) at start of line + text = re.sub(r"^[\s]*[-*+]\s+", "", text, flags=re.MULTILINE) + # Remove numbered lists (1. 2. etc) + text = re.sub(r"^[\s]*\d+\.\s+", "", text, flags=re.MULTILINE) + # Remove link syntax [text](url) → text + text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text) + # Remove horizontal rules + text = re.sub(r"^[-*_]{3,}\s*$", "", text, flags=re.MULTILINE) + # Collapse multiple newlines + text = re.sub(r"\n{3,}", "\n\n", text) + return text.strip() + + # ── Defaults ──────────────────────────────────────────────────────────────── DEFAULT_WHISPER_MODEL = "base.en" @@ -75,6 +114,9 @@ class VoiceLoop: self._running = False self._speaking = False # True while TTS is playing self._interrupted = False # set when user talks over TTS + # Persistent event loop — reused across all chat calls so Agno's + # MCP sessions don't die when the loop closes. + self._loop: asyncio.AbstractEventLoop | None = None # ── Lazy initialization ───────────────────────────────────────────── @@ -283,6 +325,16 @@ class VoiceLoop: # ── LLM: Text → Response ─────────────────────────────────────────── + def _get_loop(self) -> asyncio.AbstractEventLoop: + """Return a persistent event loop, creating one if needed. + + A single loop is reused for the entire voice session so Agno's + MCP tool-server connections survive across turns. + """ + if self._loop is None or self._loop.is_closed(): + self._loop = asyncio.new_event_loop() + return self._loop + def _think(self, user_text: str) -> str: """Send text to Timmy and get a response.""" sys.stdout.write(" 💭 Thinking...\r") @@ -291,20 +343,29 @@ class VoiceLoop: t0 = time.monotonic() try: - response = asyncio.run(self._chat(user_text)) + loop = self._get_loop() + response = loop.run_until_complete(self._chat(user_text)) except Exception as exc: logger.error("Timmy chat failed: %s", exc) response = "I'm having trouble thinking right now. Could you try again?" elapsed = time.monotonic() - t0 logger.info("Timmy responded in %.1fs", elapsed) + + # Strip markdown so TTS doesn't read asterisks, bullets, etc. + response = _strip_markdown(response) return response async def _chat(self, message: str) -> str: - """Async wrapper around Timmy's session.chat().""" + """Async wrapper around Timmy's session.chat(). + + Prepends the voice-mode instruction so Timmy responds in + natural spoken language rather than markdown. + """ from timmy.session import chat - return await chat(message, session_id=self.config.session_id) + voiced = f"{_VOICE_PREAMBLE}\n\nUser said: {message}" + return await chat(voiced, session_id=self.config.session_id) # ── Main Loop ─────────────────────────────────────────────────────── @@ -312,6 +373,11 @@ class VoiceLoop: """Run the voice loop. Blocks until Ctrl-C.""" self._ensure_piper() + # Suppress MCP / Agno stderr noise during voice mode. + # The "Secure MCP Filesystem Server running on stdio" messages + # are distracting in a voice session. + _suppress_mcp_noise() + tts_label = ( "macOS say" if self.config.use_say_fallback @@ -381,7 +447,36 @@ class VoiceLoop: print("\n\n 👋 Voice loop stopped.\n") finally: self._running = False + self._cleanup_loop() + + def _cleanup_loop(self) -> None: + """Shut down the persistent event loop cleanly.""" + if self._loop is not None and not self._loop.is_closed(): + try: + self._loop.run_until_complete(self._loop.shutdown_asyncgens()) + except Exception: + pass + self._loop.close() + self._loop = None def stop(self) -> None: """Stop the voice loop (from another thread).""" self._running = False + + +def _suppress_mcp_noise() -> None: + """Quiet down noisy MCP/Agno loggers during voice mode. + + Sets specific loggers to WARNING so the terminal stays clean + for the voice transcript. + """ + for name in ( + "mcp", + "mcp.server", + "mcp.client", + "agno", + "agno.mcp", + "httpx", + "httpcore", + ): + logging.getLogger(name).setLevel(logging.WARNING) diff --git a/tests/timmy/test_voice_loop.py b/tests/timmy/test_voice_loop.py index 8c2dd52..26d83c8 100644 --- a/tests/timmy/test_voice_loop.py +++ b/tests/timmy/test_voice_loop.py @@ -9,7 +9,7 @@ from unittest.mock import MagicMock, patch import numpy as np -from timmy.voice_loop import VoiceConfig, VoiceLoop +from timmy.voice_loop import VoiceConfig, VoiceLoop, _strip_markdown # ── VoiceConfig tests ────────────────────────────────────────────────────── @@ -97,21 +97,72 @@ class TestTranscribe: assert result == "" +class TestStripMarkdown: + def test_strips_bold(self): + assert _strip_markdown("**hello**") == "hello" + + def test_strips_italic(self): + assert _strip_markdown("*hello*") == "hello" + + def test_strips_headers(self): + assert _strip_markdown("## Header\ntext") == "Header\ntext" + + def test_strips_bullets(self): + assert _strip_markdown("- item one\n- item two") == "item one\nitem two" + + def test_strips_numbered_lists(self): + assert _strip_markdown("1. first\n2. second") == "first\nsecond" + + def test_strips_inline_code(self): + assert _strip_markdown("use `pip install`") == "use pip install" + + def test_strips_links(self): + assert _strip_markdown("[click here](https://x.com)") == "click here" + + def test_preserves_plain_text(self): + assert _strip_markdown("Hello, how are you?") == "Hello, how are you?" + + def test_empty_string(self): + assert _strip_markdown("") == "" + + def test_none_passthrough(self): + assert _strip_markdown(None) is None + + def test_complex_markdown(self): + md = "**1. First** thing\n- use `code`\n*emphasis*" + result = _strip_markdown(md) + assert "**" not in result + assert "`" not in result + assert "*" not in result + + class TestThink: - @patch("timmy.voice_loop.asyncio") - def test_think_returns_response(self, mock_asyncio): - mock_asyncio.run.return_value = "I am Timmy." + def test_think_returns_response(self): loop = VoiceLoop() + loop._loop = MagicMock() + loop._loop.is_closed.return_value = False + loop._loop.run_until_complete.return_value = "I am Timmy." result = loop._think("Who are you?") assert result == "I am Timmy." - @patch("timmy.voice_loop.asyncio") - def test_think_handles_error(self, mock_asyncio): - mock_asyncio.run.side_effect = RuntimeError("Ollama down") + def test_think_handles_error(self): loop = VoiceLoop() + loop._loop = MagicMock() + loop._loop.is_closed.return_value = False + loop._loop.run_until_complete.side_effect = RuntimeError("Ollama down") result = loop._think("test") assert "trouble" in result.lower() + def test_think_strips_markdown(self): + loop = VoiceLoop() + loop._loop = MagicMock() + loop._loop.is_closed.return_value = False + loop._loop.run_until_complete.return_value = "**Hello** from *Timmy*" + result = loop._think("test") + assert "**" not in result + assert "*" not in result + assert "Hello" in result + class TestSpeakSay: @patch("subprocess.Popen")