From f32077ae8d7bd91b29f9d9923c3b03bd0d22a5c6 Mon Sep 17 00:00:00 2001
From: Alexander Whitestone <alexpaynex@gmail.com>
Date: Tue, 24 Mar 2026 15:26:01 -0400
Subject: [PATCH] refactor: split voice_loop.py into voice/ subpackage (#1379)

---
 src/dashboard/routes/world/__init__.py |  11 +-
 src/infrastructure/router/cascade.py   |   5 +-
 src/infrastructure/router/health.py    |   4 +-
 src/timmy/voice/__init__.py            |  50 +++
 src/timmy/voice/activation.py          |  38 ++
 src/timmy/voice/audio_io.py            |  19 +
 src/timmy/voice/helpers.py             |  53 +++
 src/timmy/voice/llm.py                 |  68 ++++
 src/timmy/voice/speech_engines.py      |  48 +++
 src/timmy/voice/stt.py                 | 119 +++++++
 src/timmy/voice/tts.py                 |  78 ++++
 src/timmy/voice_loop.py                | 470 ++-----------------------
 12 files changed, 517 insertions(+), 446 deletions(-)
 create mode 100644 src/timmy/voice/__init__.py
 create mode 100644 src/timmy/voice/activation.py
 create mode 100644 src/timmy/voice/audio_io.py
 create mode 100644 src/timmy/voice/helpers.py
 create mode 100644 src/timmy/voice/llm.py
 create mode 100644 src/timmy/voice/speech_engines.py
 create mode 100644 src/timmy/voice/stt.py
 create mode 100644 src/timmy/voice/tts.py

diff --git a/src/dashboard/routes/world/__init__.py b/src/dashboard/routes/world/__init__.py
index 914c8203..807e789b 100644
--- a/src/dashboard/routes/world/__init__.py
+++ b/src/dashboard/routes/world/__init__.py
@@ -50,17 +50,12 @@ for route in _matrix_matrix_router.routes:
 # ---------------------------------------------------------------------------
 
 # Used by src/dashboard/app.py
-from .websocket import broadcast_world_state  # noqa: E402, F401
-
-# Used by src/infrastructure/presence.py
-from .websocket import _ws_clients  # noqa: E402, F401
-
 # Used by tests
 from .bark import (  # noqa: E402, F401
-    BarkRequest,
     _BARK_RATE_LIMIT_SECONDS,
     _GROUND_TTL,
     _MAX_EXCHANGES,
+    BarkRequest,
     _bark_and_broadcast,
     _bark_last_request,
     _conversation,
@@ -116,9 +111,13 @@ from .utils import (  # noqa: E402, F401
     _get_agent_shape,
     _get_client_ip,
 )
+
+# Used by src/dashboard/app.py, src/infrastructure/presence.py, and tests
 from .websocket import (  # noqa: E402, F401
     _authenticate_ws,
     _broadcast,
     _heartbeat,
+    _ws_clients,  # noqa: E402, F401
+    broadcast_world_state,  # noqa: E402, F401
     world_ws,
 )
diff --git a/src/infrastructure/router/cascade.py b/src/infrastructure/router/cascade.py
index c34ce17c..9a9e8431 100644
--- a/src/infrastructure/router/cascade.py
+++ b/src/infrastructure/router/cascade.py
@@ -29,6 +29,8 @@ except ImportError:
     requests = None  # type: ignore
 
 # Re-export data models so existing ``from …cascade import X`` keeps working.
+# Mixins (imported ahead of the re-exported data models that follow)
+from .health import HealthMixin
 from .models import (  # noqa: F401 – re-exports
     CircuitState,
     ContentType,
@@ -38,9 +40,6 @@ from .models import (  # noqa: F401 – re-exports
     ProviderStatus,
     RouterConfig,
 )
-
-# Mixins
-from .health import HealthMixin
 from .providers import ProviderCallsMixin
 
 logger = logging.getLogger(__name__)
diff --git a/src/infrastructure/router/health.py b/src/infrastructure/router/health.py
index 7b77318f..5be6ec0d 100644
--- a/src/infrastructure/router/health.py
+++ b/src/infrastructure/router/health.py
@@ -10,7 +10,7 @@ import logging
 import time
 from datetime import UTC, datetime
 
-from .models import CircuitState, Provider, ProviderMetrics, ProviderStatus
+from .models import CircuitState, Provider, ProviderStatus
 
 logger = logging.getLogger(__name__)
 
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
 try:
     from infrastructure.claude_quota import QuotaMonitor, get_quota_monitor
 
-    _quota_monitor: "QuotaMonitor | None" = get_quota_monitor()
+    _quota_monitor: QuotaMonitor | None = get_quota_monitor()
 except Exception as _exc:  # pragma: no cover
     logger.debug("Quota monitor not available: %s", _exc)
     _quota_monitor = None
diff --git a/src/timmy/voice/__init__.py b/src/timmy/voice/__init__.py
new file mode 100644
index 00000000..8cc847ac
--- /dev/null
+++ b/src/timmy/voice/__init__.py
@@ -0,0 +1,50 @@
+"""Voice subpackage — re-exports for convenience."""
+
+from timmy.voice.activation import (
+    EXIT_COMMANDS,
+    WHISPER_HALLUCINATIONS,
+    is_exit_command,
+    is_hallucination,
+)
+from timmy.voice.audio_io import (
+    DEFAULT_CHANNELS,
+    DEFAULT_MAX_UTTERANCE,
+    DEFAULT_MIN_UTTERANCE,
+    DEFAULT_SAMPLE_RATE,
+    DEFAULT_SILENCE_DURATION,
+    DEFAULT_SILENCE_THRESHOLD,
+    _rms,
+)
+from timmy.voice.helpers import _install_quiet_asyncgen_hooks, _suppress_mcp_noise
+from timmy.voice.llm import LLMMixin
+from timmy.voice.speech_engines import (
+    _VOICE_PREAMBLE,
+    DEFAULT_PIPER_VOICE,
+    DEFAULT_WHISPER_MODEL,
+    _strip_markdown,
+)
+from timmy.voice.stt import STTMixin
+from timmy.voice.tts import TTSMixin
+
+__all__ = [
+    "DEFAULT_CHANNELS",
+    "DEFAULT_MAX_UTTERANCE",
+    "DEFAULT_MIN_UTTERANCE",
+    "DEFAULT_PIPER_VOICE",
+    "DEFAULT_SAMPLE_RATE",
+    "DEFAULT_SILENCE_DURATION",
+    "DEFAULT_SILENCE_THRESHOLD",
+    "DEFAULT_WHISPER_MODEL",
+    "EXIT_COMMANDS",
+    "LLMMixin",
+    "STTMixin",
+    "TTSMixin",
+    "WHISPER_HALLUCINATIONS",
+    "_VOICE_PREAMBLE",
+    "_install_quiet_asyncgen_hooks",
+    "_rms",
+    "_strip_markdown",
+    "_suppress_mcp_noise",
+    "is_exit_command",
+    "is_hallucination",
+]
diff --git a/src/timmy/voice/activation.py b/src/timmy/voice/activation.py
new file mode 100644
index 00000000..0054fc61
--- /dev/null
+++ b/src/timmy/voice/activation.py
@@ -0,0 +1,38 @@
+"""Voice activation detection — hallucination filtering and exit commands."""
+
+from __future__ import annotations
+
+# Whisper hallucinates these on silence/noise — skip them.
+WHISPER_HALLUCINATIONS = frozenset(
+    {
+        "you",
+        "thanks.",
+        "thank you.",
+        "bye.",
+        "",
+        "thanks for watching!",
+        "thank you for watching!",
+    }
+)
+
+# Spoken phrases that end the voice session.
+EXIT_COMMANDS = frozenset(
+    {
+        "goodbye",
+        "exit",
+        "quit",
+        "stop",
+        "goodbye timmy",
+        "stop listening",
+    }
+)
+
+
+def is_hallucination(text: str) -> bool:
+    """Return True if *text* is blank or a known Whisper hallucination (any case)."""
+    return not text or text.strip().lower() in WHISPER_HALLUCINATIONS
+
+
+def is_exit_command(text: str) -> bool:
+    """Return True if *text* is an exit phrase, ignoring case and trailing ``.``/``!``."""
+    return text.lower().strip().rstrip(".!") in EXIT_COMMANDS
diff --git a/src/timmy/voice/audio_io.py b/src/timmy/voice/audio_io.py
new file mode 100644
index 00000000..74035baa
--- /dev/null
+++ b/src/timmy/voice/audio_io.py
@@ -0,0 +1,19 @@
+"""Audio capture and playback utilities for the voice loop."""
+
+from __future__ import annotations
+
+import numpy as np
+
+# ── Defaults ────────────────────────────────────────────────────────────────
+
+DEFAULT_SAMPLE_RATE = 16000  # Whisper expects 16 kHz
+DEFAULT_CHANNELS = 1
+DEFAULT_SILENCE_THRESHOLD = 0.015  # RMS threshold — tune for your mic/room
+DEFAULT_SILENCE_DURATION = 1.5  # seconds of silence to end utterance
+DEFAULT_MIN_UTTERANCE = 0.5  # ignore clicks/bumps shorter than this
+DEFAULT_MAX_UTTERANCE = 30.0  # safety cap — don't record forever
+
+
+def _rms(block: np.ndarray) -> float:
+    """Return the root-mean-square energy of an audio block (samples cast to float32)."""
+    return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
diff --git a/src/timmy/voice/helpers.py b/src/timmy/voice/helpers.py
new file mode 100644
index 00000000..ef2fa706
--- /dev/null
+++ b/src/timmy/voice/helpers.py
@@ -0,0 +1,53 @@
+"""Miscellaneous helpers for the voice loop runtime."""
+
+from __future__ import annotations
+
+import logging
+import sys
+
+
+def _suppress_mcp_noise() -> None:
+    """Quiet down noisy MCP/Agno loggers during voice mode.
+
+    Sets the mcp, agno, httpx and httpcore loggers listed below to WARNING
+    so the terminal stays clean for the voice transcript.
+    """
+    for name in (
+        "mcp",
+        "mcp.server",
+        "mcp.client",
+        "agno",
+        "agno.mcp",
+        "httpx",
+        "httpcore",
+    ):
+        logging.getLogger(name).setLevel(logging.WARNING)
+
+
+def _install_quiet_asyncgen_hooks() -> None:
+    """Silence MCP stdio_client async-generator teardown noise.
+
+    Replaces ``sys.unraisablehook`` with a filter that drops the tracebacks
+    anyio's cancel-scope teardown prints when Python GC finalizes Agno's MCP
+    stdio_client async generators as the voice loop exits. These are harmless —
+    the MCP subprocesses die with the loop. Other unraisable errors are forwarded.
+    """
+    _orig_hook = getattr(sys, "unraisablehook", None)
+
+    def _quiet_hook(args):
+        # Swallow RuntimeError from anyio cancel-scope teardown
+        # and BaseExceptionGroup from MCP stdio_client generators
+        if args.exc_type in (RuntimeError, BaseExceptionGroup):
+            msg = str(args.exc_value) if args.exc_value else ""
+            if "cancel scope" in msg or "unhandled errors" in msg:
+                return
+        # Also swallow GeneratorExit from stdio_client
+        if args.exc_type is GeneratorExit:
+            return
+        # Everything else: forward to original hook
+        if _orig_hook:
+            _orig_hook(args)
+        else:
+            sys.__unraisablehook__(args)
+
+    sys.unraisablehook = _quiet_hook
diff --git a/src/timmy/voice/llm.py b/src/timmy/voice/llm.py
new file mode 100644
index 00000000..aca3d134
--- /dev/null
+++ b/src/timmy/voice/llm.py
@@ -0,0 +1,68 @@
+"""LLM integration mixin — async chat and event-loop management."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import sys
+import time
+import warnings
+
+from timmy.voice.speech_engines import _VOICE_PREAMBLE, _strip_markdown
+
+logger = logging.getLogger(__name__)
+
+
+class LLMMixin:
+    """Mixin providing LLM chat methods for :class:`VoiceLoop`."""
+
+    def _get_loop(self) -> asyncio.AbstractEventLoop:
+        """Return a persistent event loop, creating one if needed."""
+        # One loop is reused across all chat calls so Agno's MCP sessions
+        # don't die when the loop closes.
+        if self._loop is None or self._loop.is_closed():
+            self._loop = asyncio.new_event_loop()
+        return self._loop
+
+    def _think(self, user_text: str) -> str:
+        """Send text to Timmy and return the reply with markdown stripped for TTS."""
+        sys.stdout.write("  💭 Thinking...\r")
+        sys.stdout.flush()
+        t0 = time.monotonic()
+        try:
+            loop = self._get_loop()
+            response = loop.run_until_complete(self._chat(user_text))
+        except (ConnectionError, RuntimeError, ValueError) as exc:
+            logger.error("Timmy chat failed: %s", exc)
+            response = "I'm having trouble thinking right now. Could you try again?"
+        elapsed = time.monotonic() - t0
+        logger.info("Timmy responded in %.1fs", elapsed)
+        return _strip_markdown(response)
+
+    async def _chat(self, message: str) -> str:
+        """Async wrapper around Timmy's session.chat()."""
+        from timmy.session import chat
+
+        voiced = f"{_VOICE_PREAMBLE}\n\nUser said: {message}"
+        return await chat(voiced, session_id=self.config.session_id)
+
+    def _cleanup_loop(self) -> None:
+        """Shut down the persistent event loop cleanly."""
+        if self._loop is None or self._loop.is_closed():
+            return
+
+        # Drop whatever the loop reports to its exception handler during teardown.
+        self._loop.set_exception_handler(lambda loop, ctx: None)
+        try:
+            self._loop.run_until_complete(self._loop.shutdown_asyncgens())
+        except RuntimeError as exc:
+            logger.debug("Shutdown asyncgens failed: %s", exc)
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", RuntimeWarning)
+            try:
+                self._loop.close()
+            except RuntimeError as exc:
+                logger.debug("Loop close failed: %s", exc)
+
+        self._loop = None
diff --git a/src/timmy/voice/speech_engines.py b/src/timmy/voice/speech_engines.py
new file mode 100644
index 00000000..b525da3a
--- /dev/null
+++ b/src/timmy/voice/speech_engines.py
@@ -0,0 +1,48 @@
+"""Speech engine constants and text-processing utilities."""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+
+# ── Defaults ────────────────────────────────────────────────────────────────
+
+DEFAULT_WHISPER_MODEL = "base.en"
+DEFAULT_PIPER_VOICE = Path.home() / ".local/share/piper-voices/en_US-lessac-medium.onnx"
+
+# ── Voice-mode system instruction ───────────────────────────────────────────
+# Prepended to user messages so Timmy responds naturally for TTS.
+_VOICE_PREAMBLE = (
+    "[VOICE MODE] You are speaking aloud through a text-to-speech system. "
+    "Respond in short, natural spoken sentences. No markdown, no bullet points, "
+    "no asterisks, no numbered lists, no headers, no bold/italic formatting. "
+    "Talk like a person in a conversation — concise, warm, direct. "
+    "Keep responses under 3-4 sentences unless the user asks for detail."
+)
+
+
+def _strip_markdown(text: str) -> str:
+    """Remove markdown formatting so TTS reads naturally.
+
+    Strips **bold**, *italic*, `code`, # headers, bullets, numbered lists,
+    [links](url) and horizontal rules, then collapses runs of blank lines.
+    """
+    if not text:
+        return text
+    # Unwrap *italic*, **bold** and ***bold italic***, keeping the inner text
+    text = re.sub(r"\*{1,3}([^*]+)\*{1,3}", r"\1", text)
+    # Unwrap `inline code`, keeping the inner text
+    text = re.sub(r"`([^`]+)`", r"\1", text)
+    # Remove header markers at the start of a line (# Header)
+    text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
+    # Remove bullet markers (-, *, +) at start of line
+    text = re.sub(r"^[\s]*[-*+]\s+", "", text, flags=re.MULTILINE)
+    # Remove numbered-list markers (1. 2. etc) at start of line
+    text = re.sub(r"^[\s]*\d+\.\s+", "", text, flags=re.MULTILINE)
+    # Remove link syntax [text](url) → text
+    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
+    # Remove horizontal rules (a line of three or more -, * or _)
+    text = re.sub(r"^[-*_]{3,}\s*$", "", text, flags=re.MULTILINE)
+    # Collapse three or more newlines into a single blank line
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    return text.strip()
diff --git a/src/timmy/voice/stt.py b/src/timmy/voice/stt.py
new file mode 100644
index 00000000..4b3ea4e8
--- /dev/null
+++ b/src/timmy/voice/stt.py
@@ -0,0 +1,119 @@
+"""Speech-to-text mixin — microphone capture and Whisper transcription."""
+
+from __future__ import annotations
+
+import logging
+import sys
+import time
+
+import numpy as np
+
+from timmy.voice.audio_io import DEFAULT_CHANNELS, _rms
+
+logger = logging.getLogger(__name__)
+
+
+class STTMixin:
+    """Mixin providing STT methods for :class:`VoiceLoop`."""
+
+    def _load_whisper(self):
+        """Load Whisper model (lazy, first use only)."""
+        if self._whisper_model is not None:
+            return
+        import whisper
+
+        logger.info("Loading Whisper model: %s", self.config.whisper_model)
+        self._whisper_model = whisper.load_model(self.config.whisper_model)
+        logger.info("Whisper model loaded.")
+
+    def _record_utterance(self) -> np.ndarray | None:
+        """Record one utterance with energy-based VAD; None if nothing usable was captured."""
+        import sounddevice as sd
+
+        sr = self.config.sample_rate
+        block_size = int(sr * 0.1)  # 100ms blocks
+        silence_blocks = round(self.config.silence_duration / 0.1)  # int(0.3 / 0.1) == 2
+        min_blocks = round(self.config.min_utterance / 0.1)
+        max_blocks = round(self.config.max_utterance / 0.1)
+
+        sys.stdout.write("\n  🎤 Listening... (speak now)\n")
+        sys.stdout.flush()
+
+        with sd.InputStream(
+            samplerate=sr,
+            channels=DEFAULT_CHANNELS,
+            dtype="float32",
+            blocksize=block_size,
+        ) as stream:
+            chunks = self._capture_audio_blocks(stream, block_size, silence_blocks, max_blocks)
+
+        return self._finalize_utterance(chunks, min_blocks, sr)
+
+    def _capture_audio_blocks(
+        self,
+        stream,
+        block_size: int,
+        silence_blocks: int,
+        max_blocks: int,
+    ) -> list[np.ndarray]:
+        """Read blocks from *stream* until silence or max length; result may be empty."""
+        chunks: list[np.ndarray] = []
+        silent_count = 0
+        recording = False
+
+        while self._running:
+            block, overflowed = stream.read(block_size)
+            if overflowed:
+                logger.debug("Audio buffer overflowed")
+
+            rms = _rms(block)
+
+            if not recording:
+                if rms > self.config.silence_threshold:
+                    recording = True
+                    silent_count = 0
+                    chunks.append(block.copy())
+                    sys.stdout.write("  📢 Recording...\r")
+                    sys.stdout.flush()
+            else:
+                chunks.append(block.copy())
+                if rms < self.config.silence_threshold:
+                    silent_count += 1
+                else:
+                    silent_count = 0
+                if silent_count >= silence_blocks:
+                    break
+                if len(chunks) >= max_blocks:
+                    logger.info("Max utterance length reached, stopping.")
+                    break
+
+        return chunks
+
+    @staticmethod
+    def _finalize_utterance(
+        chunks: list[np.ndarray], min_blocks: int, sample_rate: int
+    ) -> np.ndarray | None:
+        """Concatenate chunks and report duration; None if the utterance is too short."""
+        if not chunks or len(chunks) < min_blocks:
+            return None
+
+        audio = np.concatenate(chunks, axis=0).flatten()
+        duration = len(audio) / sample_rate
+        sys.stdout.write(f"  ✂️  Captured {duration:.1f}s of audio\n")
+        sys.stdout.flush()
+        return audio
+
+    def _transcribe(self, audio: np.ndarray) -> str:
+        """Transcribe audio using local Whisper model; fp16 is off for MPS/CPU setups."""
+        self._load_whisper()
+
+        sys.stdout.write("  🧠 Transcribing...\r")
+        sys.stdout.flush()
+
+        t0 = time.monotonic()
+        result = self._whisper_model.transcribe(audio, language="en", fp16=False)
+        elapsed = time.monotonic() - t0
+
+        text = result["text"].strip()
+        logger.info("Whisper transcribed in %.1fs: '%s'", elapsed, text[:80])
+        return text
diff --git a/src/timmy/voice/tts.py b/src/timmy/voice/tts.py
new file mode 100644
index 00000000..1bc3f95a
--- /dev/null
+++ b/src/timmy/voice/tts.py
@@ -0,0 +1,78 @@
+"""Text-to-speech mixin — Piper TTS and macOS ``say`` fallback."""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+import tempfile
+import time
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+class TTSMixin:
+    """Mixin providing TTS methods for :class:`VoiceLoop`."""
+
+    def _speak(self, text: str) -> None:
+        """Speak text aloud using Piper TTS or macOS `say`; no-op for empty text."""
+        if not text:
+            return
+        self._speaking = True
+        try:
+            speak = self._speak_say if self.config.use_say_fallback else self._speak_piper
+            speak(text)
+        finally:
+            self._speaking = False
+
+    def _speak_piper(self, text: str) -> None:
+        """Speak using Piper TTS (local ONNX inference); fall back to `say` if Piper fails."""
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            tmp_path = tmp.name
+        try:
+            cmd = ["piper", "--model", str(self.config.piper_voice), "--output_file", tmp_path]
+            proc = subprocess.run(cmd, input=text, capture_output=True, text=True, timeout=30)
+            if proc.returncode != 0:
+                raise RuntimeError(proc.stderr)  # routed to the shared fallback below
+        except (OSError, RuntimeError, subprocess.TimeoutExpired) as exc:
+            logger.error("Piper failed: %s", exc)
+            self._speak_say(text)
+        else:
+            self._play_audio(tmp_path)
+        finally:
+            Path(tmp_path).unlink(missing_ok=True)
+
+    def _speak_say(self, text: str) -> None:
+        """Speak using macOS `say` command."""
+        try:
+            proc = subprocess.Popen(
+                ["say", "-r", "180", text],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+            proc.wait(timeout=60)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+        except FileNotFoundError:
+            logger.error("macOS `say` command not found")
+
+    def _play_audio(self, path: str) -> None:
+        """Play a WAV file. Can be interrupted by setting self._interrupted."""
+        try:
+            proc = subprocess.Popen(
+                ["afplay", path],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+            while proc.poll() is None:
+                if self._interrupted:
+                    proc.terminate()
+                    self._interrupted = False
+                    logger.info("TTS interrupted by user")
+                    return
+                time.sleep(0.05)
+        except FileNotFoundError:
+            try:
+                subprocess.run(["aplay", path], capture_output=True, timeout=60)
+            except (FileNotFoundError, subprocess.TimeoutExpired):
+                logger.error("No audio player found (tried afplay, aplay)")
diff --git a/src/timmy/voice_loop.py b/src/timmy/voice_loop.py
index e2fcfb25..d57f6a8c 100644
--- a/src/timmy/voice_loop.py
+++ b/src/timmy/voice_loop.py
@@ -13,76 +13,41 @@ Usage:
 Requires: sounddevice, numpy, whisper, piper-tts
 """
 
+from __future__ import annotations
+
 import asyncio
 import logging
-import re
-import subprocess
 import sys
-import tempfile
-import time
 from dataclasses import dataclass
 from pathlib import Path
 
-import numpy as np
+from timmy.voice.activation import (
+    EXIT_COMMANDS,
+    WHISPER_HALLUCINATIONS,
+    is_exit_command,
+    is_hallucination,
+)
+from timmy.voice.audio_io import (
+    DEFAULT_MAX_UTTERANCE,
+    DEFAULT_MIN_UTTERANCE,
+    DEFAULT_SAMPLE_RATE,
+    DEFAULT_SILENCE_DURATION,
+    DEFAULT_SILENCE_THRESHOLD,
+)
+from timmy.voice.helpers import _install_quiet_asyncgen_hooks, _suppress_mcp_noise
+from timmy.voice.llm import LLMMixin
+from timmy.voice.speech_engines import (
+    DEFAULT_PIPER_VOICE,
+    DEFAULT_WHISPER_MODEL,
+)
+from timmy.voice.stt import STTMixin
+from timmy.voice.tts import TTSMixin
 
 logger = logging.getLogger(__name__)
 
-# ── Voice-mode system instruction ───────────────────────────────────────────
-# Prepended to user messages so Timmy responds naturally for TTS.
-_VOICE_PREAMBLE = (
-    "[VOICE MODE] You are speaking aloud through a text-to-speech system. "
-    "Respond in short, natural spoken sentences. No markdown, no bullet points, "
-    "no asterisks, no numbered lists, no headers, no bold/italic formatting. "
-    "Talk like a person in a conversation — concise, warm, direct. "
-    "Keep responses under 3-4 sentences unless the user asks for detail."
-)
-
-
-def _strip_markdown(text: str) -> str:
-    """Remove markdown formatting so TTS reads naturally.
-
-    Strips: **bold**, *italic*, `code`, # headers, - bullets,
-    numbered lists, [links](url), etc.
-    """
-    if not text:
-        return text
-    # Remove bold/italic markers
-    text = re.sub(r"\*{1,3}([^*]+)\*{1,3}", r"\1", text)
-    # Remove inline code
-    text = re.sub(r"`([^`]+)`", r"\1", text)
-    # Remove headers (# Header)
-    text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)
-    # Remove bullet points (-, *, +) at start of line
-    text = re.sub(r"^[\s]*[-*+]\s+", "", text, flags=re.MULTILINE)
-    # Remove numbered lists (1. 2. etc)
-    text = re.sub(r"^[\s]*\d+\.\s+", "", text, flags=re.MULTILINE)
-    # Remove link syntax [text](url) → text
-    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
-    # Remove horizontal rules
-    text = re.sub(r"^[-*_]{3,}\s*$", "", text, flags=re.MULTILINE)
-    # Collapse multiple newlines
-    text = re.sub(r"\n{3,}", "\n\n", text)
-    return text.strip()
-
-
-# ── Defaults ────────────────────────────────────────────────────────────────
-
-DEFAULT_WHISPER_MODEL = "base.en"
-DEFAULT_PIPER_VOICE = Path.home() / ".local/share/piper-voices/en_US-lessac-medium.onnx"
-DEFAULT_SAMPLE_RATE = 16000  # Whisper expects 16 kHz
-DEFAULT_CHANNELS = 1
-DEFAULT_SILENCE_THRESHOLD = 0.015  # RMS threshold — tune for your mic/room
-DEFAULT_SILENCE_DURATION = 1.5  # seconds of silence to end utterance
-DEFAULT_MIN_UTTERANCE = 0.5  # ignore clicks/bumps shorter than this
-DEFAULT_MAX_UTTERANCE = 30.0  # safety cap — don't record forever
 DEFAULT_SESSION_ID = "voice"
 
 
-def _rms(block: np.ndarray) -> float:
-    """Compute root-mean-square energy of an audio block."""
-    return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
-
-
 @dataclass
 class VoiceConfig:
     """Configuration for the voice loop."""
@@ -104,7 +69,7 @@ class VoiceConfig:
     model_size: str | None = None
 
 
-class VoiceLoop:
+class VoiceLoop(STTMixin, TTSMixin, LLMMixin):
     """Sovereign listen-think-speak loop.
 
     Everything runs locally:
@@ -113,312 +78,35 @@ class VoiceLoop:
     - TTS: Piper (local ONNX model) or macOS `say`
     """
 
+    # Class-level constants delegate to the activation module.
+    _WHISPER_HALLUCINATIONS = WHISPER_HALLUCINATIONS
+    _EXIT_COMMANDS = EXIT_COMMANDS
+
     def __init__(self, config: VoiceConfig | None = None) -> None:
         self.config = config or VoiceConfig()
         self._whisper_model = None
         self._running = False
-        self._speaking = False  # True while TTS is playing
-        self._interrupted = False  # set when user talks over TTS
-        # Persistent event loop — reused across all chat calls so Agno's
-        # MCP sessions don't die when the loop closes.
+        self._speaking = False
+        self._interrupted = False
         self._loop: asyncio.AbstractEventLoop | None = None
 
     # ── Lazy initialization ─────────────────────────────────────────────
 
-    def _load_whisper(self):
-        """Load Whisper model (lazy, first use only)."""
-        if self._whisper_model is not None:
-            return
-        import whisper
-
-        logger.info("Loading Whisper model: %s", self.config.whisper_model)
-        self._whisper_model = whisper.load_model(self.config.whisper_model)
-        logger.info("Whisper model loaded.")
-
     def _ensure_piper(self) -> bool:
         """Check that Piper voice model exists."""
         if self.config.use_say_fallback:
             return True
         voice_path = self.config.piper_voice
         if not voice_path.exists():
-            logger.warning("Piper voice not found at %s — falling back to `say`", voice_path)
+            logger.warning(
+                "Piper voice not found at %s — falling back to `say`", voice_path
+            )
             self.config.use_say_fallback = True
             return True
         return True
 
-    # ── STT: Microphone → Text ──────────────────────────────────────────
-
-    def _record_utterance(self) -> np.ndarray | None:
-        """Record from microphone until silence is detected.
-
-        Uses energy-based Voice Activity Detection:
-        1. Wait for speech (RMS above threshold)
-        2. Record until silence (RMS below threshold for silence_duration)
-        3. Return the audio as a numpy array
-
-        Returns None if interrupted or no speech detected.
-        """
-        import sounddevice as sd
-
-        sr = self.config.sample_rate
-        block_size = int(sr * 0.1)  # 100ms blocks
-        silence_blocks = int(self.config.silence_duration / 0.1)
-        min_blocks = int(self.config.min_utterance / 0.1)
-        max_blocks = int(self.config.max_utterance / 0.1)
-
-        sys.stdout.write("\n  🎤 Listening... (speak now)\n")
-        sys.stdout.flush()
-
-        with sd.InputStream(
-            samplerate=sr,
-            channels=DEFAULT_CHANNELS,
-            dtype="float32",
-            blocksize=block_size,
-        ) as stream:
-            chunks = self._capture_audio_blocks(stream, block_size, silence_blocks, max_blocks)
-
-        return self._finalize_utterance(chunks, min_blocks, sr)
-
-    def _capture_audio_blocks(
-        self,
-        stream,
-        block_size: int,
-        silence_blocks: int,
-        max_blocks: int,
-    ) -> list[np.ndarray]:
-        """Read audio blocks from *stream* until silence or max length.
-
-        Returns the list of captured audio chunks (may be empty).
-        """
-        chunks: list[np.ndarray] = []
-        silent_count = 0
-        recording = False
-
-        while self._running:
-            block, overflowed = stream.read(block_size)
-            if overflowed:
-                logger.debug("Audio buffer overflowed")
-
-            rms = _rms(block)
-
-            if not recording:
-                if rms > self.config.silence_threshold:
-                    recording = True
-                    silent_count = 0
-                    chunks.append(block.copy())
-                    sys.stdout.write("  📢 Recording...\r")
-                    sys.stdout.flush()
-            else:
-                chunks.append(block.copy())
-
-                if rms < self.config.silence_threshold:
-                    silent_count += 1
-                else:
-                    silent_count = 0
-
-                if silent_count >= silence_blocks:
-                    break
-
-                if len(chunks) >= max_blocks:
-                    logger.info("Max utterance length reached, stopping.")
-                    break
-
-        return chunks
-
-    @staticmethod
-    def _finalize_utterance(
-        chunks: list[np.ndarray], min_blocks: int, sample_rate: int
-    ) -> np.ndarray | None:
-        """Concatenate recorded chunks and report duration.
-
-        Returns ``None`` if the utterance is too short to be meaningful.
-        """
-        if not chunks or len(chunks) < min_blocks:
-            return None
-
-        audio = np.concatenate(chunks, axis=0).flatten()
-        duration = len(audio) / sample_rate
-        sys.stdout.write(f"  ✂️  Captured {duration:.1f}s of audio\n")
-        sys.stdout.flush()
-        return audio
-
-    def _transcribe(self, audio: np.ndarray) -> str:
-        """Transcribe audio using local Whisper model."""
-        self._load_whisper()
-
-        sys.stdout.write("  🧠 Transcribing...\r")
-        sys.stdout.flush()
-
-        t0 = time.monotonic()
-        result = self._whisper_model.transcribe(
-            audio,
-            language="en",
-            fp16=False,  # MPS/CPU — fp16 can cause issues on some setups
-        )
-        elapsed = time.monotonic() - t0
-
-        text = result["text"].strip()
-        logger.info("Whisper transcribed in %.1fs: '%s'", elapsed, text[:80])
-        return text
-
-    # ── TTS: Text → Speaker ─────────────────────────────────────────────
-
-    def _speak(self, text: str) -> None:
-        """Speak text aloud using Piper TTS or macOS `say`."""
-        if not text:
-            return
-
-        self._speaking = True
-        try:
-            if self.config.use_say_fallback:
-                self._speak_say(text)
-            else:
-                self._speak_piper(text)
-        finally:
-            self._speaking = False
-
-    def _speak_piper(self, text: str) -> None:
-        """Speak using Piper TTS (local ONNX inference)."""
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-            tmp_path = tmp.name
-
-        try:
-            # Generate WAV with Piper
-            cmd = [
-                "piper",
-                "--model",
-                str(self.config.piper_voice),
-                "--output_file",
-                tmp_path,
-            ]
-
-            proc = subprocess.run(
-                cmd,
-                input=text,
-                capture_output=True,
-                text=True,
-                timeout=30,
-            )
-
-            if proc.returncode != 0:
-                logger.error("Piper failed: %s", proc.stderr)
-                self._speak_say(text)  # fallback
-                return
-
-            # Play with afplay (macOS) — interruptible
-            self._play_audio(tmp_path)
-
-        finally:
-            Path(tmp_path).unlink(missing_ok=True)
-
-    def _speak_say(self, text: str) -> None:
-        """Speak using macOS `say` command."""
-        try:
-            proc = subprocess.Popen(
-                ["say", "-r", "180", text],
-                stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
-            )
-            proc.wait(timeout=60)
-        except subprocess.TimeoutExpired:
-            proc.kill()
-        except FileNotFoundError:
-            logger.error("macOS `say` command not found")
-
-    def _play_audio(self, path: str) -> None:
-        """Play a WAV file. Can be interrupted by setting self._interrupted."""
-        try:
-            proc = subprocess.Popen(
-                ["afplay", path],
-                stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
-            )
-            # Poll so we can interrupt
-            while proc.poll() is None:
-                if self._interrupted:
-                    proc.terminate()
-                    self._interrupted = False
-                    logger.info("TTS interrupted by user")
-                    return
-                time.sleep(0.05)
-        except FileNotFoundError:
-            # Not macOS — try aplay (Linux)
-            try:
-                subprocess.run(["aplay", path], capture_output=True, timeout=60)
-            except (FileNotFoundError, subprocess.TimeoutExpired):
-                logger.error("No audio player found (tried afplay, aplay)")
-
-    # ── LLM: Text → Response ───────────────────────────────────────────
-
-    def _get_loop(self) -> asyncio.AbstractEventLoop:
-        """Return a persistent event loop, creating one if needed.
-
-        A single loop is reused for the entire voice session so Agno's
-        MCP tool-server connections survive across turns.
-        """
-        if self._loop is None or self._loop.is_closed():
-            self._loop = asyncio.new_event_loop()
-        return self._loop
-
-    def _think(self, user_text: str) -> str:
-        """Send text to Timmy and get a response."""
-        sys.stdout.write("  💭 Thinking...\r")
-        sys.stdout.flush()
-
-        t0 = time.monotonic()
-
-        try:
-            loop = self._get_loop()
-            response = loop.run_until_complete(self._chat(user_text))
-        except (ConnectionError, RuntimeError, ValueError) as exc:
-            logger.error("Timmy chat failed: %s", exc)
-            response = "I'm having trouble thinking right now. Could you try again?"
-
-        elapsed = time.monotonic() - t0
-        logger.info("Timmy responded in %.1fs", elapsed)
-
-        # Strip markdown so TTS doesn't read asterisks, bullets, etc.
-        response = _strip_markdown(response)
-        return response
-
-    async def _chat(self, message: str) -> str:
-        """Async wrapper around Timmy's session.chat().
-
-        Prepends the voice-mode instruction so Timmy responds in
-        natural spoken language rather than markdown.
-        """
-        from timmy.session import chat
-
-        voiced = f"{_VOICE_PREAMBLE}\n\nUser said: {message}"
-        return await chat(voiced, session_id=self.config.session_id)
-
     # ── Main Loop ───────────────────────────────────────────────────────
 
-    # Whisper hallucinates these on silence/noise — skip them.
-    _WHISPER_HALLUCINATIONS = frozenset(
-        {
-            "you",
-            "thanks.",
-            "thank you.",
-            "bye.",
-            "",
-            "thanks for watching!",
-            "thank you for watching!",
-        }
-    )
-
-    # Spoken phrases that end the voice session.
-    _EXIT_COMMANDS = frozenset(
-        {
-            "goodbye",
-            "exit",
-            "quit",
-            "stop",
-            "goodbye timmy",
-            "stop listening",
-        }
-    )
-
     def _log_banner(self) -> None:
         """Log the startup banner with STT/TTS/LLM configuration."""
         tts_label = (
@@ -438,21 +126,19 @@ class VoiceLoop:
 
     def _is_hallucination(self, text: str) -> bool:
         """Return True if *text* is a known Whisper hallucination."""
-        return not text or text.lower() in self._WHISPER_HALLUCINATIONS
+        return is_hallucination(text)
 
     def _is_exit_command(self, text: str) -> bool:
         """Return True if the user asked to stop the voice session."""
-        return text.lower().strip().rstrip(".!") in self._EXIT_COMMANDS
+        return is_exit_command(text)
 
     def _process_turn(self, text: str) -> None:
         """Handle a single listen-think-speak turn after transcription."""
         sys.stdout.write(f"\n  👤 You: {text}\n")
         sys.stdout.flush()
-
         response = self._think(text)
         sys.stdout.write(f"  🤖 Timmy: {response}\n")
         sys.stdout.flush()
-
         self._speak(response)
 
     def run(self) -> None:
@@ -461,112 +147,26 @@ class VoiceLoop:
         _suppress_mcp_noise()
         _install_quiet_asyncgen_hooks()
         self._log_banner()
-
         self._running = True
-
         try:
             while self._running:
                 audio = self._record_utterance()
                 if audio is None:
                     continue
-
                 text = self._transcribe(audio)
                 if self._is_hallucination(text):
                     logger.debug("Ignoring likely Whisper hallucination: '%s'", text)
                     continue
-
                 if self._is_exit_command(text):
                     logger.info("👋 Goodbye!")
                     break
-
                 self._process_turn(text)
-
         except KeyboardInterrupt:
             logger.info("👋 Voice loop stopped.")
         finally:
             self._running = False
             self._cleanup_loop()
 
-    def _cleanup_loop(self) -> None:
-        """Shut down the persistent event loop cleanly.
-
-        Agno's MCP stdio sessions leave async generators (stdio_client)
-        that complain loudly when torn down from a different task.
-        We swallow those errors — they're harmless, the subprocesses
-        die with the loop anyway.
-        """
-        if self._loop is None or self._loop.is_closed():
-            return
-
-        # Silence "error during closing of asynchronous generator" warnings
-        # from MCP's anyio/asyncio cancel-scope teardown.
-        import warnings
-
-        self._loop.set_exception_handler(lambda loop, ctx: None)
-
-        try:
-            self._loop.run_until_complete(self._loop.shutdown_asyncgens())
-        except RuntimeError as exc:
-            logger.debug("Shutdown asyncgens failed: %s", exc)
-            pass
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", RuntimeWarning)
-            try:
-                self._loop.close()
-            except RuntimeError as exc:
-                logger.debug("Loop close failed: %s", exc)
-                pass
-
-        self._loop = None
-
     def stop(self) -> None:
         """Stop the voice loop (from another thread)."""
         self._running = False
-
-
-def _suppress_mcp_noise() -> None:
-    """Quiet down noisy MCP/Agno loggers during voice mode.
-
-    Sets specific loggers to WARNING so the terminal stays clean
-    for the voice transcript.
-    """
-    for name in (
-        "mcp",
-        "mcp.server",
-        "mcp.client",
-        "agno",
-        "agno.mcp",
-        "httpx",
-        "httpcore",
-    ):
-        logging.getLogger(name).setLevel(logging.WARNING)
-
-
-def _install_quiet_asyncgen_hooks() -> None:
-    """Silence MCP stdio_client async-generator teardown noise.
-
-    When the voice loop exits, Python GC finalizes Agno's MCP
-    stdio_client async generators. anyio's cancel-scope teardown
-    prints ugly tracebacks to stderr. These are harmless — the
-    MCP subprocesses die with the loop. We intercept them here.
-    """
-    _orig_hook = getattr(sys, "unraisablehook", None)
-
-    def _quiet_hook(args):
-        # Swallow RuntimeError from anyio cancel-scope teardown
-        # and BaseExceptionGroup from MCP stdio_client generators
-        if args.exc_type in (RuntimeError, BaseExceptionGroup):
-            msg = str(args.exc_value) if args.exc_value else ""
-            if "cancel scope" in msg or "unhandled errors" in msg:
-                return
-        # Also swallow GeneratorExit from stdio_client
-        if args.exc_type is GeneratorExit:
-            return
-        # Everything else: forward to original hook
-        if _orig_hook:
-            _orig_hook(args)
-        else:
-            sys.__unraisablehook__(args)
-
-    sys.unraisablehook = _quiet_hook