Timmy-time-dashboard/src/integrations/mumble/bridge.py

"""Mumble voice bridge — bidirectional audio between Alexander and Timmy.

Connects Timmy to a Mumble server so voice conversations can happen during
co-play and be piped to the stream.  Timmy's TTS output is sent to the
Mumble channel; Alexander's microphone is captured on stream via Mumble.

Audio pipeline
--------------
  Timmy TTS → PCM 16-bit 48 kHz mono → Mumble channel → stream mix
  Mumble channel (Alexander's mic) → PCM callback → optional STT

Audio mode
----------
  "vad"  — voice activity detection: transmit frames whose RMS ≥ threshold
  "ptt"  — push-to-talk: transmit only while ``push_to_talk()`` context active

Optional dependency — install with:
    pip install ".[mumble]"

Degrades gracefully when ``pymumble`` is not installed or the server is
unreachable; all public methods become safe no-ops.
"""

from __future__ import annotations

import io
import logging
import struct
import threading
import time
from collections.abc import Callable
from contextlib import contextmanager
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    pass

# Logger named after this module's import path.
logger = logging.getLogger(__name__)

# Mumble audio constants — the PCM format and frame size used when slicing
# outgoing audio into frames.
_SAMPLE_RATE = 48000  # Hz — Mumble native sample rate
_CHANNELS = 1  # Mono
_SAMPLE_WIDTH = 2  # 16-bit PCM → 2 bytes per sample
_FRAME_MS = 10  # milliseconds per Mumble frame
_FRAME_SAMPLES = _SAMPLE_RATE * _FRAME_MS // 1000  # 480 samples per frame
_FRAME_BYTES = _FRAME_SAMPLES * _SAMPLE_WIDTH  # 960 bytes per frame


class MumbleBridge:
    """Manages a Mumble client connection for Timmy's voice bridge.

    All public methods are synchronous (blocking) calls.

    Usage::

        bridge = MumbleBridge()
        bridge.start()          # connect + join channel
        bridge.speak("Hello!")  # TTS → Mumble audio
        bridge.stop()           # disconnect

    Audio received from other users triggers the callbacks registered via
    ``add_audio_callback()``.
    """

    # Upper bound (seconds) on how long ``stop()`` waits for the sender thread.
    _SENDER_JOIN_TIMEOUT = 1.0

    def __init__(self) -> None:
        self._client = None
        self._connected: bool = False
        self._running: bool = False
        self._ptt_active: bool = False
        # Serialises queue flushes between the sender thread and PTT exit.
        self._lock = threading.Lock()
        self._audio_callbacks: list[Callable[[str, bytes], None]] = []
        self._send_thread: threading.Thread | None = None
        self._audio_queue: list[bytes] = []
        # Guards ``_audio_queue`` only; never held while sending.
        self._queue_lock = threading.Lock()

    # ── Properties ────────────────────────────────────────────────────────────

    @property
    def connected(self) -> bool:
        """True once ``start()`` has succeeded and ``stop()`` has not been called."""
        return self._connected

    @property
    def running(self) -> bool:
        """True while the background sender loop is allowed to run."""
        return self._running

    # ── Lifecycle ─────────────────────────────────────────────────────────────

    def start(self) -> bool:
        """Connect to Mumble and join the configured channel.

        Returns:
            True on success (or if already connected). False if the config
            module is unavailable, the bridge is disabled, ``pymumble`` is
            not installed, or the connection attempt raised.
        """
        try:
            from config import settings
        except Exception as exc:
            logger.warning("MumbleBridge: config unavailable — %s", exc)
            return False

        if not settings.mumble_enabled:
            logger.info("MumbleBridge: disabled (MUMBLE_ENABLED=false)")
            return False

        if self._connected:
            return True

        try:
            import pymumble_py3 as pymumble
        except ImportError:
            logger.warning(
                'MumbleBridge: pymumble-py3 not installed — run: pip install ".[mumble]"'
            )
            return False

        client = None
        try:
            client = pymumble.Mumble(
                host=settings.mumble_host,
                user=settings.mumble_user,
                port=settings.mumble_port,
                password=settings.mumble_password,
                reconnect=True,
                stereo=False,
            )
            self._client = client
            client.set_receive_sound(True)
            client.callbacks.set_callback(
                pymumble.constants.PYMUMBLE_CLBK_SOUNDRECEIVED,
                self._on_sound_received,
            )
            client.start()
            client.is_ready()  # blocks until connected + synced

            self._join_channel(settings.mumble_channel)

            self._running = True
            self._connected = True

            # Start the audio sender thread
            self._send_thread = threading.Thread(
                target=self._audio_sender_loop, daemon=True, name="mumble-sender"
            )
            self._send_thread.start()

            logger.info(
                "MumbleBridge: connected to %s:%d as %s, channel=%s",
                settings.mumble_host,
                settings.mumble_port,
                settings.mumble_user,
                settings.mumble_channel,
            )
            return True

        except Exception as exc:
            logger.warning("MumbleBridge: connection failed — %s", exc)
            self._connected = False
            self._running = False
            self._client = None
            # The client may already have been started; shut it down so a
            # failed start does not leave it running in the background.
            if client is not None:
                try:
                    client.stop()
                except Exception as stop_exc:
                    logger.debug("MumbleBridge: stop error — %s", stop_exc)
            return False

    def stop(self) -> None:
        """Disconnect from Mumble and clean up.

        Stops the sender loop, waits briefly for its thread to exit, discards
        any audio still queued, and stops the client. Safe to call when the
        bridge was never started.
        """
        self._running = False
        self._connected = False

        # Wait for the sender so a quick stop()/start() cannot leave two
        # sender threads draining the same queue.
        sender = self._send_thread
        self._send_thread = None
        if (
            sender is not None
            and sender is not threading.current_thread()
            and sender.is_alive()
        ):
            sender.join(timeout=self._SENDER_JOIN_TIMEOUT)

        # Audio queued for the old connection must not leak into the next one.
        with self._queue_lock:
            self._audio_queue.clear()

        client = self._client
        self._client = None
        if client is not None:
            try:
                client.stop()
            except Exception as exc:
                logger.debug("MumbleBridge: stop error — %s", exc)

        logger.info("MumbleBridge: disconnected")

    # ── Audio send ────────────────────────────────────────────────────────────

    def send_audio(self, pcm_bytes: bytes) -> None:
        """Enqueue raw PCM audio (16-bit, 48 kHz, mono) for transmission.

        The bytes are sliced into 10 ms frames and sent by the background
        sender thread.  Safe to call from any thread.  Does nothing while the
        bridge is not connected.
        """
        if not self._connected or self._client is None:
            return

        with self._queue_lock:
            self._audio_queue.append(pcm_bytes)

    def speak(self, text: str) -> None:
        """Convert *text* to speech and send the audio to the Mumble channel.

        Tries Piper TTS first (high quality), falls back to pyttsx3, and
        degrades silently if neither is available.
        """
        if not self._connected:
            logger.debug("MumbleBridge.speak: not connected, skipping")
            return

        pcm = self._tts_to_pcm(text)
        if pcm:
            self.send_audio(pcm)

    # ── Push-to-talk ──────────────────────────────────────────────────────────

    @contextmanager
    def push_to_talk(self):
        """Context manager that activates PTT for the duration of the block.

        On exit, audio queued inside the block is flushed *before* PTT is
        released; otherwise the background sender would usually see PTT
        already inactive and drop it.

        Example::

            with bridge.push_to_talk():
                bridge.send_audio(pcm_data)
        """
        self._ptt_active = True
        try:
            yield
        finally:
            try:
                self._flush_queue()
            finally:
                self._ptt_active = False

    # ── Audio receive callbacks ───────────────────────────────────────────────

    def add_audio_callback(self, callback: Callable[[str, bytes], None]) -> None:
        """Register a callback for incoming audio from other Mumble users.

        The callback receives ``(username: str, pcm_bytes: bytes)`` where
        ``pcm_bytes`` is 16-bit, 48 kHz, mono PCM audio.
        """
        self._audio_callbacks.append(callback)

    def remove_audio_callback(self, callback: Callable[[str, bytes], None]) -> None:
        """Unregister a previously added audio callback.

        Removing a callback that is not registered is a no-op.
        """
        try:
            self._audio_callbacks.remove(callback)
        except ValueError:
            pass

    # ── Internal helpers ──────────────────────────────────────────────────────

    def _join_channel(self, channel_name: str) -> None:
        """Move our own session into the named channel.

        The channel is looked up by name and is not created here; any failure
        (including an unknown channel) is logged as a warning and otherwise
        ignored.
        """
        if self._client is None:
            return
        try:
            channel = self._client.channels.find_by_name(channel_name)
            # ``Channel.move_in()`` without a session argument moves our own
            # session into that channel.
            channel.move_in()
            logger.debug("MumbleBridge: joined channel '%s'", channel_name)
        except Exception as exc:
            logger.warning("MumbleBridge: could not join channel '%s' — %s", channel_name, exc)

    def _on_sound_received(self, user, soundchunk) -> None:
        """Called by pymumble when audio arrives from another user.

        Forwards ``(username, pcm)`` to every registered callback. A failing
        callback is logged and does not stop the remaining callbacks.
        """
        try:
            username = user.get("name", "unknown")
            pcm = soundchunk.pcm
            if not pcm:
                return
            # Iterate over a snapshot so callbacks may (un)register callbacks
            # without causing another callback to be skipped.
            for cb in tuple(self._audio_callbacks):
                try:
                    cb(username, pcm)
                except Exception as exc:
                    logger.debug("MumbleBridge: audio callback error — %s", exc)
        except Exception as exc:
            logger.debug("MumbleBridge: _on_sound_received error — %s", exc)

    def _flush_queue(self) -> bool:
        """Drain the audio queue and send everything that was queued.

        Returns:
            True if audio was handed to ``_send_pcm_buffer``, False if the
            queue was empty or no client is attached (queued audio is then
            discarded).
        """
        # ``_lock`` keeps the sender thread and a PTT-exit flush from
        # interleaving their sends.
        with self._lock:
            with self._queue_lock:
                chunks = list(self._audio_queue)
                self._audio_queue.clear()

            if not chunks or self._client is None:
                return False

            self._send_pcm_buffer(b"".join(chunks))
            return True

    def _audio_sender_loop(self) -> None:
        """Background thread: drain the audio queue and send frames."""
        while self._running:
            if not self._flush_queue():
                # Nothing to send — poll again shortly.
                time.sleep(0.005)

    def _send_pcm_buffer(self, pcm: bytes) -> None:
        """Slice a PCM buffer into 10 ms frames and send each one.

        Frames are gated by the configured audio mode: in ``"vad"`` mode
        frames whose RMS is below the threshold are skipped; in ``"ptt"``
        mode frames are skipped unless push-to-talk is active.  Falls back to
        ``"vad"`` with a 0.02 threshold when the config cannot be read.
        """
        client = self._client
        if client is None:
            return

        try:
            from config import settings

            mode = settings.mumble_audio_mode
            threshold = settings.mumble_vad_threshold
        except Exception:
            mode = "vad"
            threshold = 0.02

        for offset in range(0, len(pcm), _FRAME_BYTES):
            frame = pcm[offset : offset + _FRAME_BYTES]
            if len(frame) < _FRAME_BYTES:
                # Pad the last frame with silence
                frame = frame + b"\x00" * (_FRAME_BYTES - len(frame))

            if mode == "vad" and _rms(frame) < threshold:
                continue  # silence — don't transmit

            if mode == "ptt" and not self._ptt_active:
                continue

            try:
                client.sound_output.add_sound(frame)
            except Exception as exc:
                logger.debug("MumbleBridge: send frame error — %s", exc)
                break

    def _tts_to_pcm(self, text: str) -> bytes | None:
        """Convert text to 16-bit 48 kHz mono PCM via Piper or pyttsx3.

        Returns None when neither engine produced audio.
        """
        # Try Piper TTS first (higher quality)
        pcm = self._piper_tts(text)
        if pcm:
            return pcm

        # Fall back to pyttsx3 (synthesised through a temporary WAV file)
        pcm = self._pyttsx3_tts(text)
        if pcm:
            return pcm

        logger.debug("MumbleBridge._tts_to_pcm: no TTS engine available")
        return None

    def _piper_tts(self, text: str) -> bytes | None:
        """Synthesize speech via Piper TTS, returning 16-bit 48 kHz mono PCM.

        The voice model path comes from ``settings.piper_voice_path`` when
        set, otherwise from a default under the user's home directory.
        Returns None if Piper is not installed or synthesis fails.
        """
        try:
            import wave
            from pathlib import Path

            from piper.voice import PiperVoice

            default_voice = str(
                Path.home() / ".local/share/piper-voices/en_US-lessac-medium.onnx"
            )
            try:
                from config import settings

                voice_path = getattr(settings, "piper_voice_path", None) or default_voice
            except Exception:
                voice_path = default_voice

            voice = PiperVoice.load(voice_path)
            buf = io.BytesIO()
            with wave.open(buf, "wb") as wf:
                wf.setnchannels(_CHANNELS)
                wf.setsampwidth(_SAMPLE_WIDTH)
                wf.setframerate(voice.config.sample_rate)
                voice.synthesize(text, wf)

            # Re-read the WAV we just wrote to get the raw frames and rate.
            buf.seek(0)
            with wave.open(buf, "rb") as wf:
                raw = wf.readframes(wf.getnframes())
                src_rate = wf.getframerate()

            return _resample_pcm(raw, src_rate, _SAMPLE_RATE)

        except ImportError:
            return None
        except Exception as exc:
            logger.debug("MumbleBridge._piper_tts: %s", exc)
            return None

    def _pyttsx3_tts(self, text: str) -> bytes | None:
        """Synthesize speech via pyttsx3, returning 16-bit 48 kHz mono PCM.

        pyttsx3 doesn't support in-memory output directly, so we write to a
        temporary WAV file, read it back, and resample if necessary.  The
        temporary file is removed even when synthesis or reading fails.
        Returns None if pyttsx3 is not installed or synthesis fails.
        """
        try:
            import os
            import tempfile
            import wave

            import pyttsx3

            engine = pyttsx3.init()
            # Only the path is needed; pyttsx3 writes the file itself.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                tmp_path = tmp.name

            try:
                engine.save_to_file(text, tmp_path)
                engine.runAndWait()

                with wave.open(tmp_path, "rb") as wf:
                    raw = wf.readframes(wf.getnframes())
                    src_rate = wf.getframerate()
                    src_channels = wf.getnchannels()
            finally:
                try:
                    os.unlink(tmp_path)
                except OSError as exc:
                    logger.debug("MumbleBridge._pyttsx3_tts: cleanup failed — %s", exc)

            # Convert stereo → mono if needed
            if src_channels == 2:
                raw = _stereo_to_mono(raw, _SAMPLE_WIDTH)

            return _resample_pcm(raw, src_rate, _SAMPLE_RATE)

        except ImportError:
            return None
        except Exception as exc:
            logger.debug("MumbleBridge._pyttsx3_tts: %s", exc)
            return None


# ── Helpers ───────────────────────────────────────────────────────────────────


def _rms(pcm: bytes) -> float:
    """Return the normalised RMS level of a 16-bit little-endian PCM buffer.

    The root mean square of the samples is divided by 32768, so the result
    lies in ``[0.0, 1.0]``.  A buffer without a single complete sample yields
    0.0; a trailing partial sample is ignored.
    """
    sample_count = len(pcm) // _SAMPLE_WIDTH
    if sample_count == 0:
        return 0.0

    usable = pcm[: sample_count * _SAMPLE_WIDTH]
    energy = 0
    for value in struct.unpack(f"<{sample_count}h", usable):
        energy += value * value

    return ((energy / sample_count) ** 0.5) / 32768.0


def _stereo_to_mono(pcm: bytes, sample_width: int = 2) -> bytes:
    """Convert interleaved stereo 16-bit PCM to mono by averaging channels."""
    n = len(pcm) // (sample_width * 2)
    if n == 0:
        return pcm
    samples = struct.unpack(f"<{n * 2}h", pcm[: n * 2 * sample_width])
    mono = [(samples[i * 2] + samples[i * 2 + 1]) // 2 for i in range(n)]
    return struct.pack(f"<{n}h", *mono)


def _resample_pcm(pcm: bytes, src_rate: int, dst_rate: int, sample_width: int = 2) -> bytes:
    """Resample 16-bit mono PCM from *src_rate* to *dst_rate* Hz.

    Uses linear interpolation — adequate quality for voice.
    """
    if src_rate == dst_rate:
        return pcm
    n_src = len(pcm) // sample_width
    if n_src == 0:
        return pcm
    src = struct.unpack(f"<{n_src}h", pcm[: n_src * sample_width])
    ratio = src_rate / dst_rate
    n_dst = int(n_src / ratio)
    dst: list[int] = []
    for i in range(n_dst):
        pos = i * ratio
        lo = int(pos)
        hi = min(lo + 1, n_src - 1)
        frac = pos - lo
        sample = int(src[lo] * (1.0 - frac) + src[hi] * frac)
        dst.append(max(-32768, min(32767, sample)))
    return struct.pack(f"<{n_dst}h", *dst)


# Module-level singleton — constructed at import time.  Construction only
# initialises in-memory state; no connection is made until ``start()`` is
# called on it.
mumble_bridge = MumbleBridge()