462 lines
16 KiB
Python
462 lines
16 KiB
Python
"""Mumble voice bridge — bidirectional audio between Alexander and Timmy.

Connects Timmy to a Mumble server so voice conversations can happen during
co-play and be piped to the stream. Timmy's TTS output is sent to the
Mumble channel; Alexander's microphone is captured on stream via Mumble.

Audio pipeline
--------------
    Timmy TTS → PCM 16-bit 48 kHz mono → Mumble channel → stream mix
    Mumble channel (Alexander's mic) → PCM callback → optional STT

Audio mode
----------
    "vad" — voice activity detection: transmit frames whose RMS is at or
            above the threshold
    "ptt" — push-to-talk: transmit only while ``push_to_talk()`` context active

Optional dependency — install with:
    pip install ".[mumble]"

Degrades gracefully when ``pymumble`` is not installed or the server is
unreachable; all public methods become safe no-ops.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import io
|
|
import logging
|
|
import struct
|
|
import threading
|
|
import time
|
|
from collections.abc import Callable
|
|
from contextlib import contextmanager
|
|
from typing import TYPE_CHECKING
|
|
|
|
if TYPE_CHECKING:
|
|
pass
|
|
|
|
logger = logging.getLogger(__name__)

# Audio format spoken on the Mumble side of the bridge: 16-bit PCM, mono,
# 48 kHz, cut into 10 ms frames. The per-frame sizes are derived from the
# base values so they cannot drift apart.
_SAMPLE_RATE = 48_000  # Hz — Mumble native sample rate
_CHANNELS = 1  # mono
_SAMPLE_WIDTH = 2  # bytes per sample (16-bit PCM)
_FRAME_MS = 10  # length of one Mumble frame in milliseconds
_FRAME_SAMPLES = (_SAMPLE_RATE * _FRAME_MS) // 1000  # 480 samples per frame
_FRAME_BYTES = _SAMPLE_WIDTH * _FRAME_SAMPLES  # 960 bytes per frame
|
|
|
|
|
|
class MumbleBridge:
    """Manage the Mumble client connection behind Timmy's voice bridge.

    Every public method is synchronous (none of them is a coroutine) and is
    a no-op while the bridge is not connected.

    Usage::

        bridge = MumbleBridge()
        bridge.start()            # connect + join channel
        bridge.speak("Hello!")    # TTS → Mumble audio
        bridge.stop()             # disconnect

    Audio received from other users is forwarded to the callbacks
    registered via ``add_audio_callback()``.
    """

    def __init__(self) -> None:
        """Create an idle, disconnected bridge; no connection is opened here."""
        self._client = None  # pymumble client, created by start()
        self._connected: bool = False
        self._running: bool = False
        self._ptt_active: bool = False
        self._lock = threading.Lock()
        self._audio_callbacks: list[Callable[[str, bytes], None]] = []
        self._send_thread: threading.Thread | None = None
        # Each entry is (pcm_bytes, ptt_active_when_enqueued); see send_audio().
        self._audio_queue: list[tuple[bytes, bool]] = []
        self._queue_lock = threading.Lock()

    # ── Properties ────────────────────────────────────────────────────────────

    @property
    def connected(self) -> bool:
        """True when the Mumble client is connected and authenticated."""
        return self._connected

    @property
    def running(self) -> bool:
        """True when the bridge loop is active."""
        return self._running

    # ── Lifecycle ─────────────────────────────────────────────────────────────

    def start(self) -> bool:
        """Connect to Mumble and join the configured channel.

        Returns:
            True when the bridge is connected (including when it already
            was). False when the ``config`` module cannot be imported, the
            bridge is disabled, ``pymumble_py3`` is not installed, or the
            connection attempt raises.
        """
        try:
            from config import settings
        except Exception as exc:
            logger.warning("MumbleBridge: config unavailable — %s", exc)
            return False

        if not settings.mumble_enabled:
            logger.info("MumbleBridge: disabled (MUMBLE_ENABLED=false)")
            return False

        if self._connected:
            return True

        try:
            import pymumble_py3 as pymumble
        except ImportError:
            logger.warning(
                'MumbleBridge: pymumble-py3 not installed — run: pip install ".[mumble]"'
            )
            return False

        try:
            self._client = pymumble.Mumble(
                host=settings.mumble_host,
                user=settings.mumble_user,
                port=settings.mumble_port,
                password=settings.mumble_password,
                reconnect=True,
                stereo=False,
            )
            self._client.set_receive_sound(True)
            self._client.callbacks.set_callback(
                pymumble.constants.PYMUMBLE_CLBK_SOUNDRECEIVED,
                self._on_sound_received,
            )
            self._client.start()
            self._client.is_ready()  # blocks until connected + synced

            self._join_channel(settings.mumble_channel)

            # Flags must be set before the sender starts: its loop exits as
            # soon as it sees _running is False.
            self._running = True
            self._connected = True

            self._send_thread = threading.Thread(
                target=self._audio_sender_loop, daemon=True, name="mumble-sender"
            )
            self._send_thread.start()

            logger.info(
                "MumbleBridge: connected to %s:%d as %s, channel=%s",
                settings.mumble_host,
                settings.mumble_port,
                settings.mumble_user,
                settings.mumble_channel,
            )
            return True

        except Exception as exc:
            logger.warning("MumbleBridge: connection failed — %s", exc)
            failed_client = self._client
            self._connected = False
            self._running = False
            self._client = None
            # The client was created with reconnect=True; stop it explicitly
            # so a half-started client is not left behind once we drop our
            # only reference to it.
            if failed_client is not None:
                try:
                    failed_client.stop()
                except Exception as stop_exc:
                    logger.debug("MumbleBridge: cleanup after failed connect — %s", stop_exc)
            return False

    def stop(self) -> None:
        """Disconnect from Mumble and clean up.

        Clears the running/connected flags, waits briefly for the sender
        thread to exit, stops the client, and discards audio that was still
        queued so it cannot leak into a later session. Safe to call when the
        bridge was never started.
        """
        self._running = False
        self._connected = False

        # Give the sender loop a moment to observe _running == False, so a
        # quick stop()/start() cannot end up with two sender threads.
        sender, self._send_thread = self._send_thread, None
        if (
            sender is not None
            and sender.is_alive()
            and sender is not threading.current_thread()
        ):
            sender.join(timeout=1.0)

        if self._client is not None:
            try:
                self._client.stop()
            except Exception as exc:
                logger.debug("MumbleBridge: stop error — %s", exc)
            finally:
                self._client = None

        with self._queue_lock:
            self._audio_queue.clear()

        logger.info("MumbleBridge: disconnected")

    # ── Audio send ────────────────────────────────────────────────────────────

    def send_audio(self, pcm_bytes: bytes) -> None:
        """Enqueue raw PCM audio (16-bit, 48 kHz, mono) for transmission.

        The bytes are sliced into 10 ms frames and sent by the background
        sender thread. Safe to call from any thread. Does nothing while the
        bridge is not connected.

        The push-to-talk state is recorded together with the audio: the
        sender thread runs later, typically after a surrounding
        ``push_to_talk()`` block has already exited, so checking the flag at
        send time would drop audio that was queued while PTT was held.
        """
        if not self._connected or self._client is None:
            return

        with self._queue_lock:
            self._audio_queue.append((pcm_bytes, self._ptt_active))

    def speak(self, text: str) -> None:
        """Convert *text* to speech and send the audio to the Mumble channel.

        Tries Piper TTS first (high quality), falls back to pyttsx3, and
        degrades silently if neither is available. Synthesis runs in the
        calling thread; only the transmission is asynchronous.
        """
        if not self._connected:
            logger.debug("MumbleBridge.speak: not connected, skipping")
            return

        pcm = self._tts_to_pcm(text)
        if pcm:
            self.send_audio(pcm)

    # ── Push-to-talk ──────────────────────────────────────────────────────────

    @contextmanager
    def push_to_talk(self):
        """Context manager that activates PTT for the duration of the block.

        Audio passed to ``send_audio()`` inside the block is transmitted in
        ``"ptt"`` mode. On exit the previous PTT state is restored, so nested
        blocks do not release an outer one early.

        Example::

            with bridge.push_to_talk():
                bridge.send_audio(pcm_data)
        """
        previous = self._ptt_active
        self._ptt_active = True
        try:
            yield
        finally:
            self._ptt_active = previous

    # ── Audio receive callbacks ───────────────────────────────────────────────

    def add_audio_callback(self, callback: Callable[[str, bytes], None]) -> None:
        """Register a callback for incoming audio from other Mumble users.

        The callback receives ``(username: str, pcm_bytes: bytes)`` where
        ``pcm_bytes`` is 16-bit, 48 kHz, mono PCM audio.
        """
        self._audio_callbacks.append(callback)

    def remove_audio_callback(self, callback: Callable[[str, bytes], None]) -> None:
        """Unregister a previously added audio callback.

        Removing a callback that is not registered is a no-op.
        """
        try:
            self._audio_callbacks.remove(callback)
        except ValueError:
            pass  # not registered — nothing to remove

    # ── Internal helpers ──────────────────────────────────────────────────────

    def _join_channel(self, channel_name: str) -> None:
        """Move the bridge's own session into the named channel.

        The channel must already exist: a failed lookup (or any other error)
        is logged as a warning and the bridge stays in its current channel.
        """
        if self._client is None:
            return
        try:
            channel = self._client.channels.find_by_name(channel_name)
            # move_in() without a session argument moves our own session
            # into *channel*.
            channel.move_in()
            logger.debug("MumbleBridge: joined channel '%s'", channel_name)
        except Exception as exc:
            logger.warning("MumbleBridge: could not join channel '%s' — %s", channel_name, exc)

    def _on_sound_received(self, user, soundchunk) -> None:
        """Called by pymumble when audio arrives from another user.

        Forwards ``(username, pcm)`` to every registered callback. A failing
        callback is logged at debug level and does not affect the others.
        """
        try:
            username = user.get("name", "unknown")
            pcm = soundchunk.pcm
            if pcm and self._audio_callbacks:
                # Iterate over a snapshot: a callback may unregister itself
                # (or register another) while being called, which would
                # otherwise make the loop skip the next callback.
                for cb in tuple(self._audio_callbacks):
                    try:
                        cb(username, pcm)
                    except Exception as exc:
                        logger.debug("MumbleBridge: audio callback error — %s", exc)
        except Exception as exc:
            logger.debug("MumbleBridge: _on_sound_received error — %s", exc)

    def _audio_sender_loop(self) -> None:
        """Background thread: drain the audio queue and send frames.

        Runs until ``_running`` becomes False, polling every 5 ms while the
        queue is empty.
        """
        while self._running:
            with self._queue_lock:
                pending = list(self._audio_queue)
                self._audio_queue.clear()

            if not pending or self._client is None:
                time.sleep(0.005)
                continue

            # Join neighbouring chunks that share a PTT state so frames can
            # still span chunk boundaries; start a new buffer whenever the
            # recorded PTT state changes.
            run: list[bytes] = []
            run_ptt = pending[0][1]
            for chunk, ptt_held in pending:
                if ptt_held != run_ptt:
                    self._send_pcm_buffer(b"".join(run), ptt_active=run_ptt)
                    run, run_ptt = [], ptt_held
                run.append(chunk)
            self._send_pcm_buffer(b"".join(run), ptt_active=run_ptt)

    def _send_pcm_buffer(self, pcm: bytes, ptt_active: bool | None = None) -> None:
        """Slice a PCM buffer into 10 ms frames and send each one.

        Args:
            pcm: 16-bit 48 kHz mono PCM; a trailing partial frame is padded
                with silence.
            ptt_active: push-to-talk state to apply to this buffer. ``None``
                (the default) uses the bridge's current PTT state.

        In ``"vad"`` mode frames whose RMS is below the configured threshold
        are skipped; in ``"ptt"`` mode nothing is sent unless PTT is active.
        When the config cannot be read, ``"vad"`` with a threshold of 0.02 is
        used. Sending stops at the first frame the client rejects.
        """
        # Keep a local reference so a concurrent stop() cannot turn the
        # client into None halfway through the buffer.
        client = self._client
        if client is None:
            return

        try:
            from config import settings

            mode = settings.mumble_audio_mode
            threshold = settings.mumble_vad_threshold
        except Exception:
            mode = "vad"
            threshold = 0.02

        if ptt_active is None:
            ptt_active = self._ptt_active
        if mode == "ptt" and not ptt_active:
            return

        for offset in range(0, len(pcm), _FRAME_BYTES):
            frame = pcm[offset : offset + _FRAME_BYTES]
            if len(frame) < _FRAME_BYTES:
                # Pad the last frame with silence
                frame = frame.ljust(_FRAME_BYTES, b"\x00")

            if mode == "vad" and _rms(frame) < threshold:
                continue  # silence — don't transmit

            try:
                client.sound_output.add_sound(frame)
            except Exception as exc:
                logger.debug("MumbleBridge: send frame error — %s", exc)
                break

    def _tts_to_pcm(self, text: str) -> bytes | None:
        """Convert text to 16-bit 48 kHz mono PCM via Piper or pyttsx3.

        Returns None when neither engine produces audio.
        """
        # Piper first (higher quality), then pyttsx3 as the fallback.
        for synthesize in (self._piper_tts, self._pyttsx3_tts):
            pcm = synthesize(text)
            if pcm:
                return pcm

        logger.debug("MumbleBridge._tts_to_pcm: no TTS engine available")
        return None

    def _piper_tts(self, text: str) -> bytes | None:
        """Synthesize speech via Piper TTS, returning 16-bit 48 kHz mono PCM.

        The voice model path comes from ``settings.piper_voice_path`` when
        set, otherwise from the default location under the user's home
        directory. Returns None when Piper is not installed or synthesis
        fails (the failure is logged at debug level).
        """
        try:
            import wave
            from pathlib import Path

            from piper.voice import PiperVoice

            default_voice = str(
                Path.home() / ".local/share/piper-voices/en_US-lessac-medium.onnx"
            )
            try:
                from config import settings

                voice_path = getattr(settings, "piper_voice_path", None) or default_voice
            except Exception:
                voice_path = default_voice

            voice = PiperVoice.load(voice_path)

            # Piper writes a WAV stream; render it into memory and read the
            # raw frames back so they can be resampled to the Mumble rate.
            buf = io.BytesIO()
            with wave.open(buf, "wb") as wf:
                wf.setnchannels(_CHANNELS)
                wf.setsampwidth(_SAMPLE_WIDTH)
                wf.setframerate(voice.config.sample_rate)
                voice.synthesize(text, wf)

            buf.seek(0)
            with wave.open(buf, "rb") as wf:
                raw = wf.readframes(wf.getnframes())
                src_rate = wf.getframerate()

            return _resample_pcm(raw, src_rate, _SAMPLE_RATE)

        except ImportError:
            return None
        except Exception as exc:
            logger.debug("MumbleBridge._piper_tts: %s", exc)
            return None

    def _pyttsx3_tts(self, text: str) -> bytes | None:
        """Synthesize speech via pyttsx3, returning 16-bit 48 kHz mono PCM.

        pyttsx3 doesn't support in-memory output directly, so we write to a
        temporary WAV file, read it back, and resample if necessary. The
        temporary file is removed even when synthesis or reading fails.
        Returns None when pyttsx3 is not installed or synthesis fails (the
        failure is logged at debug level).
        """
        try:
            import os
            import tempfile
            import wave

            import pyttsx3

            engine = pyttsx3.init()
            # delete=False: the file is closed here and re-opened by name
            # by pyttsx3 and then by wave, so we remove it ourselves below.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                tmp_path = tmp.name

            try:
                engine.save_to_file(text, tmp_path)
                engine.runAndWait()

                with wave.open(tmp_path, "rb") as wf:
                    raw = wf.readframes(wf.getnframes())
                    src_rate = wf.getframerate()
                    src_channels = wf.getnchannels()
            finally:
                # Best-effort cleanup; a leftover temp file must not cost us
                # audio that was synthesized successfully.
                try:
                    os.unlink(tmp_path)
                except OSError as cleanup_exc:
                    logger.debug("MumbleBridge._pyttsx3_tts: cleanup — %s", cleanup_exc)

            # Convert stereo → mono if needed
            if src_channels == 2:
                raw = _stereo_to_mono(raw, _SAMPLE_WIDTH)

            return _resample_pcm(raw, src_rate, _SAMPLE_RATE)

        except ImportError:
            return None
        except Exception as exc:
            logger.debug("MumbleBridge._pyttsx3_tts: %s", exc)
            return None
|
|
|
|
|
|
# ── Helpers ───────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def _rms(pcm: bytes) -> float:
    """Return the RMS level of a 16-bit little-endian PCM buffer.

    The result is normalised by 32768, so full-scale audio yields 1.0 and
    silence yields 0.0. A trailing odd byte is ignored; a buffer with no
    complete sample yields 0.0.
    """
    sample_count = len(pcm) // _SAMPLE_WIDTH
    if sample_count == 0:
        return 0.0

    values = struct.unpack(f"<{sample_count}h", pcm[: sample_count * _SAMPLE_WIDTH])
    energy = 0
    for value in values:
        energy += value * value
    return ((energy / sample_count) ** 0.5) / 32768.0
|
|
|
|
|
|
def _stereo_to_mono(pcm: bytes, sample_width: int = 2) -> bytes:
    """Down-mix interleaved stereo 16-bit PCM to mono.

    Each output sample is the floor-divided average of the left and right
    samples of one stereo frame. Trailing bytes that do not form a complete
    frame are dropped; input with no complete frame is returned unchanged.
    Samples are always decoded as 16-bit little-endian, so *sample_width*
    must be 2.
    """
    frame_bytes = sample_width * 2
    frame_count = len(pcm) // frame_bytes
    if not frame_count:
        return pcm

    interleaved = struct.unpack(f"<{frame_count * 2}h", pcm[: frame_count * frame_bytes])
    left_channel = interleaved[0::2]
    right_channel = interleaved[1::2]
    averaged = [(left + right) // 2 for left, right in zip(left_channel, right_channel)]
    return struct.pack(f"<{frame_count}h", *averaged)
|
|
|
|
|
|
def _resample_pcm(pcm: bytes, src_rate: int, dst_rate: int, sample_width: int = 2) -> bytes:
    """Resample 16-bit mono PCM from *src_rate* to *dst_rate* Hz.

    Uses linear interpolation — adequate quality for voice. Output length
    and source positions are computed with exact integer arithmetic, so the
    result never comes out a sample short because of floating-point
    rounding.

    Args:
        pcm: little-endian 16-bit mono samples; a trailing odd byte is
            ignored.
        src_rate: sample rate of *pcm* in Hz (integer).
        dst_rate: desired sample rate in Hz (integer).
        sample_width: bytes per sample. Samples are always decoded as
            16-bit, so this must be 2.

    Returns:
        The resampled PCM. The input is returned unchanged when the rates
        are equal or when it contains no complete sample.

    Raises:
        ValueError: if the rates differ and either one is not positive.
    """
    if src_rate == dst_rate:
        return pcm
    n_src = len(pcm) // sample_width
    if n_src == 0:
        return pcm
    if src_rate <= 0 or dst_rate <= 0:
        raise ValueError(
            f"sample rates must be positive, got src_rate={src_rate}, dst_rate={dst_rate}"
        )

    src = struct.unpack(f"<{n_src}h", pcm[: n_src * sample_width])
    n_dst = n_src * dst_rate // src_rate
    last = n_src - 1

    dst: list[int] = []
    for i in range(n_dst):
        # Output sample i sits at source position i * src_rate / dst_rate;
        # divmod splits that into the whole index and the exact remainder.
        lo, rem = divmod(i * src_rate, dst_rate)
        hi = min(lo + 1, last)  # hold the final sample at the right edge
        frac = rem / dst_rate
        sample = int(src[lo] * (1.0 - frac) + src[hi] * frac)
        dst.append(max(-32768, min(32767, sample)))
    return struct.pack(f"<{n_dst}h", *dst)
|
|
|
|
|
|
# Module-level singleton: one MumbleBridge instance created at import time.
mumble_bridge = MumbleBridge()
|