Timmy-time-dashboard/src/timmy/voice/audio_io.py

"""Audio capture and playback utilities for the voice loop."""

from __future__ import annotations

import numpy as np

# ── Defaults ────────────────────────────────────────────────────────────────

# Capture format: Whisper expects 16 kHz input.
DEFAULT_SAMPLE_RATE: int = 16_000
DEFAULT_CHANNELS: int = 1

# RMS threshold for silence detection; tune it for your mic/room.
DEFAULT_SILENCE_THRESHOLD: float = 0.015

# Seconds of silence that end an utterance.
DEFAULT_SILENCE_DURATION: float = 1.5

# Clicks/bumps shorter than this are ignored (presumably seconds, like the
# silence duration above; confirm against the recording code).
DEFAULT_MIN_UTTERANCE: float = 0.5

# Safety cap so a recording cannot run forever.
DEFAULT_MAX_UTTERANCE: float = 30.0


def _rms(block: np.ndarray) -> float:
    """Compute root-mean-square energy of an audio block.

    Args:
        block: Audio samples of any numeric dtype. They are converted to
            float32 before squaring, so the arithmetic happens in floating
            point rather than in the input's own (possibly integer) dtype.

    Returns:
        The RMS over all samples as a Python float, or ``0.0`` when the
        block contains no samples.
    """
    samples = np.asarray(block, dtype=np.float32)
    if samples.size == 0:
        # np.mean of an empty array yields NaN and emits a RuntimeWarning;
        # NaN compares False against any threshold, so report zero energy.
        return 0.0
    return float(np.sqrt(np.mean(np.square(samples))))