20 lines
859 B
Python
20 lines
859 B
Python
"""Audio capture and playback utilities for the voice loop."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import numpy as np
|
|
|
|
# ── Defaults ────────────────────────────────────────────────────────────────
|
|
|
|
# Capture format: Whisper expects 16 kHz input.
DEFAULT_SAMPLE_RATE: int = 16_000
DEFAULT_CHANNELS: int = 1

# RMS level used as the silence threshold; tune for your mic/room.
DEFAULT_SILENCE_THRESHOLD: float = 0.015

# Seconds of silence that end an utterance.
DEFAULT_SILENCE_DURATION: float = 1.5

# Ignore clicks/bumps shorter than this.
DEFAULT_MIN_UTTERANCE: float = 0.5

# Safety cap so a recording never runs forever.
DEFAULT_MAX_UTTERANCE: float = 30.0
|
|
|
|
|
|
def _rms(block: np.ndarray) -> float:
|
|
"""Compute root-mean-square energy of an audio block."""
|
|
return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
|