feat: add silence filter, hallucination guard, and continuous mode control

- Skip silent recordings before STT call (RMS check in AudioRecorder.stop)
- Filter known Whisper hallucinations ("Thank you.", "Bye." etc.)
- Continuous mode: Ctrl+R starts loop, Ctrl+R during recording exits it
- Wait for TTS to finish before auto-restart to avoid recording speaker
- Silence timeout increased to 3s for natural pauses
- Tests: hallucination filter, silent recording skip, real speech passthrough
This commit is contained in:
0xbyt4
2026-03-03 19:58:38 +03:00
parent bfd9c97705
commit 32b033c11c
2 changed files with 111 additions and 3 deletions

View File

@@ -235,6 +235,12 @@ class AudioRecorder:
logger.debug("Recording too short (%d samples), discarding", len(audio_data))
return None
# Skip silent recordings (RMS below threshold = no real speech)
rms = int(np.sqrt(np.mean(audio_data.astype(np.float64) ** 2)))
if rms < SILENCE_RMS_THRESHOLD:
logger.info("Recording too quiet (RMS=%d < %d), discarding", rms, SILENCE_RMS_THRESHOLD)
return None
return self._write_wav(audio_data)
def cancel(self) -> None:
@@ -276,6 +282,36 @@ class AudioRecorder:
return wav_path
# ============================================================================
# Whisper hallucination filter
# ============================================================================
# Whisper commonly hallucinates these phrases on silent/near-silent audio.
WHISPER_HALLUCINATIONS = {
"thank you.",
"thank you",
"thanks for watching.",
"thanks for watching",
"subscribe to my channel.",
"subscribe to my channel",
"like and subscribe.",
"like and subscribe",
"please subscribe.",
"please subscribe",
"thank you for watching.",
"thank you for watching",
"bye.",
"bye",
"you",
"the end.",
"the end",
}
def is_whisper_hallucination(transcript: str) -> bool:
"""Check if a transcript is a known Whisper hallucination on silence."""
return transcript.strip().lower() in WHISPER_HALLUCINATIONS
# ============================================================================
# STT dispatch
# ============================================================================
@@ -283,6 +319,7 @@ def transcribe_recording(wav_path: str, model: Optional[str] = None) -> Dict[str
"""Transcribe a WAV recording using the existing Whisper pipeline.
Delegates to ``tools.transcription_tools.transcribe_audio()``.
Filters out known Whisper hallucinations on silent audio.
Args:
wav_path: Path to the WAV file.
@@ -293,7 +330,14 @@ def transcribe_recording(wav_path: str, model: Optional[str] = None) -> Dict[str
"""
from tools.transcription_tools import transcribe_audio
return transcribe_audio(wav_path, model=model)
result = transcribe_audio(wav_path, model=model)
# Filter out Whisper hallucinations (common on silent/near-silent audio)
if result.get("success") and is_whisper_hallucination(result.get("transcript", "")):
logger.info("Filtered Whisper hallucination: %r", result["transcript"])
return {"success": True, "transcript": "", "filtered": True}
return result
# ============================================================================