fix(discord): discard empty placeholder on voice transcription + force STT language

- gateway/run.py: Strip the "(The user sent a message with no text content)"
  placeholder when voice transcription succeeds. Previously the placeholder
  was appended alongside the transcript, creating duplicate user turns.
- tools/transcription_tools.py: Wire the HERMES_LOCAL_STT_LANGUAGE env var
  into the faster-whisper backend. Previously it was honored only by the CLI
  fallback path (_transcribe_local_command), not the primary faster-whisper path.
This commit is contained in:
Marc Bickel
2026-04-07 15:28:36 +02:00
committed by Teknium
parent 25080986a0
commit 6e02fa73c2

View File

@@ -295,7 +295,13 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]:
_local_model = WhisperModel(model_name, device="auto", compute_type="auto")
_local_model_name = model_name
segments, info = _local_model.transcribe(file_path, beam_size=5)
# Allow forcing the language via env var (e.g. HERMES_LOCAL_STT_LANGUAGE=en)
_forced_lang = os.getenv(LOCAL_STT_LANGUAGE_ENV, DEFAULT_LOCAL_STT_LANGUAGE)
transcribe_kwargs = {"beam_size": 5}
if _forced_lang:
transcribe_kwargs["language"] = _forced_lang
segments, info = _local_model.transcribe(file_path, **transcribe_kwargs)
transcript = " ".join(segment.text.strip() for segment in segments)
logger.info(