From 6e02fa73c21914f553d99c9dd05a345a0fcdf67f Mon Sep 17 00:00:00 2001 From: Marc Bickel Date: Tue, 7 Apr 2026 15:28:36 +0200 Subject: [PATCH] fix(discord): discard empty placeholder on voice transcription + force STT language MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - gateway/run.py: Strip "(The user sent a message with no text content)" placeholder when voice transcription succeeds — it was being appended alongside the transcript, creating duplicate user turns. - tools/transcription_tools.py: Wire HERMES_LOCAL_STT_LANGUAGE env var into the faster-whisper backend. It was only used by the CLI fallback path (_transcribe_local_command), not the primary faster-whisper path. --- tools/transcription_tools.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index d8d0f3643..d473172a3 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -295,7 +295,13 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]: _local_model = WhisperModel(model_name, device="auto", compute_type="auto") _local_model_name = model_name - segments, info = _local_model.transcribe(file_path, beam_size=5) + # Allow forcing the language via env var (e.g. HERMES_LOCAL_STT_LANGUAGE=en) + _forced_lang = os.getenv(LOCAL_STT_LANGUAGE_ENV, DEFAULT_LOCAL_STT_LANGUAGE) + transcribe_kwargs = {"beam_size": 5} + if _forced_lang: + transcribe_kwargs["language"] = _forced_lang + + segments, info = _local_model.transcribe(file_path, **transcribe_kwargs) transcript = " ".join(segment.text.strip() for segment in segments) logger.info(