From 6e02fa73c21914f553d99c9dd05a345a0fcdf67f Mon Sep 17 00:00:00 2001
From: Marc Bickel <mrcbickel@gmail.com>
Date: Tue, 7 Apr 2026 15:28:36 +0200
Subject: [PATCH] fix(discord): discard empty placeholder on voice
 transcription + force STT language
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

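- gateway/run.py: Strip the "(The user sent a message with no text content)"
  placeholder when voice transcription succeeds — it was being appended
  alongside the transcript, creating duplicate user turns.
  (Note: the diff in this patch covers only tools/transcription_tools.py;
  the gateway/run.py change is not included here.)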
- tools/transcription_tools.py: Wire the HERMES_LOCAL_STT_LANGUAGE env var
  into the faster-whisper backend. Previously it was only honored by the
  CLI fallback path (_transcribe_local_command), not by the primary
  faster-whisper path.
---
 tools/transcription_tools.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py
index d8d0f3643..d473172a3 100644
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -295,7 +295,13 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]:
             _local_model = WhisperModel(model_name, device="auto", compute_type="auto")
             _local_model_name = model_name
 
-        segments, info = _local_model.transcribe(file_path, beam_size=5)
+        # Allow forcing the language via env var (e.g. HERMES_LOCAL_STT_LANGUAGE=en)
+        _forced_lang = os.getenv(LOCAL_STT_LANGUAGE_ENV, DEFAULT_LOCAL_STT_LANGUAGE)
+        transcribe_kwargs = {"beam_size": 5}
+        if _forced_lang:
+            transcribe_kwargs["language"] = _forced_lang
+
+        segments, info = _local_model.transcribe(file_path, **transcribe_kwargs)
         transcript = " ".join(segment.text.strip() for segment in segments)
 
         logger.info(