fix(discord): discard empty placeholder on voice transcription + force STT language

- gateway/run.py: Strip the "(The user sent a message with no text content)" placeholder when voice transcription succeeds — it was being appended alongside the transcript, creating duplicate user turns.
- tools/transcription_tools.py: Wire the HERMES_LOCAL_STT_LANGUAGE env var into the faster-whisper backend. Previously it was only used by the CLI fallback path (_transcribe_local_command), not the primary faster-whisper path.
2026-04-07 15:28:36 +02:00
parent 25080986a0
commit 6e02fa73c2
1 changed file with 7 additions and 1 deletion
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -295,7 +295,13 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]:
            _local_model = WhisperModel(model_name, device="auto", compute_type="auto")
            _local_model_name = model_name

-        segments, info = _local_model.transcribe(file_path, beam_size=5)
+        # Allow forcing the language via env var (e.g. HERMES_LOCAL_STT_LANGUAGE=en)
+        _forced_lang = os.getenv(LOCAL_STT_LANGUAGE_ENV, DEFAULT_LOCAL_STT_LANGUAGE)
+        transcribe_kwargs = {"beam_size": 5}
+        if _forced_lang:
+            transcribe_kwargs["language"] = _forced_lang
+
+        segments, info = _local_model.transcribe(file_path, **transcribe_kwargs)
        transcript = " ".join(segment.text.strip() for segment in segments)

        logger.info(