Enhance TTS tool to support platform-specific audio formats

- Added detection of the platform from the environment variable to determine the appropriate audio output format. - Implemented logic to output Opus (.ogg) files for Telegram when using compatible TTS providers, while defaulting to MP3 for others.
2026-02-14 16:13:26 -08:00
parent 586b0a7047
commit ff9ea6c4b1
1 changed files with 13 additions and 1 deletions
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -268,6 +268,13 @@ def text_to_speech_tool(
    tts_config = _load_tts_config()
    provider = _get_provider(tts_config)

+    # Detect platform from gateway env var to choose the best output format.
+    # Telegram voice bubbles require Opus (.ogg); OpenAI and ElevenLabs can
+    # produce Opus natively (no ffmpeg needed).  Edge TTS always outputs MP3
+    # and needs ffmpeg for conversion.
+    platform = os.getenv("HERMES_SESSION_PLATFORM", "").lower()
+    want_opus = (platform == "telegram")
+
    # Determine output path
    if output_path:
        file_path = Path(output_path).expanduser()
@@ -275,7 +282,12 @@ def text_to_speech_tool(
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        out_dir = Path(DEFAULT_OUTPUT_DIR)
        out_dir.mkdir(parents=True, exist_ok=True)
-        file_path = out_dir / f"tts_{timestamp}.mp3"
+        # Use .ogg for Telegram with providers that support native Opus output,
+        # otherwise fall back to .mp3 (Edge TTS will attempt ffmpeg conversion later).
+        if want_opus and provider in ("openai", "elevenlabs"):
+            file_path = out_dir / f"tts_{timestamp}.ogg"
+        else:
+            file_path = out_dir / f"tts_{timestamp}.mp3"

    # Ensure parent directory exists
    file_path.parent.mkdir(parents=True, exist_ok=True)