From ff9ea6c4b1c69ebe450a6128e8f76d39162565ac Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Sat, 14 Feb 2026 16:13:26 -0800
Subject: [PATCH] Enhance TTS tool to support platform-specific audio formats

- Added detection of the gateway platform from the HERMES_SESSION_PLATFORM environment variable to determine the appropriate audio output format.
- Implemented logic to output Opus (.ogg) files for Telegram when the TTS provider supports native Opus output (OpenAI, ElevenLabs), while defaulting to MP3 in all other cases.
---
 tools/tts_tool.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 5129196a0..3d1d3d2fb 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -268,6 +268,13 @@ def text_to_speech_tool(
     tts_config = _load_tts_config()
     provider = _get_provider(tts_config)
 
+    # Detect platform from gateway env var to choose the best output format.
+    # Telegram voice bubbles require Opus (.ogg); OpenAI and ElevenLabs can
+    # produce Opus natively (no ffmpeg needed).  Edge TTS always outputs MP3
+    # and needs ffmpeg for conversion.
+    platform = os.getenv("HERMES_SESSION_PLATFORM", "").lower()
+    want_opus = (platform == "telegram")
+
     # Determine output path
     if output_path:
         file_path = Path(output_path).expanduser()
@@ -275,7 +282,12 @@ def text_to_speech_tool(
         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
         out_dir = Path(DEFAULT_OUTPUT_DIR)
         out_dir.mkdir(parents=True, exist_ok=True)
-        file_path = out_dir / f"tts_{timestamp}.mp3"
+        # Use .ogg for Telegram with providers that support native Opus output,
+        # otherwise fall back to .mp3 (Edge TTS will attempt ffmpeg conversion later).
+        if want_opus and provider in ("openai", "elevenlabs"):
+            file_path = out_dir / f"tts_{timestamp}.ogg"
+        else:
+            file_path = out_dir / f"tts_{timestamp}.mp3"
 
     # Ensure parent directory exists
     file_path.parent.mkdir(parents=True, exist_ok=True)