From ff9ea6c4b1c69ebe450a6128e8f76d39162565ac Mon Sep 17 00:00:00 2001
From: teknium1
Date: Sat, 14 Feb 2026 16:13:26 -0800
Subject: [PATCH] Enhance TTS tool to support platform-specific audio formats

- Added detection of the platform from the environment variable to determine the appropriate audio output format.
- Implemented logic to output Opus (.ogg) files for Telegram when using compatible TTS providers, while defaulting to MP3 for others.
---
 tools/tts_tool.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 5129196a0..3d1d3d2fb 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -268,6 +268,13 @@ def text_to_speech_tool(
     tts_config = _load_tts_config()
     provider = _get_provider(tts_config)
 
+    # Detect platform from gateway env var to choose the best output format.
+    # Telegram voice bubbles require Opus (.ogg); OpenAI and ElevenLabs can
+    # produce Opus natively (no ffmpeg needed). Edge TTS always outputs MP3
+    # and needs ffmpeg for conversion.
+    platform = os.getenv("HERMES_SESSION_PLATFORM", "").lower()
+    want_opus = (platform == "telegram")
+
     # Determine output path
     if output_path:
         file_path = Path(output_path).expanduser()
@@ -275,7 +282,12 @@ def text_to_speech_tool(
         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
         out_dir = Path(DEFAULT_OUTPUT_DIR)
         out_dir.mkdir(parents=True, exist_ok=True)
-        file_path = out_dir / f"tts_{timestamp}.mp3"
+        # Use .ogg for Telegram with providers that support native Opus output,
+        # otherwise fall back to .mp3 (Edge TTS will attempt ffmpeg conversion later).
+        if want_opus and provider in ("openai", "elevenlabs"):
+            file_path = out_dir / f"tts_{timestamp}.ogg"
+        else:
+            file_path = out_dir / f"tts_{timestamp}.mp3"
 
     # Ensure parent directory exists
     file_path.parent.mkdir(parents=True, exist_ok=True)