#!/usr/bin/env python3
"""
TTS Pipeline for Deep Dive
Converts briefing text to audio with a local Piper voice model.

Only the local Piper backend is implemented in this module. It shells out to
three external executables, which must be on PATH: ``curl`` (model download),
``piper`` (speech synthesis) and ``ffmpeg`` (WAV -> Opus conversion).
"""

# Provenance: this module is the payload of the patch
# "[DEEP-DIVE] Scaffold component — #830", which adds it as
# scaffold/deep-dive/tts/tts_pipeline.py.

import subprocess
import tempfile
import os
from datetime import datetime
from pathlib import Path
from typing import Optional

# Piper configuration
PIPER_MODEL = "en_US-lessac-medium"  # Good quality, reasonable speed
PIPER_MODEL_URL = f"https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/lessac/medium/{PIPER_MODEL}.onnx"
PIPER_CONFIG_URL = f"https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/lessac/medium/{PIPER_MODEL}.onnx.json"
# Original (misspelled) name, kept so existing importers keep working.
PIVER_CONFIG_URL = PIPER_CONFIG_URL


class TTSGenerator:
    """Generate speech audio from text with Piper and convert it with ffmpeg.

    Attributes:
        output_dir: Directory that receives the generated audio files.
        model_dir: Directory holding the Piper model and its JSON config.
        model_path: Path of the ``.onnx`` model file passed to Piper.
    """

    def __init__(
        self,
        output_dir: str = "./audio_output",
        model_dir: str = "./piper_models",
    ):
        """Create the output/model directories and make sure the model exists.

        Args:
            output_dir: Where audio files are written. Created (including
                missing parents) if it does not exist.
            model_dir: Where the Piper model files live or are downloaded to.

        Raises:
            subprocess.CalledProcessError: If a required model download fails.
            FileNotFoundError: If a download is needed and ``curl`` is missing.
        """
        self.output_dir = Path(output_dir)
        # parents=True so a nested output_dir does not fail on a missing parent.
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.model_dir = Path(model_dir)
        self.model_path = self._ensure_model()

    @staticmethod
    def _download(url: str, dest: Path) -> None:
        """Download *url* to *dest* with curl, never leaving a partial *dest*.

        The transfer goes to a ``.part`` sibling first and is renamed into
        place only on success, so an interrupted or failed download cannot be
        mistaken for a valid file by a later existence check.
        """
        partial = dest.with_name(dest.name + ".part")
        try:
            # --fail: exit non-zero on HTTP errors instead of saving the
            # server's error page as if it were the requested file.
            subprocess.run(
                ["curl", "--fail", "-L", "-o", str(partial), url],
                check=True,
            )
            partial.replace(dest)
        finally:
            # No-op after a successful rename; removes leftovers on failure.
            if partial.exists():
                partial.unlink()

    @staticmethod
    def _default_output_name() -> str:
        """Return the default base name: ``briefing_`` plus today's YYYYMMDD."""
        return f"briefing_{datetime.now().strftime('%Y%m%d')}"

    def _ensure_model(self) -> Path:
        """Download Piper model if not present.

        The model and its config are checked independently, so a missing
        config next to an existing model is fetched as well.

        Returns:
            Path of the ``.onnx`` model file.
        """
        self.model_dir.mkdir(parents=True, exist_ok=True)

        model_file = self.model_dir / f"{PIPER_MODEL}.onnx"
        config_file = self.model_dir / f"{PIPER_MODEL}.onnx.json"

        missing = [
            (url, dest)
            for url, dest in (
                (PIPER_MODEL_URL, model_file),
                (PIPER_CONFIG_URL, config_file),
            )
            if not dest.exists()
        ]
        if missing:
            print("Downloading Piper model...")
            for url, dest in missing:
                self._download(url, dest)

        return model_file

    def generate_audio(self, text: str, output_name: Optional[str] = None) -> Path:
        """Generate audio from text using Piper.

        Args:
            text: Text to synthesize; sent to Piper on stdin as UTF-8.
            output_name: Base file name without extension. Defaults to
                ``briefing_YYYYMMDD`` for the current local date.

        Returns:
            Path of the WAV file Piper was asked to write.

        Raises:
            ValueError: If *text* is empty or only whitespace.
            RuntimeError: If Piper exits with a non-zero status.
            FileNotFoundError: If the ``piper`` executable is not installed.
        """
        if not text or not text.strip():
            raise ValueError("text must not be empty")

        output_name = output_name or self._default_output_name()
        output_wav = self.output_dir / f"{output_name}.wav"

        # Piper command
        cmd = [
            "piper",
            "--model", str(self.model_path),
            "--output_file", str(output_wav),
        ]

        # Run Piper. The encoding is pinned to UTF-8 so non-ASCII text does
        # not depend on the process locale.
        result = subprocess.run(
            cmd,
            input=text,
            capture_output=True,
            encoding="utf-8",
            errors="replace",
        )

        if result.returncode != 0:
            raise RuntimeError(f"Piper failed: {result.stderr}")

        return output_wav

    def convert_to_opus(self, wav_path: Path) -> Path:
        """Convert WAV to Opus for Telegram (smaller, better quality).

        Args:
            wav_path: Existing WAV file to convert.

        Returns:
            Path of the ``.opus`` file written next to *wav_path*.

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits non-zero; its
                captured output is available on the exception.
            FileNotFoundError: If the ``ffmpeg`` executable is not installed.
        """
        opus_path = wav_path.with_suffix(".opus")

        cmd = [
            "ffmpeg", "-y",  # -y: overwrite an existing output file
            "-i", str(wav_path),
            "-c:a", "libopus",
            "-b:a", "24k",  # Good quality for speech
            str(opus_path),
        ]

        subprocess.run(cmd, check=True, capture_output=True)
        return opus_path


def generate_briefing_audio(text: str, output_dir: str = "./audio_output") -> Path:
    """Convenience function: text → opus for Telegram.

    Args:
        text: Briefing text to synthesize.
        output_dir: Directory that receives the Opus file.

    Returns:
        Path of the generated ``.opus`` file.

    Raises:
        ValueError: If *text* is empty or only whitespace.
        RuntimeError: If Piper fails.
        subprocess.CalledProcessError: If a download or ffmpeg fails.
    """
    tts = TTSGenerator(output_dir)
    wav = tts.generate_audio(text)
    try:
        return tts.convert_to_opus(wav)
    finally:
        # Clean up WAV, also when the conversion raised, so a failed run
        # does not leave the intermediate file behind.
        if wav.exists():
            wav.unlink()


def main() -> None:
    """Smoke-test the pipeline with a fixed sample sentence."""
    # Test with sample text
    sample = "This is a test of the Deep Dive briefing system. Piper TTS is running locally."
    try:
        result = generate_briefing_audio(sample)
        print(f"Generated: {result}")
    except Exception as e:
        # Top-level boundary of a demo script: report the failure, don't crash.
        print(f"TTS failed (expected if Piper not installed): {e}")


if __name__ == "__main__":
    main()