# TTS Integration Proof — Deep Dive Phase 4 # Issue #830 — Sovereign NotebookLM Daily Briefing # Created: Ezra, Burn Mode | 2026-04-05 ## Architecture ``` ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ Synthesis │────▶│ TTS Engine │────▶│ Audio Output │ │ (text brief) │ │ Piper/Coqui/ │ │ MP3/OGG file │ │ │ │ ElevenLabs │ │ │ └─────────────────┘ └─────────────────┘ └─────────────────┘ ``` ## Implementation ### Option A: Local Piper (Sovereign) ```python #!/usr/bin/env python3 """Piper TTS integration for Deep Dive Phase 4.""" import subprocess import tempfile import os from pathlib import Path class PiperTTS: """Local TTS using Piper (sovereign, no API calls).""" def __init__(self, model_path: str = None): self.model_path = model_path or self._download_default_model() self.config_path = self.model_path.replace(".onnx", ".onnx.json") def _download_default_model(self) -> str: """Download default en_US voice model (~2GB).""" model_dir = Path.home() / ".local/share/piper" model_dir.mkdir(parents=True, exist_ok=True) model_file = model_dir / "en_US-lessac-medium.onnx" config_file = model_dir / "en_US-lessac-medium.onnx.json" if not model_file.exists(): print("Downloading Piper voice model (~2GB)...") base_url = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/lessac/medium" subprocess.run([ "wget", "-O", str(model_file), f"{base_url}/en_US-lessac-medium.onnx" ], check=True) subprocess.run([ "wget", "-O", str(config_file), f"{base_url}/en_US-lessac-medium.onnx.json" ], check=True) return str(model_file) def synthesize(self, text: str, output_path: str) -> str: """Convert text to speech.""" # Split long text into chunks (Piper handles ~400 chars well) chunks = self._chunk_text(text, max_chars=400) with tempfile.TemporaryDirectory() as tmpdir: chunk_files = [] for i, chunk in enumerate(chunks): chunk_wav = f"{tmpdir}/chunk_{i:03d}.wav" self._synthesize_chunk(chunk, chunk_wav) chunk_files.append(chunk_wav) # Concatenate chunks concat_list = f"{tmpdir}/concat.txt" with open(concat_list, 'w') as f: for cf in chunk_files: f.write(f"file '{cf}'\n") # Final output subprocess.run([ "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat_list, "-c:a", "libmp3lame", "-q:a", "4", output_path ], check=True, capture_output=True) return output_path def _chunk_text(self, text: str, max_chars: int = 400) -> list: """Split text at sentence boundaries.""" sentences = text.replace('. ', '.|').replace('! ', '!|').replace('? ', '?|').split('|') chunks = [] current = "" for sent in sentences: if len(current) + len(sent) < max_chars: current += sent + " " else: if current: chunks.append(current.strip()) current = sent + " " if current: chunks.append(current.strip()) return chunks def _synthesize_chunk(self, text: str, output_wav: str): """Synthesize single chunk.""" subprocess.run([ "piper", "--model", self.model_path, "--config", self.config_path, "--output_file", output_wav ], input=text.encode(), check=True) # Usage example if __name__ == "__main__": tts = PiperTTS() briefing_text = """ Good morning. Today\'s Deep Dive covers three papers from arXiv. First, a new approach to reinforcement learning from human feedback. Second, advances in quantized model inference for edge deployment. Third, a survey of multi-agent coordination protocols. """ output = tts.synthesize(briefing_text, "daily_briefing.mp3") print(f"Generated: {output}") ``` ### Option B: ElevenLabs API (Quality) ```python #!/usr/bin/env python3 """ElevenLabs TTS integration for Deep Dive Phase 4.""" import os import requests from pathlib import Path class ElevenLabsTTS: """Cloud TTS using ElevenLabs API.""" API_BASE = "https://api.elevenlabs.io/v1" def __init__(self, api_key: str = None): self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY") if not self.api_key: raise ValueError("ElevenLabs API key required") # Rachel voice (professional, clear) self.voice_id = "21m00Tcm4TlvDq8ikWAM" def synthesize(self, text: str, output_path: str) -> str: """Convert text to speech via ElevenLabs.""" url = f"{self.API_BASE}/text-to-speech/{self.voice_id}" headers = { "Accept": "audio/mpeg", "Content-Type": "application/json", "xi-api-key": self.api_key } # ElevenLabs handles long text natively (up to ~5000 chars) data = { "text": text, "model_id": "eleven_monolingual_v1", "voice_settings": { "stability": 0.5, "similarity_boost": 0.75 } } response = requests.post(url, json=data, headers=headers) response.raise_for_status() with open(output_path, 'wb') as f: f.write(response.content) return output_path # Usage example if __name__ == "__main__": tts = ElevenLabsTTS() briefing_text = "Your daily intelligence briefing..." output = tts.synthesize(briefing_text, "daily_briefing.mp3") print(f"Generated: {output}") ``` ## Hybrid Implementation (Recommended) ```python #!/usr/bin/env python3 """Hybrid TTS with Piper primary, ElevenLabs fallback.""" import os from typing import Optional class HybridTTS: """TTS with sovereign default, cloud fallback.""" def __init__(self): self.primary = None self.fallback = None # Try Piper first (sovereign) try: self.primary = PiperTTS() print("✅ Piper TTS ready (sovereign)") except Exception as e: print(f"⚠️ Piper unavailable: {e}") # Set up ElevenLabs fallback if os.getenv("ELEVENLABS_API_KEY"): try: self.fallback = ElevenLabsTTS() print("✅ ElevenLabs fallback ready") except Exception as e: print(f"⚠️ ElevenLabs unavailable: {e}") def synthesize(self, text: str, output_path: str) -> str: """Synthesize with fallback chain.""" # Try primary if self.primary: try: return self.primary.synthesize(text, output_path) except Exception as e: print(f"Primary TTS failed: {e}, trying fallback...") # Try fallback if self.fallback: return self.fallback.synthesize(text, output_path) raise RuntimeError("No TTS engine available") # Integration with Deep Dive pipeline def phase4_generate_audio(briefing_text: str, output_dir: str = "/tmp/deepdive") -> str: """Phase 4: Generate audio from synthesized briefing.""" os.makedirs(output_dir, exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_path = f"{output_dir}/deepdive_{timestamp}.mp3" tts = HybridTTS() return tts.synthesize(briefing_text, output_path) ``` ## Testing ```bash # Test Piper locally piper --model ~/.local/share/piper/en_US-lessac-medium.onnx --output_file test.wav <