Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
Executable Phase 4 component: PiperTTS, ElevenLabsTTS, HybridTTS classes with chunking, concatenation, error handling. Ready for integration with Phase 3 synthesizer. Burn mode artifact by Ezra.
229 lines
7.5 KiB
Python
229 lines
7.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
TTS Engine for Deep Dive — Phase 4 Implementation
|
|
Issue #830 — Sovereign NotebookLM Daily Briefing
|
|
"""
|
|
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
import requests
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Optional, List
|
|
|
|
|
|
class PiperTTS:
    """Local TTS using Piper (sovereign, no API calls).

    The voice model and its JSON config are cached under
    ``~/.local/share/piper`` and fetched with ``wget`` when missing.
    Synthesis shells out to the ``piper`` and ``ffmpeg`` executables.
    """

    DEFAULT_MODEL = "en_US-lessac-medium"
    MODEL_BASE_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US"

    def __init__(self, model_name: Optional[str] = None):
        """Select a voice model and make sure its files are cached locally.

        Args:
            model_name: Piper voice name such as ``"en_US-lessac-medium"``;
                falls back to ``DEFAULT_MODEL`` when omitted or empty.

        Raises:
            subprocess.CalledProcessError: if a required download fails.
        """
        self.model_name = model_name or self.DEFAULT_MODEL
        self.model_path = None
        self.config_path = None
        self._ensure_model()

    def _ensure_model(self):
        """Resolve the cached model/config paths, downloading what is missing."""
        model_dir = Path.home() / ".local/share/piper"
        model_dir.mkdir(parents=True, exist_ok=True)

        self.model_path = model_dir / f"{self.model_name}.onnx"
        self.config_path = model_dir / f"{self.model_name}.onnx.json"

        # piper is invoked with both --model and --config, so both files
        # must be present, not just the .onnx weights.
        if not (self.model_path.exists() and self.config_path.exists()):
            self._download_model(model_dir)

    def _download_model(self, model_dir: Path):
        """Download whichever of the model / config files is not cached yet."""
        print(f"Downloading Piper model: {self.model_name}")

        # "en_US-lessac-medium" -> voice "lessac", quality "medium".
        # NOTE(review): MODEL_BASE_URL is pinned to en/en_US, so only en_US
        # voices resolve to a valid URL.
        parts = self.model_name.rsplit("-", 1)
        quality = parts[-1]
        voice = parts[0]
        if voice.startswith("en_US-"):
            voice = voice[len("en_US-"):]
        base = f"{self.MODEL_BASE_URL}/{voice}/{quality}"

        for dest in (self.model_path, self.config_path):
            if not dest.exists():
                self._fetch(f"{base}/{dest.name}", dest)

        print(f"Model downloaded to {model_dir}")

    def _fetch(self, url: str, dest: Path):
        """Download ``url`` to ``dest`` without ever leaving a partial ``dest``.

        wget creates its -O target before any data arrives, so the download
        goes to a ``.part`` sibling that is renamed only after wget succeeds.
        Otherwise a failed download would look like a valid cached file on
        the next run.
        """
        partial = dest.with_name(dest.name + ".part")
        try:
            subprocess.run([
                "wget", "-q", "--show-progress",
                "-O", str(partial),
                url
            ], check=True)
            partial.replace(dest)
        finally:
            # Only still present when wget failed or the rename did not happen.
            if partial.exists():
                partial.unlink()

    def synthesize(self, text: str, output_path: str) -> str:
        """Convert text to MP3.

        The text is split into chunks, each chunk is rendered to WAV by
        piper, and ffmpeg concatenates and encodes the result.

        Args:
            text: Text to speak.
            output_path: Destination MP3 path (overwritten if it exists).

        Returns:
            ``output_path``.

        Raises:
            subprocess.CalledProcessError: if piper or ffmpeg exits non-zero.
        """
        chunks = self._chunk_text(text)

        with tempfile.TemporaryDirectory() as tmpdir:
            chunk_files = []

            for i, chunk in enumerate(chunks):
                chunk_wav = f"{tmpdir}/chunk_{i:03d}.wav"
                self._synthesize_chunk(chunk, chunk_wav)
                chunk_files.append(chunk_wav)

            # Concatenate via ffmpeg's concat demuxer list file.
            concat_list = f"{tmpdir}/concat.txt"
            with open(concat_list, 'w', encoding="utf-8") as f:
                for cf in chunk_files:
                    # A single quote inside a quoted concat entry must be
                    # written as '\'' or the list file fails to parse.
                    escaped = cf.replace("'", "'\\''")
                    f.write(f"file '{escaped}'\n")

            subprocess.run([
                "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
                "-f", "concat", "-safe", "0", "-i", concat_list,
                "-c:a", "libmp3lame", "-q:a", "4", output_path
            ], check=True)

        return output_path

    def _chunk_text(self, text: str, max_chars: int = 400) -> List[str]:
        """Split text at sentence boundaries into chunks of about ``max_chars``.

        A boundary is any whitespace (spaces or newlines) that follows
        ``.``, ``!`` or ``?``. Sentences are packed greedily and joined with
        single spaces. A single sentence longer than ``max_chars`` is kept
        whole rather than cut mid-sentence, so a chunk may exceed the limit.

        Returns:
            The list of chunks; for input with no usable sentence, a
            one-element list holding ``text[:max_chars]``.
        """
        import re  # local import: keeps this block independent of file-level imports

        # Splitting on the whitespace itself (rather than swapping in a "|"
        # sentinel) keeps literal pipe characters in the text intact.
        sentences = re.split(r"(?<=[.!?])\s+", text)

        chunks = []
        current = []
        current_len = 0  # length of " ".join(current) plus one trailing space

        for sent in sentences:
            sent = sent.strip()
            if not sent:
                continue
            if current_len + len(sent) < max_chars:
                current.append(sent)
                current_len += len(sent) + 1
            else:
                if current:
                    chunks.append(" ".join(current))
                current = [sent]
                current_len = len(sent) + 1

        if current:
            chunks.append(" ".join(current))

        return chunks or [text[:max_chars]]

    def _synthesize_chunk(self, text: str, output_wav: str):
        """Render one text chunk to ``output_wav``; piper reads the text on stdin."""
        subprocess.run([
            "piper", "--quiet",
            "--model", str(self.model_path),
            "--config", str(self.config_path),
            "--output_file", output_wav
        ], input=text.encode(), check=True)
|
|
|
|
|
|
class ElevenLabsTTS:
    """Cloud TTS using ElevenLabs API."""

    API_BASE = "https://api.elevenlabs.io/v1"
    DEFAULT_VOICE = "21m00Tcm4TlvDq8ikWAM"  # Rachel
    # Per-request text cap applied before sending.
    # NOTE(review): the real limit may depend on plan/model - confirm against the API docs.
    MAX_CHARS = 5000

    def __init__(self, api_key: Optional[str] = None, voice_id: Optional[str] = None):
        """Store credentials and the voice to use.

        Args:
            api_key: ElevenLabs API key; read from the ``ELEVENLABS_API_KEY``
                environment variable when omitted.
            voice_id: Voice identifier; defaults to ``DEFAULT_VOICE``.

        Raises:
            ValueError: if no API key is supplied or found in the environment.
        """
        self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
        if not self.api_key:
            raise ValueError("ELEVENLABS_API_KEY required")
        self.voice_id = voice_id or self.DEFAULT_VOICE

    def _clip_text(self, text: str) -> str:
        """Return ``text`` limited to ``MAX_CHARS``, warning when content is dropped."""
        if len(text) > self.MAX_CHARS:
            # Truncation loses briefing content, so make it visible to the
            # operator instead of cutting silently.
            print(f"ElevenLabs: text truncated from {len(text)} to {self.MAX_CHARS} characters")
            return text[:self.MAX_CHARS]
        return text

    def synthesize(self, text: str, output_path: str) -> str:
        """Convert text to speech via API and write the audio to ``output_path``.

        Args:
            text: Text to speak; anything beyond ``MAX_CHARS`` is dropped
                (with a printed warning).
            output_path: File that receives the response body (MPEG audio is
                requested via the ``Accept`` header).

        Returns:
            ``output_path``.

        Raises:
            requests.HTTPError: if the API answers with an error status.
        """
        url = f"{self.API_BASE}/text-to-speech/{self.voice_id}"

        headers = {
            "Accept": "audio/mpeg",
            "Content-Type": "application/json",
            "xi-api-key": self.api_key
        }

        data = {
            "text": self._clip_text(text),
            "model_id": "eleven_monolingual_v1",
            "voice_settings": {
                "stability": 0.5,
                "similarity_boost": 0.75
            }
        }

        response = requests.post(url, json=data, headers=headers, timeout=120)
        # Fail before touching the output file so an error body is never
        # written out as if it were audio.
        response.raise_for_status()

        with open(output_path, 'wb') as f:
            f.write(response.content)

        return output_path
|
|
|
|
|
|
class HybridTTS:
    """TTS with sovereign primary, cloud fallback.

    By default Piper (local) is preferred and ElevenLabs (cloud) is the
    fallback; ``prefer_cloud=True`` reverses that order. The first engine
    that initializes becomes ``primary``. The other one is only constructed
    if ``primary`` fails during ``synthesize``, so no extra model download
    or API-key lookup happens on the success path.
    """

    def __init__(self, prefer_cloud: bool = False):
        """Pick the primary engine and remember the remaining candidate.

        Args:
            prefer_cloud: Try ElevenLabs before Piper when true.
        """
        self.primary = None
        self.fallback = None
        self.prefer_cloud = prefer_cloud

        # Engine factories that have not been attempted yet, in preference order.
        if prefer_cloud:
            self._untried = [self._init_elevenlabs, self._init_piper]
        else:
            self._untried = [self._init_piper, self._init_elevenlabs]

        self.primary = self._next_engine()

    def _next_engine(self):
        """Return the next engine that initializes, or None when none are left."""
        while self._untried:
            engine = self._untried.pop(0)()
            if engine is not None:
                return engine
        return None

    def _init_piper(self):
        """Build a PiperTTS engine; report and return None on failure."""
        try:
            return PiperTTS()
        except Exception as e:
            print(f"Piper init failed: {e}")
            return None

    def _init_elevenlabs(self):
        """Build an ElevenLabsTTS engine; report and return None on failure."""
        try:
            return ElevenLabsTTS()
        except Exception as e:
            print(f"ElevenLabs init failed: {e}")
            return None

    def synthesize(self, text: str, output_path: str) -> str:
        """Synthesize with fallback.

        Tries ``primary`` first; if it raises, the fallback engine is
        created on demand (when not already set) and tried next.

        Args:
            text: Text to speak.
            output_path: Destination audio file.

        Returns:
            The path returned by whichever engine succeeded.

        Raises:
            RuntimeError: if no engine is available or every engine failed.
        """
        if self.primary is not None:
            try:
                return self.primary.synthesize(text, output_path)
            except Exception as e:
                print(f"Primary failed: {e}")

        # Lazily bring up the second engine only once it is actually needed.
        if self.fallback is None:
            self.fallback = self._next_engine()

        if self.fallback is not None:
            try:
                return self.fallback.synthesize(text, output_path)
            except Exception as e:
                print(f"Fallback failed: {e}")

        raise RuntimeError("No TTS engine available")
|
|
|
|
|
|
def phase4_generate_audio(briefing_text: str, output_dir: str = "/tmp/deepdive",
                          prefer_cloud: bool = False) -> str:
    """Phase 4: turn briefing text into a timestamped MP3 file.

    Args:
        briefing_text: Text of the briefing to be spoken.
        output_dir: Directory for the audio file; created when absent.
        prefer_cloud: Forwarded to ``HybridTTS`` to select engine order.

    Returns:
        Whatever path ``HybridTTS.synthesize`` returns for the generated file.
    """
    os.makedirs(output_dir, exist_ok=True)

    # File name carries a second-resolution local timestamp.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    engine = HybridTTS(prefer_cloud=prefer_cloud)
    return engine.synthesize(briefing_text, f"{output_dir}/deepdive_{stamp}.mp3")
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: synthesize a short sample briefing and report
    # where the audio file ended up.
    sample_briefing = """
Good morning. This is your Deep Dive daily briefing for April 5th, 2026.
Three papers from arXiv caught our attention today.
First, researchers at Stanford propose a new method for efficient fine-tuning
of large language models using gradient checkpointing.
Second, a team from DeepMind releases a comprehensive survey on multi-agent
reinforcement learning in open-ended environments.
Third, an interesting approach to speculative decoding that promises 3x speedup
for transformer inference without quality degradation.
That concludes today's briefing. Stay sovereign.
"""

    generated_path = phase4_generate_audio(sample_briefing)
    print(f"Generated: {generated_path}")
|