feat: add edge-tts as zero-cost voice output provider

- Add EdgeTTSAdapter to bin/deepdive_tts.py (provider key: "edge-tts");
  default voice is en-US-GuyNeural and no API key is required
- Add EdgeTTS class to intelligence/deepdive/tts_engine.py
- Update HybridTTS to try edge-tts as fallback between piper and elevenlabs
- Add --voice-memo flag to bin/night_watch.py for spoken nightly reports
- Add edge-tts>=6.1.9 to requirements.txt
- Create docs/voice-output.md documenting all providers and fallback chain
- Add tests/test_edge_tts.py with 17 unit tests (all mocked, no network)

Fixes #1126

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Alexander Whitestone
2026-04-08 06:29:26 -04:00
parent a1c153c095
commit ef74536e33
6 changed files with 694 additions and 7 deletions

View File

@@ -152,17 +152,55 @@ class OpenAITTSAdapter:
return mp3_path
class EdgeTTSAdapter:
    """Zero-cost TTS using Microsoft Edge neural voices (no API key required).

    Requires: pip install "edge-tts>=6.1.9"
    Voices: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support
    """

    # Voice used when the config does not provide a ``voice_id``.
    DEFAULT_VOICE = "en-US-GuyNeural"

    def __init__(self, config: TTSConfig):
        """Store *config* and resolve the voice to synthesize with.

        A falsy ``config.voice_id`` (``None`` or ``""``) selects
        ``DEFAULT_VOICE``.
        """
        self.config = config
        self.voice = config.voice_id or self.DEFAULT_VOICE

    def synthesize(self, text: str, output_path: Path) -> Path:
        """Synthesize *text* to an MP3 file and return the file's path.

        Args:
            text: Text to speak.
            output_path: Destination path; its suffix is replaced by ``.mp3``.

        Returns:
            The path of the MP3 file that was written.

        Raises:
            RuntimeError: If the ``edge_tts`` package cannot be imported.
        """
        try:
            import edge_tts
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise RuntimeError("edge-tts not installed. Run: pip install edge-tts") from err
        import asyncio
        import concurrent.futures

        mp3_path = output_path.with_suffix(".mp3")

        async def _run():
            communicate = edge_tts.Communicate(text, self.voice)
            await communicate.save(str(mp3_path))

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop is running in this thread: the simple path works.
            asyncio.run(_run())
        else:
            # asyncio.run() refuses to start inside a running loop, so drive
            # the coroutine on a worker thread that owns a fresh loop. This
            # still blocks the caller, matching the synchronous contract.
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                pool.submit(lambda: asyncio.run(_run())).result()
        return mp3_path
# Registry of TTS adapter classes keyed by provider name.
# NOTE(review): presumably looked up with the provider string read from
# DEEPDIVE_TTS_PROVIDER — confirm against the caller.
ADAPTERS = {
    "piper": PiperAdapter,
    "elevenlabs": ElevenLabsAdapter,
    "openai": OpenAITTSAdapter,
    "edge-tts": EdgeTTSAdapter,
}
def get_provider_config() -> TTSConfig:
"""Load TTS configuration from environment."""
provider = os.environ.get("DEEPDIVE_TTS_PROVIDER", "openai")
voice = os.environ.get("DEEPDIVE_TTS_VOICE", "alloy" if provider == "openai" else "matthew")
if provider == "openai":
default_voice = "alloy"
elif provider == "edge-tts":
default_voice = EdgeTTSAdapter.DEFAULT_VOICE
else:
default_voice = "matthew"
voice = os.environ.get("DEEPDIVE_TTS_VOICE", default_voice)
return TTSConfig(
provider=provider,