Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ef74536e33 |
@@ -152,17 +152,55 @@ class OpenAITTSAdapter:
|
||||
return mp3_path
|
||||
|
||||
|
||||
class EdgeTTSAdapter:
    """Zero-cost TTS using Microsoft Edge neural voices (no API key required).

    Requires: pip install "edge-tts>=6.1.9"
    (keep the quotes: an unquoted ``>=`` is parsed by the shell as a redirection)
    Voices: https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support
    """

    # Voice used when the config does not name one.
    DEFAULT_VOICE = "en-US-GuyNeural"

    def __init__(self, config: TTSConfig):
        """Store *config* and resolve the voice to use.

        An empty or missing ``config.voice_id`` falls back to ``DEFAULT_VOICE``.
        """
        self.config = config
        self.voice = config.voice_id or self.DEFAULT_VOICE

    def synthesize(self, text: str, output_path: Path) -> Path:
        """Synthesize *text* to an MP3 file and return the file's path.

        The audio is written to *output_path* with its suffix replaced by
        ``.mp3``.

        Raises:
            RuntimeError: if the ``edge_tts`` package cannot be imported. The
                original ``ImportError`` is attached as ``__cause__``.
        """
        try:
            import edge_tts
        except ImportError as exc:
            # Chain the cause so the traceback shows why the import failed
            # (e.g. a broken transitive dependency rather than a missing package).
            raise RuntimeError("edge-tts not installed. Run: pip install edge-tts") from exc

        import asyncio

        mp3_path = output_path.with_suffix(".mp3")

        async def _run():
            communicate = edge_tts.Communicate(text, self.voice)
            await communicate.save(str(mp3_path))

        # NOTE: asyncio.run() starts its own event loop, so this method must be
        # called from synchronous code, not from inside a running loop.
        asyncio.run(_run())
        return mp3_path
|
||||
|
||||
|
||||
# Registry of TTS adapter classes keyed by provider name.
# NOTE(review): keys appear to be the values accepted for DEEPDIVE_TTS_PROVIDER
# ("openai", "edge-tts", ...) — confirm against the lookup site.
ADAPTERS = {
    "piper": PiperAdapter,
    "elevenlabs": ElevenLabsAdapter,
    "openai": OpenAITTSAdapter,
    "edge-tts": EdgeTTSAdapter,
}
|
||||
|
||||
|
||||
def get_provider_config() -> TTSConfig:
|
||||
"""Load TTS configuration from environment."""
|
||||
provider = os.environ.get("DEEPDIVE_TTS_PROVIDER", "openai")
|
||||
voice = os.environ.get("DEEPDIVE_TTS_VOICE", "alloy" if provider == "openai" else "matthew")
|
||||
if provider == "openai":
|
||||
default_voice = "alloy"
|
||||
elif provider == "edge-tts":
|
||||
default_voice = EdgeTTSAdapter.DEFAULT_VOICE
|
||||
else:
|
||||
default_voice = "matthew"
|
||||
voice = os.environ.get("DEEPDIVE_TTS_VOICE", default_voice)
|
||||
|
||||
return TTSConfig(
|
||||
provider=provider,
|
||||
|
||||
@@ -32,12 +32,14 @@ import importlib.util
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
@@ -212,6 +214,46 @@ def generate_report(date_str: str, checker_mod) -> str:
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# ── Voice memo ────────────────────────────────────────────────────────
|
||||
|
||||
def _generate_voice_memo(
    report_text: str,
    date_str: str,
    *,
    output_dir: Optional[Path] = None,
    voice: str = "en-US-GuyNeural",
) -> Optional[str]:
    """Generate an MP3 voice memo of the night watch report.

    Markdown markers (``#``, ``|``, ``*``, runs of three or more ``-``) are
    removed and whitespace is collapsed before the text is sent to edge-tts.

    Args:
        report_text: The markdown report to read aloud.
        date_str: Date label used in the file name ``night-watch-<date_str>.mp3``.
        output_dir: Directory for the MP3. Defaults to ``/tmp/bezalel``.
        voice: edge-tts voice name.

    Returns the output path on success, or None if generation fails (edge-tts
    missing, nothing left to speak, the output directory cannot be created, or
    synthesis raises).
    """
    try:
        import edge_tts
    except ImportError:
        logger.warning("edge-tts not installed; skipping voice memo. Run: pip install edge-tts")
        return None

    import asyncio

    # Strip markdown formatting for cleaner speech
    clean = report_text
    clean = re.sub(r"#+\s*", "", clean)      # headings
    clean = re.sub(r"\|", " ", clean)        # table pipes
    clean = re.sub(r"\*+", "", clean)        # bold/italic markers
    clean = re.sub(r"-{3,}", "", clean)      # horizontal rules
    clean = re.sub(r"\s{2,}", " ", clean)    # collapse extra whitespace
    clean = clean.strip()

    if not clean:
        logger.warning("Voice memo skipped: report contains no speakable text")
        return None

    try:
        # Directory creation lives inside the try so a filesystem error is
        # reported as a failed memo (None) instead of escaping to the caller.
        target_dir = Path("/tmp/bezalel") if output_dir is None else Path(output_dir)
        target_dir.mkdir(parents=True, exist_ok=True)
        mp3_path = target_dir / f"night-watch-{date_str}.mp3"

        async def _run():
            communicate = edge_tts.Communicate(clean, voice)
            await communicate.save(str(mp3_path))

        asyncio.run(_run())
        logger.info("Voice memo written to %s", mp3_path)
        return str(mp3_path)
    except Exception as exc:
        # Deliberately broad: the memo is optional and must never take down
        # the report run.
        logger.warning("Voice memo generation failed: %s", exc)
        return None
|
||||
|
||||
|
||||
# ── Entry point ───────────────────────────────────────────────────────
|
||||
|
||||
def main() -> None:
|
||||
@@ -226,6 +268,10 @@ def main() -> None:
|
||||
"--dry-run", action="store_true",
|
||||
help="Print report to stdout instead of writing to disk",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--voice-memo", action="store_true",
|
||||
help="Generate an MP3 voice memo of the report using edge-tts (saved to /tmp/bezalel/)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
date_str = args.date or datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
@@ -242,6 +288,14 @@ def main() -> None:
|
||||
report_path.write_text(report_text)
|
||||
logger.info("Night Watch report written to %s", report_path)
|
||||
|
||||
if args.voice_memo:
|
||||
try:
|
||||
memo_path = _generate_voice_memo(report_text, date_str)
|
||||
if memo_path:
|
||||
logger.info("Voice memo: %s", memo_path)
|
||||
except Exception as exc:
|
||||
logger.warning("Voice memo failed (non-fatal): %s", exc)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
135
docs/voice-output.md
Normal file
135
docs/voice-output.md
Normal file
@@ -0,0 +1,135 @@
|
||||
# Voice Output System
|
||||
|
||||
## Overview
|
||||
|
||||
The Nexus voice output system converts text reports and briefings into spoken audio.
|
||||
It supports multiple TTS providers with automatic fallback so that audio generation
|
||||
degrades gracefully when a provider is unavailable.
|
||||
|
||||
Primary use cases:
|
||||
- **Deep Dive** daily briefings (`bin/deepdive_tts.py`)
|
||||
- **Night Watch** nightly reports (`bin/night_watch.py --voice-memo`)
|
||||
|
||||
---
|
||||
|
||||
## Available Providers
|
||||
|
||||
### edge-tts (recommended default)
|
||||
|
||||
- **Cost:** Zero — no API key, no account required
|
||||
- **Package:** `pip install "edge-tts>=6.1.9"` (quote the requirement so the shell does not treat `>=` as a redirection)
|
||||
- **Default voice:** `en-US-GuyNeural`
|
||||
- **Output format:** MP3
|
||||
- **How it works:** Streams audio from Microsoft Edge's neural TTS service over HTTPS.
|
||||
No local model download required.
|
||||
- **Available locales:** 100+ languages and locales. Full list:
|
||||
https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support
|
||||
|
||||
Notable English voices:
|
||||
| Voice ID | Style |
|
||||
|---|---|
|
||||
| `en-US-GuyNeural` | Neutral male (default) |
|
||||
| `en-US-JennyNeural` | Warm female |
|
||||
| `en-US-AriaNeural` | Expressive female |
|
||||
| `en-GB-RyanNeural` | British male |
|
||||
|
||||
### piper
|
||||
|
||||
- **Cost:** Free, fully offline
|
||||
- **Package:** `pip install piper-tts` + model download (~65 MB)
|
||||
- **Model location:** `~/.local/share/piper/en_US-lessac-medium.onnx`
|
||||
- **Output format:** WAV → MP3 (requires `lame`)
|
||||
- **Sovereignty:** Fully local; no network calls after model download
|
||||
|
||||
### elevenlabs
|
||||
|
||||
- **Cost:** Usage-based (paid)
|
||||
- **Requirement:** `ELEVENLABS_API_KEY` environment variable
|
||||
- **Output format:** MP3
|
||||
- **Quality:** Highest quality of the supported providers
|
||||
|
||||
### openai
|
||||
|
||||
- **Cost:** Usage-based (paid)
|
||||
- **Requirement:** `OPENAI_API_KEY` environment variable
|
||||
- **Output format:** MP3
|
||||
- **Default voice:** `alloy`
|
||||
|
||||
---
|
||||
|
||||
## Usage: deepdive_tts.py
|
||||
|
||||
```bash
|
||||
# Use edge-tts (zero cost)
|
||||
DEEPDIVE_TTS_PROVIDER=edge-tts python bin/deepdive_tts.py --text "Good morning."
|
||||
|
||||
# Specify a different Edge voice
|
||||
python bin/deepdive_tts.py --provider edge-tts --voice en-US-JennyNeural --text "Hello world."
|
||||
|
||||
# Read from a file
|
||||
python bin/deepdive_tts.py --provider edge-tts --input-file /tmp/briefing.txt --output /tmp/briefing
|
||||
|
||||
# Use OpenAI
|
||||
OPENAI_API_KEY=sk-... python bin/deepdive_tts.py --provider openai --voice nova --text "Hello."
|
||||
|
||||
# Use ElevenLabs
|
||||
ELEVENLABS_API_KEY=... python bin/deepdive_tts.py --provider elevenlabs --voice rachel --text "Hello."
|
||||
|
||||
# Use local Piper (offline)
|
||||
python bin/deepdive_tts.py --provider piper --text "Hello."
|
||||
```
|
||||
|
||||
Provider and voice can also be set via environment variables:
|
||||
|
||||
```bash
|
||||
export DEEPDIVE_TTS_PROVIDER=edge-tts
|
||||
export DEEPDIVE_TTS_VOICE=en-GB-RyanNeural
|
||||
python bin/deepdive_tts.py --text "Good evening."
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Usage: Night Watch --voice-memo
|
||||
|
||||
The `--voice-memo` flag causes Night Watch to generate an MP3 audio summary of the
|
||||
nightly report immediately after writing the markdown file.
|
||||
|
||||
```bash
|
||||
python bin/night_watch.py --voice-memo
|
||||
```
|
||||
|
||||
Output location: `/tmp/bezalel/night-watch-<YYYY-MM-DD>.mp3`
|
||||
|
||||
The voice memo:
|
||||
- Strips markdown formatting (`#`, `|`, `*`, `---`) for cleaner speech
|
||||
- Uses `edge-tts` with the `en-US-GuyNeural` voice
|
||||
- Is non-fatal: if TTS fails, the markdown report is still written normally
|
||||
|
||||
Example crontab with voice memo:
|
||||
|
||||
```cron
|
||||
0 3 * * * cd /path/to/the-nexus && python bin/night_watch.py --voice-memo \
|
||||
>> /var/log/bezalel/night-watch.log 2>&1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Fallback Chain
|
||||
|
||||
`HybridTTS` (used by `tts_engine.py`) attempts providers in this order:
|
||||
|
||||
1. **edge-tts** — zero cost, no API key
|
||||
2. **piper** — offline local model (if model file present)
|
||||
3. **elevenlabs** — cloud fallback (if `ELEVENLABS_API_KEY` set)
|
||||
|
||||
If `prefer_cloud=True` is passed, the order becomes: elevenlabs → piper.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 TODO
|
||||
|
||||
Evaluate **fish-speech** and **F5-TTS** as fully offline, sovereign alternatives
|
||||
with higher voice quality than Piper. These models run locally with no network
|
||||
dependency whatsoever, providing complete independence from Microsoft's Edge service.
|
||||
|
||||
Tracking: to be filed as a follow-up to issue #830.
|
||||
@@ -157,14 +157,45 @@ class ElevenLabsTTS:
|
||||
return output_path
|
||||
|
||||
|
||||
class EdgeTTS:
    """Zero-cost TTS using Microsoft Edge neural voices (no API key required).

    Requires: pip install "edge-tts>=6.1.9"
    (keep the quotes: an unquoted ``>=`` is parsed by the shell as a redirection)
    """

    # Voice used when the caller does not pass one.
    DEFAULT_VOICE = "en-US-GuyNeural"

    def __init__(self, voice: str = None):
        """Use *voice*, or ``DEFAULT_VOICE`` when it is None or empty."""
        self.voice = voice or self.DEFAULT_VOICE

    def synthesize(self, text: str, output_path: str) -> str:
        """Convert text to MP3 via Edge TTS.

        The audio is written to *output_path* with its suffix replaced by
        ``.mp3``; that final path is returned as a string.

        Raises:
            RuntimeError: if the ``edge_tts`` package cannot be imported. The
                original ``ImportError`` is attached as ``__cause__``.
        """
        try:
            import edge_tts
        except ImportError as exc:
            # Chain the cause so the real import failure stays visible.
            raise RuntimeError("edge-tts not installed. Run: pip install edge-tts") from exc

        import asyncio
        from pathlib import Path

        mp3_path = str(Path(output_path).with_suffix(".mp3"))

        async def _run():
            communicate = edge_tts.Communicate(text, self.voice)
            await communicate.save(mp3_path)

        # NOTE: asyncio.run() starts its own event loop, so this method must be
        # called from synchronous code, not from inside a running loop.
        asyncio.run(_run())
        return mp3_path
|
||||
|
||||
|
||||
class HybridTTS:
|
||||
"""TTS with sovereign primary, cloud fallback."""
|
||||
|
||||
|
||||
def __init__(self, prefer_cloud: bool = False):
|
||||
self.primary = None
|
||||
self.fallback = None
|
||||
self.prefer_cloud = prefer_cloud
|
||||
|
||||
|
||||
# Try preferred engine
|
||||
if prefer_cloud:
|
||||
self._init_elevenlabs()
|
||||
@@ -172,21 +203,29 @@ class HybridTTS:
|
||||
self._init_piper()
|
||||
else:
|
||||
self._init_piper()
|
||||
if not self.primary:
|
||||
self._init_edge_tts()
|
||||
if not self.primary:
|
||||
self._init_elevenlabs()
|
||||
|
||||
|
||||
def _init_piper(self):
    """Try to install PiperTTS as the primary engine; report and move on if it fails."""
    try:
        engine = PiperTTS()
    except Exception as err:
        # Best-effort: the constructor checks self.primary to decide what to try next.
        print(f"Piper init failed: {err}")
    else:
        self.primary = engine
|
||||
|
||||
|
||||
def _init_edge_tts(self):
    """Try to install EdgeTTS as the primary engine; report and move on if it fails."""
    try:
        engine = EdgeTTS()
    except Exception as err:
        # Best-effort: the constructor checks self.primary to decide what to try next.
        print(f"EdgeTTS init failed: {err}")
    else:
        self.primary = engine
|
||||
|
||||
def _init_elevenlabs(self):
    """Try to install ElevenLabsTTS as the primary engine; report and move on if it fails."""
    try:
        engine = ElevenLabsTTS()
    except Exception as err:
        # Best-effort: a failure here simply leaves self.primary unchanged.
        print(f"ElevenLabs init failed: {err}")
    else:
        self.primary = engine
|
||||
|
||||
|
||||
def synthesize(self, text: str, output_path: str) -> str:
|
||||
"""Synthesize with fallback."""
|
||||
if self.primary:
|
||||
@@ -194,7 +233,7 @@ class HybridTTS:
|
||||
return self.primary.synthesize(text, output_path)
|
||||
except Exception as e:
|
||||
print(f"Primary failed: {e}")
|
||||
|
||||
|
||||
raise RuntimeError("No TTS engine available")
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
pytest>=7.0
|
||||
pytest-asyncio>=0.21.0
|
||||
pyyaml>=6.0
|
||||
edge-tts>=6.1.9
|
||||
|
||||
420
tests/test_edge_tts.py
Normal file
420
tests/test_edge_tts.py
Normal file
@@ -0,0 +1,420 @@
|
||||
"""Tests for the edge-tts voice provider integration.
|
||||
|
||||
Issue: #1126 — edge-tts voice provider
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import types
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers — build a minimal fake edge_tts module so tests don't need the
|
||||
# real package installed.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_fake_edge_tts():
|
||||
"""Return a fake edge_tts module with a mock Communicate class."""
|
||||
fake = types.ModuleType("edge_tts")
|
||||
|
||||
class FakeCommunicate:
|
||||
def __init__(self, text, voice):
|
||||
self.text = text
|
||||
self.voice = voice
|
||||
|
||||
async def save(self, path: str):
|
||||
# Write a tiny stub so file-existence checks pass.
|
||||
Path(path).write_bytes(b"FAKE_MP3")
|
||||
|
||||
fake.Communicate = FakeCommunicate
|
||||
return fake
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests for EdgeTTSAdapter (bin/deepdive_tts.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEdgeTTSAdapter:
    """Tests for EdgeTTSAdapter in bin/deepdive_tts.py."""

    def _load_module(self, monkeypatch, fake_edge_tts=None):
        """Reload ``bin.deepdive_tts``, optionally with a fake ``edge_tts`` installed.

        The fake is registered through ``monkeypatch`` so ``sys.modules`` is
        restored at teardown and cannot leak into other tests.
        """
        import importlib

        if fake_edge_tts is not None:
            monkeypatch.setitem(sys.modules, "edge_tts", fake_edge_tts)
        import bin.deepdive_tts as mod

        return importlib.reload(mod)

    def _make_adapter(self, mod, tmp_path, voice_id=""):
        """Build an EdgeTTSAdapter whose config points at *tmp_path*."""
        config = mod.TTSConfig(
            provider="edge-tts",
            voice_id=voice_id,
            output_dir=tmp_path,
        )
        return mod.EdgeTTSAdapter(config)

    def test_default_voice(self, tmp_path, monkeypatch):
        """EdgeTTSAdapter uses en-US-GuyNeural when no voice_id is set."""
        mod = self._load_module(monkeypatch, _make_fake_edge_tts())
        adapter = self._make_adapter(mod, tmp_path)
        assert adapter.voice == mod.EdgeTTSAdapter.DEFAULT_VOICE

    def test_custom_voice(self, tmp_path, monkeypatch):
        """EdgeTTSAdapter respects explicit voice_id."""
        mod = self._load_module(monkeypatch, _make_fake_edge_tts())
        adapter = self._make_adapter(mod, tmp_path, voice_id="en-US-JennyNeural")
        assert adapter.voice == "en-US-JennyNeural"

    def test_synthesize_returns_mp3(self, tmp_path, monkeypatch):
        """synthesize() returns a .mp3 path and creates the file."""
        mod = self._load_module(monkeypatch, _make_fake_edge_tts())
        adapter = self._make_adapter(mod, tmp_path)

        result = adapter.synthesize("Hello world", tmp_path / "test_output")

        assert result.suffix == ".mp3"
        assert result.exists()

    def test_synthesize_passes_text_and_voice(self, tmp_path, monkeypatch):
        """synthesize() passes the correct text and voice to Communicate."""
        communicate_calls = []

        class TrackingCommunicate:
            def __init__(self, text, voice):
                communicate_calls.append((text, voice))

            async def save(self, path):
                Path(path).write_bytes(b"FAKE")

        fake = _make_fake_edge_tts()
        fake.Communicate = TrackingCommunicate
        mod = self._load_module(monkeypatch, fake)

        adapter = self._make_adapter(mod, tmp_path, voice_id="en-GB-RyanNeural")
        adapter.synthesize("Test sentence.", tmp_path / "out")

        assert communicate_calls == [("Test sentence.", "en-GB-RyanNeural")]

    def test_missing_package_raises(self, tmp_path, monkeypatch):
        """synthesize() raises RuntimeError when edge-tts is not installed."""
        mod = self._load_module(monkeypatch)
        adapter = self._make_adapter(mod, tmp_path)

        # A None entry in sys.modules makes ``import edge_tts`` raise ImportError,
        # which the adapter must translate into RuntimeError.
        monkeypatch.setitem(sys.modules, "edge_tts", None)
        with pytest.raises(RuntimeError, match="edge-tts not installed"):
            adapter.synthesize("Hello", tmp_path / "out")

    def test_adapters_dict_includes_edge_tts(self, monkeypatch):
        """ADAPTERS dict contains the edge-tts key."""
        mod = self._load_module(monkeypatch)
        assert "edge-tts" in mod.ADAPTERS
        assert mod.ADAPTERS["edge-tts"] is mod.EdgeTTSAdapter

    def test_get_provider_config_edge_tts_default_voice(self, monkeypatch):
        """get_provider_config() returns GuyNeural as default for edge-tts."""
        monkeypatch.setenv("DEEPDIVE_TTS_PROVIDER", "edge-tts")
        monkeypatch.delenv("DEEPDIVE_TTS_VOICE", raising=False)
        mod = self._load_module(monkeypatch)

        config = mod.get_provider_config()
        assert config.provider == "edge-tts"
        assert config.voice_id == "en-US-GuyNeural"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests for EdgeTTS class (intelligence/deepdive/tts_engine.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEdgeTTSEngine:
    """Tests for EdgeTTS class in intelligence/deepdive/tts_engine.py."""

    def _import_engine(self, monkeypatch, fake_edge_tts=None):
        """Reload ``tts_engine`` with the fake ``edge_tts`` module installed.

        All ``sys.modules`` changes go through ``monkeypatch`` so they are
        undone at teardown.
        """
        import importlib

        if fake_edge_tts is not None:
            monkeypatch.setitem(sys.modules, "edge_tts", fake_edge_tts)
        # tts_engine imports requests; stub it only when it really is not
        # installed, so the real library is never shadowed by a mock.
        try:
            import requests  # noqa: F401
        except ImportError:
            monkeypatch.setitem(sys.modules, "requests", MagicMock())
        import intelligence.deepdive.tts_engine as eng

        return importlib.reload(eng)

    def test_default_voice(self, monkeypatch):
        """EdgeTTS defaults to en-US-GuyNeural."""
        eng = self._import_engine(monkeypatch, _make_fake_edge_tts())
        tts = eng.EdgeTTS()
        assert tts.voice == eng.EdgeTTS.DEFAULT_VOICE

    def test_custom_voice(self, monkeypatch):
        """EdgeTTS respects explicit voice argument."""
        eng = self._import_engine(monkeypatch, _make_fake_edge_tts())
        tts = eng.EdgeTTS(voice="en-US-AriaNeural")
        assert tts.voice == "en-US-AriaNeural"

    def test_synthesize_creates_mp3(self, tmp_path, monkeypatch):
        """EdgeTTS.synthesize() writes an MP3 file and returns the path."""
        eng = self._import_engine(monkeypatch, _make_fake_edge_tts())
        tts = eng.EdgeTTS()

        result = tts.synthesize("Hello from engine.", str(tmp_path / "output.mp3"))

        assert result.endswith(".mp3")
        assert Path(result).exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests for HybridTTS fallback to edge-tts
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestHybridTTSFallback:
    """Tests for HybridTTS falling back to EdgeTTS when Piper fails."""

    def _import_engine(self, monkeypatch, fake_edge_tts=None):
        """Reload ``tts_engine`` with the fake ``edge_tts`` module installed.

        All ``sys.modules`` changes go through ``monkeypatch`` so they are
        undone at teardown.
        """
        import importlib

        if fake_edge_tts is not None:
            monkeypatch.setitem(sys.modules, "edge_tts", fake_edge_tts)
        # Stub requests only when it really is not installed, so the real
        # library is never shadowed by a mock.
        try:
            import requests  # noqa: F401
        except ImportError:
            monkeypatch.setitem(sys.modules, "requests", MagicMock())
        import intelligence.deepdive.tts_engine as eng

        return importlib.reload(eng)

    def test_hybrid_falls_back_to_edge_tts_when_piper_fails(self, monkeypatch):
        """HybridTTS uses EdgeTTS when PiperTTS init fails."""
        eng = self._import_engine(monkeypatch, _make_fake_edge_tts())

        # Make PiperTTS always raise on init.
        with patch.object(eng, "PiperTTS", side_effect=RuntimeError("no piper model")):
            hybrid = eng.HybridTTS(prefer_cloud=False)

        # primary should be an EdgeTTS instance.
        assert isinstance(hybrid.primary, eng.EdgeTTS)

    def test_hybrid_synthesize_via_edge_tts(self, tmp_path, monkeypatch):
        """HybridTTS.synthesize() succeeds via EdgeTTS fallback."""
        eng = self._import_engine(monkeypatch, _make_fake_edge_tts())

        with patch.object(eng, "PiperTTS", side_effect=RuntimeError("no piper")):
            hybrid = eng.HybridTTS(prefer_cloud=False)

        result = hybrid.synthesize("Hybrid test.", str(tmp_path / "hybrid_out.mp3"))
        assert Path(result).exists()

    def test_hybrid_raises_when_no_engine_available(self, tmp_path, monkeypatch):
        """HybridTTS raises RuntimeError when all engines fail."""
        eng = self._import_engine(monkeypatch, _make_fake_edge_tts())

        with patch.object(eng, "PiperTTS", side_effect=RuntimeError("piper gone")), \
                patch.object(eng, "EdgeTTS", side_effect=RuntimeError("edge gone")), \
                patch.object(eng, "ElevenLabsTTS", side_effect=ValueError("no key")):
            hybrid = eng.HybridTTS(prefer_cloud=False)

        assert hybrid.primary is None
        with pytest.raises(RuntimeError, match="No TTS engine available"):
            hybrid.synthesize("Text", str(tmp_path / "out.mp3"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests for night_watch.py --voice-memo flag
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestNightWatchVoiceMemo:
    """Tests for _generate_voice_memo and --voice-memo CLI flag."""

    def _import_night_watch(self, monkeypatch, fake_edge_tts=None, output_dir=None):
        """Reload ``bin.night_watch`` for a test.

        Args:
            monkeypatch: pytest fixture; every patch is undone at teardown.
            fake_edge_tts: optional module to install as ``edge_tts``.
            output_dir: when given, ``Path("/tmp/bezalel")`` inside night_watch
                resolves to this directory so the real function can run
                without writing to /tmp.
        """
        import importlib

        if fake_edge_tts is not None:
            monkeypatch.setitem(sys.modules, "edge_tts", fake_edge_tts)
        import bin.night_watch as nw

        nw = importlib.reload(nw)

        if output_dir is not None:
            def _redirect(*parts):
                # Only the hard-coded memo directory is redirected.
                if parts == ("/tmp/bezalel",):
                    return output_dir
                return Path(*parts)

            monkeypatch.setattr(nw, "Path", _redirect)
        return nw

    def test_generate_voice_memo_returns_path(self, tmp_path, monkeypatch):
        """_generate_voice_memo() returns the mp3 path on success."""
        nw = self._import_night_watch(
            monkeypatch, _make_fake_edge_tts(), output_dir=tmp_path
        )

        result = nw._generate_voice_memo("# Report\n\nAll OK.", "2026-04-08")

        assert result == str(tmp_path / "night-watch-2026-04-08.mp3")
        assert Path(result).exists()

    def test_generate_voice_memo_returns_none_when_edge_tts_missing(self, monkeypatch):
        """_generate_voice_memo() returns None when edge-tts is not installed."""
        nw = self._import_night_watch(monkeypatch)

        # A None entry in sys.modules makes ``import edge_tts`` raise ImportError.
        monkeypatch.setitem(sys.modules, "edge_tts", None)
        result = nw._generate_voice_memo("Some report text.", "2026-04-08")

        assert result is None

    def test_generate_voice_memo_strips_markdown(self, tmp_path, monkeypatch):
        """_generate_voice_memo() calls Communicate with stripped text."""
        communicate_calls = []

        class TrackingCommunicate:
            def __init__(self, text, voice):
                communicate_calls.append(text)

            async def save(self, path):
                Path(path).write_bytes(b"FAKE")

        fake = types.ModuleType("edge_tts")
        fake.Communicate = TrackingCommunicate
        nw = self._import_night_watch(monkeypatch, fake, output_dir=tmp_path)

        report = "# Bezalel Night Watch\n\n| Check | Status |\n|---|---|\n| Disk | OK |\n\n**Overall:** OK"

        # Exercise the real function, not a copy of its body.
        result = nw._generate_voice_memo(report, "2026-04-08")

        assert result is not None
        assert len(communicate_calls) == 1
        spoken = communicate_calls[0]
        # Markdown headers, pipes, asterisks and rules should be stripped.
        assert "#" not in spoken
        assert "|" not in spoken
        assert "**" not in spoken
        assert "---" not in spoken

    def test_voice_memo_flag_in_parser(self, monkeypatch, capsys):
        """--voice-memo flag is registered in the night_watch argument parser."""
        nw = self._import_night_watch(monkeypatch)

        # --help makes argparse print every registered option and exit 0 from
        # parse_args(), so this checks night_watch's real parser.
        # NOTE(review): assumes main() does nothing with side effects before
        # parsing its arguments — confirm against bin/night_watch.py.
        monkeypatch.setattr(sys, "argv", ["night_watch.py", "--help"])
        with pytest.raises(SystemExit) as excinfo:
            nw.main()

        assert excinfo.value.code == 0
        assert "--voice-memo" in capsys.readouterr().out
|
||||
Reference in New Issue
Block a user