refactor(tts): replace NeuTTS optional skill with built-in provider + setup flow
Remove the optional skill (redundant now that NeuTTS is a built-in TTS provider). Replace neutts_cli dependency with a standalone synthesis helper (tools/neutts_synth.py) that calls the neutts Python API directly in a subprocess. Add TTS provider selection to hermes setup: - 'hermes setup' now prompts for TTS provider after model selection - 'hermes setup tts' available as standalone section - Selecting NeuTTS checks for deps and offers to install: espeak-ng (system) + neutts[all] (pip) - ElevenLabs/OpenAI selections prompt for API keys - Tool status display shows NeuTTS install state Changes: - Remove optional-skills/mlops/models/neutts/ (skill + CLI scaffold) - Add tools/neutts_synth.py (standalone synthesis subprocess helper) - Move jo.wav/jo.txt to tools/neutts_samples/ (bundled default voice) - Refactor _generate_neutts() — uses neutts API via subprocess, no neutts_cli dependency, config-driven ref_audio/ref_text/model/device - Add TTS setup to hermes_cli/setup.py (SETUP_SECTIONS, tool status) - Update config.py defaults (ref_audio, ref_text, model, device)
This commit is contained in:
@@ -256,7 +256,10 @@ DEFAULT_CONFIG = {
|
||||
# Voices: alloy, echo, fable, onyx, nova, shimmer
|
||||
},
|
||||
"neutts": {
|
||||
"voice": "", # NeuTTS voice profile name (empty = use default)
|
||||
"ref_audio": "", # Path to reference voice audio (empty = bundled default)
|
||||
"ref_text": "", # Path to reference voice transcript (empty = bundled default)
|
||||
"model": "neuphonic/neutts-air-q4-gguf", # HuggingFace model repo
|
||||
"device": "cpu", # cpu, cuda, or mps
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
@@ -479,6 +479,16 @@ def _print_setup_summary(config: dict, hermes_home):
|
||||
tool_status.append(("Text-to-Speech (ElevenLabs)", True, None))
|
||||
elif tts_provider == "openai" and get_env_value("VOICE_TOOLS_OPENAI_KEY"):
|
||||
tool_status.append(("Text-to-Speech (OpenAI)", True, None))
|
||||
elif tts_provider == "neutts":
|
||||
try:
|
||||
import importlib.util
|
||||
neutts_ok = importlib.util.find_spec("neutts") is not None
|
||||
except Exception:
|
||||
neutts_ok = False
|
||||
if neutts_ok:
|
||||
tool_status.append(("Text-to-Speech (NeuTTS local)", True, None))
|
||||
else:
|
||||
tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'"))
|
||||
else:
|
||||
tool_status.append(("Text-to-Speech (Edge TTS)", True, None))
|
||||
|
||||
@@ -1571,6 +1581,163 @@ def setup_model_provider(config: dict):
|
||||
|
||||
save_config(config)
|
||||
|
||||
# Offer TTS provider selection at the end of model setup
|
||||
_setup_tts_provider(config)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Section 1b: TTS Provider Configuration
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _check_espeak_ng() -> bool:
|
||||
"""Check if espeak-ng is installed."""
|
||||
import shutil
|
||||
return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None
|
||||
|
||||
|
||||
def _install_neutts_deps() -> bool:
|
||||
"""Install NeuTTS dependencies with user approval. Returns True on success."""
|
||||
import sys
|
||||
|
||||
# Check espeak-ng
|
||||
if not _check_espeak_ng():
|
||||
print()
|
||||
print_warning("NeuTTS requires espeak-ng for phonemization.")
|
||||
if sys.platform == "darwin":
|
||||
print_info("Install with: brew install espeak-ng")
|
||||
elif sys.platform == "win32":
|
||||
print_info("Install with: choco install espeak-ng")
|
||||
else:
|
||||
print_info("Install with: sudo apt install espeak-ng")
|
||||
print()
|
||||
if prompt_yes_no("Install espeak-ng now?", True):
|
||||
try:
|
||||
if sys.platform == "darwin":
|
||||
subprocess.run(["brew", "install", "espeak-ng"], check=True)
|
||||
elif sys.platform == "win32":
|
||||
subprocess.run(["choco", "install", "espeak-ng", "-y"], check=True)
|
||||
else:
|
||||
subprocess.run(["sudo", "apt", "install", "-y", "espeak-ng"], check=True)
|
||||
print_success("espeak-ng installed")
|
||||
except (subprocess.CalledProcessError, FileNotFoundError) as e:
|
||||
print_warning(f"Could not install espeak-ng automatically: {e}")
|
||||
print_info("Please install it manually and re-run setup.")
|
||||
return False
|
||||
else:
|
||||
print_warning("espeak-ng is required for NeuTTS. Install it manually before using NeuTTS.")
|
||||
|
||||
# Install neutts Python package
|
||||
print()
|
||||
print_info("Installing neutts Python package...")
|
||||
print_info("This will also download the TTS model (~300MB) on first use.")
|
||||
print()
|
||||
try:
|
||||
subprocess.run(
|
||||
[sys.executable, "-m", "pip", "install", "-U", "neutts[all]", "--quiet"],
|
||||
check=True, timeout=300,
|
||||
)
|
||||
print_success("neutts installed successfully")
|
||||
return True
|
||||
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
|
||||
print_error(f"Failed to install neutts: {e}")
|
||||
print_info("Try manually: pip install neutts[all]")
|
||||
return False
|
||||
|
||||
|
||||
def _setup_tts_provider(config: dict):
|
||||
"""Interactive TTS provider selection with install flow for NeuTTS."""
|
||||
tts_config = config.get("tts", {})
|
||||
current_provider = tts_config.get("provider", "edge")
|
||||
|
||||
provider_labels = {
|
||||
"edge": "Edge TTS",
|
||||
"elevenlabs": "ElevenLabs",
|
||||
"openai": "OpenAI TTS",
|
||||
"neutts": "NeuTTS",
|
||||
}
|
||||
current_label = provider_labels.get(current_provider, current_provider)
|
||||
|
||||
print()
|
||||
print_header("Text-to-Speech Provider (optional)")
|
||||
print_info(f"Current: {current_label}")
|
||||
print()
|
||||
|
||||
choices = [
|
||||
"Edge TTS (free, cloud-based, no setup needed)",
|
||||
"ElevenLabs (premium quality, needs API key)",
|
||||
"OpenAI TTS (good quality, needs API key)",
|
||||
"NeuTTS (local on-device, free, ~300MB model download)",
|
||||
f"Keep current ({current_label})",
|
||||
]
|
||||
idx = prompt_choice("Select TTS provider:", choices, len(choices) - 1)
|
||||
|
||||
if idx == 4: # Keep current
|
||||
return
|
||||
|
||||
providers = ["edge", "elevenlabs", "openai", "neutts"]
|
||||
selected = providers[idx]
|
||||
|
||||
if selected == "neutts":
|
||||
# Check if already installed
|
||||
try:
|
||||
import importlib.util
|
||||
already_installed = importlib.util.find_spec("neutts") is not None
|
||||
except Exception:
|
||||
already_installed = False
|
||||
|
||||
if already_installed:
|
||||
print_success("NeuTTS is already installed")
|
||||
else:
|
||||
print()
|
||||
print_info("NeuTTS requires:")
|
||||
print_info(" • Python package: neutts (~50MB install + ~300MB model on first use)")
|
||||
print_info(" • System package: espeak-ng (phonemizer)")
|
||||
print()
|
||||
if prompt_yes_no("Install NeuTTS dependencies now?", True):
|
||||
if not _install_neutts_deps():
|
||||
print_warning("NeuTTS installation incomplete. Falling back to Edge TTS.")
|
||||
selected = "edge"
|
||||
else:
|
||||
print_info("Skipping install. Set tts.provider to 'neutts' after installing manually.")
|
||||
selected = "edge"
|
||||
|
||||
elif selected == "elevenlabs":
|
||||
existing = get_env_value("ELEVENLABS_API_KEY")
|
||||
if not existing:
|
||||
print()
|
||||
api_key = prompt("ElevenLabs API key", password=True)
|
||||
if api_key:
|
||||
save_env_value("ELEVENLABS_API_KEY", api_key)
|
||||
print_success("ElevenLabs API key saved")
|
||||
else:
|
||||
print_warning("No API key provided. Falling back to Edge TTS.")
|
||||
selected = "edge"
|
||||
|
||||
elif selected == "openai":
|
||||
existing = get_env_value("VOICE_TOOLS_OPENAI_KEY")
|
||||
if not existing:
|
||||
print()
|
||||
api_key = prompt("OpenAI API key for TTS", password=True)
|
||||
if api_key:
|
||||
save_env_value("VOICE_TOOLS_OPENAI_KEY", api_key)
|
||||
print_success("OpenAI TTS API key saved")
|
||||
else:
|
||||
print_warning("No API key provided. Falling back to Edge TTS.")
|
||||
selected = "edge"
|
||||
|
||||
# Save the selection
|
||||
if "tts" not in config:
|
||||
config["tts"] = {}
|
||||
config["tts"]["provider"] = selected
|
||||
save_config(config)
|
||||
print_success(f"TTS provider set to: {provider_labels.get(selected, selected)}")
|
||||
|
||||
|
||||
def setup_tts(config: dict):
|
||||
"""Standalone TTS setup (for 'hermes setup tts')."""
|
||||
_setup_tts_provider(config)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Section 2: Terminal Backend Configuration
|
||||
@@ -2548,6 +2715,7 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
|
||||
SETUP_SECTIONS = [
|
||||
("model", "Model & Provider", setup_model_provider),
|
||||
("tts", "Text-to-Speech", setup_tts),
|
||||
("terminal", "Terminal Backend", setup_terminal_backend),
|
||||
("gateway", "Messaging Platforms (Gateway)", setup_gateway),
|
||||
("tools", "Tools", setup_tools),
|
||||
|
||||
Reference in New Issue
Block a user