diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index bdde858d3..44755b195 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -205,7 +205,8 @@ DEFAULT_CONFIG = {
     },
 
     "stt": {
-        "provider": "local",  # "local" (free, faster-whisper) | "openai" (Whisper API)
+        "enabled": True,
+        "provider": "local",  # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API)
         "local": {
             "model": "base",  # tiny, base, small, medium, large-v3
         },
@@ -284,7 +285,7 @@ DEFAULT_CONFIG = {
     },
 
     # Config schema version - bump this when adding new required fields
-    "_config_version": 7,
+    "_config_version": 8,
 }
 
 # =============================================================================
diff --git a/tests/tools/test_transcription.py b/tests/tools/test_transcription.py
index fe3b24a8d..c8daface0 100644
--- a/tests/tools/test_transcription.py
+++ b/tests/tools/test_transcription.py
@@ -59,6 +59,10 @@ class TestGetProvider:
         from tools.transcription_tools import _get_provider
         assert _get_provider({}) == "local"
 
+    def test_disabled_config_returns_none(self):
+        from tools.transcription_tools import _get_provider
+        assert _get_provider({"enabled": False, "provider": "openai"}) == "none"
+
 
 # ---------------------------------------------------------------------------
 # File validation
@@ -217,6 +221,18 @@ class TestTranscribeAudio:
         assert result["success"] is False
         assert "No STT provider" in result["error"]
 
+    def test_disabled_config_returns_disabled_error(self, tmp_path):
+        audio_file = tmp_path / "test.ogg"
+        audio_file.write_bytes(b"fake audio")
+
+        with patch("tools.transcription_tools._load_stt_config", return_value={"enabled": False}), \
+             patch("tools.transcription_tools._get_provider", return_value="none"):
+            from tools.transcription_tools import transcribe_audio
+            result = transcribe_audio(str(audio_file))
+
+        assert result["success"] is False
+        assert "disabled" in result["error"].lower()
+
     def test_invalid_file_returns_error(self):
         from tools.transcription_tools import transcribe_audio
         result = transcribe_audio("/nonexistent/file.ogg")
diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py
index a20ba4134..684d0a8d8 100644
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -93,6 +93,24 @@ def _load_stt_config() -> dict:
         return {}
 
 
+def is_stt_enabled(stt_config: Optional[dict] = None) -> bool:
+    """Return whether STT is enabled in config.
+
+    Loads the ``stt`` section via ``_load_stt_config()`` when *stt_config*
+    is omitted. A missing or ``None`` ``enabled`` value counts as enabled.
+    Strings are enabled only when they read "true", "1", "yes" or "on"
+    (case and surrounding whitespace ignored); non-string values fall back
+    to ``bool()``.
+    """
+    if stt_config is None:
+        stt_config = _load_stt_config()
+    enabled = stt_config.get("enabled", True)
+    if isinstance(enabled, str):
+        return enabled.strip().lower() in ("true", "1", "yes", "on")
+    if enabled is None:
+        return True
+    return bool(enabled)
+
+
 def _get_provider(stt_config: dict) -> str:
     """Determine which STT provider to use.
 
@@ -101,6 +119,12 @@ def _get_provider(stt_config: dict) -> str:
       2. Auto-detect: local > groq (free) > openai (paid)
       3. Disabled (returns "none")
+
+    ``stt.enabled: false`` takes precedence over everything above and
+    also returns "none".
     """
+    if not is_stt_enabled(stt_config):
+        return "none"
+
     provider = stt_config.get("provider", DEFAULT_PROVIDER)
 
     if provider == "local":
@@ -334,6 +358,13 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A
 
     # Load config and determine provider
     stt_config = _load_stt_config()
+    if not is_stt_enabled(stt_config):
+        return {
+            "success": False,
+            "transcript": "",
+            "error": "STT is disabled in config.yaml (stt.enabled: false).",
+        }
+
     provider = _get_provider(stt_config)
 
     if provider == "local":
diff --git a/tools/voice_mode.py b/tools/voice_mode.py
index a2c70ac1b..783584895 100644
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -703,10 +703,11 @@ def check_voice_requirements() -> Dict[str, Any]:
     ``missing_packages``, and ``details``.
     """
     # Determine STT provider availability
-    from tools.transcription_tools import _get_provider, _load_stt_config, _HAS_FASTER_WHISPER
+    from tools.transcription_tools import _get_provider, _load_stt_config, is_stt_enabled, _HAS_FASTER_WHISPER
     stt_config = _load_stt_config()
+    stt_enabled = is_stt_enabled(stt_config)
     stt_provider = _get_provider(stt_config)
-    stt_available = stt_provider != "none"
+    stt_available = stt_enabled and stt_provider != "none"
 
     missing: List[str] = []
     has_audio = _audio_available()
@@ -725,7 +726,9 @@ def check_voice_requirements() -> Dict[str, Any]:
     else:
         details_parts.append("Audio capture: MISSING (pip install sounddevice numpy)")
 
-    if stt_provider == "local":
+    if not stt_enabled:
+        details_parts.append("STT provider: DISABLED in config (stt.enabled: false)")
+    elif stt_provider == "local":
         details_parts.append("STT provider: OK (local faster-whisper)")
     elif stt_provider == "groq":
         details_parts.append("STT provider: OK (Groq)")