fix: propagate STT disable through shared transcription config

- add stt.enabled to the default user config
- make transcription_tools respect the disabled flag globally
- surface disabled state cleanly in voice mode diagnostics
- add regression coverage for disabled STT provider selection
2026-03-14 22:09:59 -07:00
parent c36136084a
commit f8ceadbad0
4 changed files with 47 additions and 5 deletions
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -205,7 +205,8 @@ DEFAULT_CONFIG = {
    },
    
    "stt": {
-        "provider": "local",  # "local" (free, faster-whisper) | "openai" (Whisper API)
+        "enabled": True,
+        "provider": "local",  # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API)
        "local": {
            "model": "base",  # tiny, base, small, medium, large-v3
        },
@@ -284,7 +285,7 @@ DEFAULT_CONFIG = {
    },

    # Config schema version - bump this when adding new required fields
-    "_config_version": 7,
+    "_config_version": 8,
 }

 # =============================================================================
--- a/tests/tools/test_transcription.py
+++ b/tests/tools/test_transcription.py
@@ -59,6 +59,10 @@ class TestGetProvider:
            from tools.transcription_tools import _get_provider
            assert _get_provider({}) == "local"

+    def test_disabled_config_returns_none(self):
+        from tools.transcription_tools import _get_provider
+        assert _get_provider({"enabled": False, "provider": "openai"}) == "none"
+

 # ---------------------------------------------------------------------------
 # File validation
@@ -217,6 +221,18 @@ class TestTranscribeAudio:
        assert result["success"] is False
        assert "No STT provider" in result["error"]

+    def test_disabled_config_returns_disabled_error(self, tmp_path):
+        audio_file = tmp_path / "test.ogg"
+        audio_file.write_bytes(b"fake audio")
+
+        with patch("tools.transcription_tools._load_stt_config", return_value={"enabled": False}), \
+             patch("tools.transcription_tools._get_provider", return_value="none"):
+            from tools.transcription_tools import transcribe_audio
+            result = transcribe_audio(str(audio_file))
+
+        assert result["success"] is False
+        assert "disabled" in result["error"].lower()
+
    def test_invalid_file_returns_error(self):
        from tools.transcription_tools import transcribe_audio
        result = transcribe_audio("/nonexistent/file.ogg")
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -93,6 +93,18 @@ def _load_stt_config() -> dict:
        return {}


+def is_stt_enabled(stt_config: Optional[dict] = None) -> bool:
+    """Return whether STT is enabled in config."""
+    if stt_config is None:
+        stt_config = _load_stt_config()
+    enabled = stt_config.get("enabled", True)
+    if isinstance(enabled, str):
+        return enabled.strip().lower() in ("true", "1", "yes", "on")
+    if enabled is None:
+        return True
+    return bool(enabled)
+
+
 def _get_provider(stt_config: dict) -> str:
    """Determine which STT provider to use.

@@ -101,6 +113,9 @@ def _get_provider(stt_config: dict) -> str:
      2. Auto-detect: local > groq (free) > openai (paid)
      3. Disabled (returns "none")
    """
+    if not is_stt_enabled(stt_config):
+        return "none"
+
    provider = stt_config.get("provider", DEFAULT_PROVIDER)

    if provider == "local":
@@ -334,6 +349,13 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A

    # Load config and determine provider
    stt_config = _load_stt_config()
+    if not is_stt_enabled(stt_config):
+        return {
+            "success": False,
+            "transcript": "",
+            "error": "STT is disabled in config.yaml (stt.enabled: false).",
+        }
+
    provider = _get_provider(stt_config)

    if provider == "local":
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -703,10 +703,11 @@ def check_voice_requirements() -> Dict[str, Any]:
        ``missing_packages``, and ``details``.
    """
    # Determine STT provider availability
-    from tools.transcription_tools import _get_provider, _load_stt_config, _HAS_FASTER_WHISPER
+    from tools.transcription_tools import _get_provider, _load_stt_config, is_stt_enabled, _HAS_FASTER_WHISPER
    stt_config = _load_stt_config()
+    stt_enabled = is_stt_enabled(stt_config)
    stt_provider = _get_provider(stt_config)
-    stt_available = stt_provider != "none"
+    stt_available = stt_enabled and stt_provider != "none"

    missing: List[str] = []
    has_audio = _audio_available()
@@ -725,7 +726,9 @@ def check_voice_requirements() -> Dict[str, Any]:
    else:
        details_parts.append("Audio capture: MISSING (pip install sounddevice numpy)")

-    if stt_provider == "local":
+    if not stt_enabled:
+        details_parts.append("STT provider: DISABLED in config (stt.enabled: false)")
+    elif stt_provider == "local":
        details_parts.append("STT provider: OK (local faster-whisper)")
    elif stt_provider == "groq":
        details_parts.append("STT provider: OK (Groq)")