From c36136084a86a37cf6abee7ffe98301d3d780d03 Mon Sep 17 00:00:00 2001
From: teyrebaz33 <hakanerten02@hotmail.com>
Date: Sat, 14 Mar 2026 22:09:53 -0700
Subject: [PATCH 1/2] fix(gateway): honor stt.enabled false for voice
 transcription

- bridge stt.enabled from config.yaml into gateway runtime config
- preserve the flag in GatewayConfig serialization
- skip gateway voice transcription when STT is disabled
- add regression tests for config loading and disabled transcription flow
---
 gateway/config.py                | 26 ++++++++++++++++
 gateway/run.py                   |  8 ++++-
 tests/gateway/test_stt_config.py | 53 ++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 tests/gateway/test_stt_config.py

diff --git a/gateway/config.py b/gateway/config.py
index 47c739e91..2b187c521 100644
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -21,6 +21,17 @@ from hermes_cli.config import get_hermes_home
 logger = logging.getLogger(__name__)
 
 
+def _coerce_bool(value: Any, default: bool = True) -> bool:
+    """Interpret a bool-like config value, returning *default* when it is ``None``."""
+    if value is None:
+        return default
+    if isinstance(value, str):
+        # Only these spellings are truthy; every other string yields False.
+        return value.strip().lower() in ("true", "1", "yes", "on")
+    # Real bools pass through unchanged; other types use Python truthiness.
+    return bool(value)
+
+
 class Platform(Enum):
     """Supported messaging platforms."""
     LOCAL = "local"
@@ -160,6 +171,9 @@ class GatewayConfig:
     
     # Delivery settings
     always_log_local: bool = True  # Always save cron outputs to local files
+
+    # STT settings
+    stt_enabled: bool = True  # Whether to auto-transcribe inbound voice messages
     
     def get_connected_platforms(self) -> List[Platform]:
         """Return list of platforms that are enabled and configured."""
@@ -224,6 +238,7 @@ class GatewayConfig:
             "quick_commands": self.quick_commands,
             "sessions_dir": str(self.sessions_dir),
             "always_log_local": self.always_log_local,
+            "stt_enabled": self.stt_enabled,
         }
     
     @classmethod
@@ -260,6 +275,10 @@ class GatewayConfig:
         if not isinstance(quick_commands, dict):
             quick_commands = {}
 
+        stt_enabled = data.get("stt_enabled")
+        if stt_enabled is None:
+            stt_enabled = data.get("stt", {}).get("enabled") if isinstance(data.get("stt"), dict) else None
+
         return cls(
             platforms=platforms,
             default_reset_policy=default_policy,
@@ -269,6 +288,7 @@ class GatewayConfig:
             quick_commands=quick_commands,
             sessions_dir=sessions_dir,
             always_log_local=data.get("always_log_local", True),
+            stt_enabled=_coerce_bool(stt_enabled, True),
         )
 
 
@@ -318,6 +338,12 @@ def load_gateway_config() -> GatewayConfig:
                 else:
                     logger.warning("Ignoring invalid quick_commands in config.yaml (expected mapping, got %s)", type(qc).__name__)
 
+            # Bridge STT enable/disable from config.yaml into gateway runtime.
+            # This keeps the gateway aligned with the user-facing config source.
+            stt_cfg = yaml_cfg.get("stt")
+            if isinstance(stt_cfg, dict) and "enabled" in stt_cfg:
+                config.stt_enabled = _coerce_bool(stt_cfg.get("enabled"), True)
+
             # Bridge discord settings from config.yaml to env vars
             # (env vars take precedence — only set if not already defined)
             discord_cfg = yaml_cfg.get("discord", {})
diff --git a/gateway/run.py b/gateway/run.py
index e973852b4..f955573c1 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -3512,7 +3512,7 @@ class GatewayRunner:
         audio_paths: List[str],
     ) -> str:
         """
-        Auto-transcribe user voice/audio messages using OpenAI Whisper API
+        Auto-transcribe user voice/audio messages using the configured STT provider
         and prepend the transcript to the message text.
 
         Args:
@@ -3522,6 +3522,12 @@ class GatewayRunner:
         Returns:
             The enriched message string with transcriptions prepended.
         """
+        if not getattr(self.config, "stt_enabled", True):
+            disabled_note = "[The user sent voice message(s), but transcription is disabled in config.]"
+            if user_text:
+                return f"{disabled_note}\n\n{user_text}"
+            return disabled_note
+
         from tools.transcription_tools import transcribe_audio, get_stt_model_from_config
         import asyncio
 
diff --git a/tests/gateway/test_stt_config.py b/tests/gateway/test_stt_config.py
new file mode 100644
index 000000000..d5a9fc55b
--- /dev/null
+++ b/tests/gateway/test_stt_config.py
@@ -0,0 +1,53 @@
+"""Gateway STT config tests — honor stt.enabled: false from config.yaml."""
+
+from pathlib import Path
+from unittest.mock import AsyncMock, patch
+
+import pytest
+import yaml
+
+from gateway.config import GatewayConfig, load_gateway_config
+
+
+def test_gateway_config_stt_disabled_from_dict_nested():
+    """A nested ``stt.enabled: false`` mapping must disable STT on the config."""
+    assert GatewayConfig.from_dict({"stt": {"enabled": False}}).stt_enabled is False
+
+
+def test_load_gateway_config_bridges_stt_enabled_from_config_yaml(tmp_path, monkeypatch):
+    """``stt.enabled: false`` in config.yaml must reach the loaded gateway config."""
+    home_dir = tmp_path / ".hermes"
+    home_dir.mkdir()
+    yaml_text = yaml.dump({"stt": {"enabled": False}})
+    (home_dir / "config.yaml").write_text(yaml_text, encoding="utf-8")
+
+    # Point both HERMES_HOME and Path.home() into the temp directory.
+    monkeypatch.setenv("HERMES_HOME", str(home_dir))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+    loaded = load_gateway_config()
+
+    assert loaded.stt_enabled is False
+
+
+@pytest.mark.asyncio
+async def test_enrich_message_with_transcription_skips_when_stt_disabled():
+    """With STT disabled, enrichment adds a notice and never calls the transcriber."""
+    from gateway.run import GatewayRunner
+
+    # Bypass __init__; the disabled path under test only reads ``config``.
+    runner = GatewayRunner.__new__(GatewayRunner)
+    runner.config = GatewayConfig(stt_enabled=False)
+
+    must_not_run = AssertionError("transcribe_audio should not be called when STT is disabled")
+    with patch("tools.transcription_tools.transcribe_audio", side_effect=must_not_run), patch(
+        "tools.transcription_tools.get_stt_model_from_config",
+        return_value=None,
+    ):
+        result = await runner._enrich_message_with_transcription(
+            "caption",
+            ["/tmp/voice.ogg"],
+        )
+
+    assert "transcription is disabled" in result.lower()
+    assert "caption" in result

From f8ceadbad0c0aaaacbda59ed8293fc806b867f84 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Sat, 14 Mar 2026 22:09:59 -0700
Subject: [PATCH 2/2] fix: propagate STT disable through shared transcription
 config

- add stt.enabled to the default user config
- make transcription_tools respect the disabled flag globally
- surface disabled state cleanly in voice mode diagnostics
- add regression coverage for disabled STT provider selection
---
 hermes_cli/config.py              |  5 +++--
 tests/tools/test_transcription.py | 16 ++++++++++++++++
 tools/transcription_tools.py      | 22 ++++++++++++++++++++++
 tools/voice_mode.py               |  9 ++++++---
 4 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index bdde858d3..44755b195 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -205,7 +205,8 @@ DEFAULT_CONFIG = {
     },
     
     "stt": {
-        "provider": "local",  # "local" (free, faster-whisper) | "openai" (Whisper API)
+        "enabled": True,
+        "provider": "local",  # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API)
         "local": {
             "model": "base",  # tiny, base, small, medium, large-v3
         },
@@ -284,7 +285,7 @@ DEFAULT_CONFIG = {
     },
 
     # Config schema version - bump this when adding new required fields
-    "_config_version": 7,
+    "_config_version": 8,
 }
 
 # =============================================================================
diff --git a/tests/tools/test_transcription.py b/tests/tools/test_transcription.py
index fe3b24a8d..c8daface0 100644
--- a/tests/tools/test_transcription.py
+++ b/tests/tools/test_transcription.py
@@ -59,6 +59,10 @@ class TestGetProvider:
             from tools.transcription_tools import _get_provider
             assert _get_provider({}) == "local"
 
+    def test_disabled_config_returns_none(self):
+        from tools.transcription_tools import _get_provider
+        assert _get_provider({"enabled": False, "provider": "openai"}) == "none"  # disabled flag overrides an explicit provider
+
 
 # ---------------------------------------------------------------------------
 # File validation
@@ -217,6 +221,18 @@ class TestTranscribeAudio:
         assert result["success"] is False
         assert "No STT provider" in result["error"]
 
+    def test_disabled_config_returns_disabled_error(self, tmp_path):
+        """A disabled STT config makes transcribe_audio fail with a 'disabled' error."""
+        from tools.transcription_tools import transcribe_audio
+
+        clip = tmp_path / "test.ogg"
+        clip.write_bytes(b"fake audio")
+        with patch("tools.transcription_tools._load_stt_config", return_value={"enabled": False}), \
+             patch("tools.transcription_tools._get_provider", return_value="none"):
+            outcome = transcribe_audio(str(clip))
+        assert outcome["success"] is False
+        assert "disabled" in outcome["error"].lower()
+
     def test_invalid_file_returns_error(self):
         from tools.transcription_tools import transcribe_audio
         result = transcribe_audio("/nonexistent/file.ogg")
diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py
index a20ba4134..684d0a8d8 100644
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -93,6 +93,18 @@ def _load_stt_config() -> dict:
         return {}
 
 
+def is_stt_enabled(stt_config: Optional[dict] = None) -> bool:
+    """Report whether STT is switched on; a missing or null flag means enabled."""
+    cfg = _load_stt_config() if stt_config is None else stt_config
+    flag = cfg.get("enabled", True)
+    if flag is None:
+        return True
+    if isinstance(flag, str):
+        # Only these spellings count as true; any other string disables STT.
+        return flag.strip().lower() in ("true", "1", "yes", "on")
+    return bool(flag)
+
+
 def _get_provider(stt_config: dict) -> str:
     """Determine which STT provider to use.
 
@@ -101,6 +113,9 @@ def _get_provider(stt_config: dict) -> str:
       2. Auto-detect: local > groq (free) > openai (paid)
       3. Disabled (returns "none")
     """
+    if not is_stt_enabled(stt_config):
+        return "none"
+
     provider = stt_config.get("provider", DEFAULT_PROVIDER)
 
     if provider == "local":
@@ -334,6 +349,13 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A
 
     # Load config and determine provider
     stt_config = _load_stt_config()
+    if not is_stt_enabled(stt_config):
+        return {
+            "success": False,
+            "transcript": "",
+            "error": "STT is disabled in config.yaml (stt.enabled: false).",
+        }
+
     provider = _get_provider(stt_config)
 
     if provider == "local":
diff --git a/tools/voice_mode.py b/tools/voice_mode.py
index a2c70ac1b..783584895 100644
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -703,10 +703,11 @@ def check_voice_requirements() -> Dict[str, Any]:
         ``missing_packages``, and ``details``.
     """
     # Determine STT provider availability
-    from tools.transcription_tools import _get_provider, _load_stt_config, _HAS_FASTER_WHISPER
+    from tools.transcription_tools import _get_provider, _load_stt_config, is_stt_enabled, _HAS_FASTER_WHISPER
     stt_config = _load_stt_config()
+    stt_enabled = is_stt_enabled(stt_config)
     stt_provider = _get_provider(stt_config)
-    stt_available = stt_provider != "none"
+    stt_available = stt_enabled and stt_provider != "none"
 
     missing: List[str] = []
     has_audio = _audio_available()
@@ -725,7 +726,9 @@ def check_voice_requirements() -> Dict[str, Any]:
     else:
         details_parts.append("Audio capture: MISSING (pip install sounddevice numpy)")
 
-    if stt_provider == "local":
+    if not stt_enabled:
+        details_parts.append("STT provider: DISABLED in config (stt.enabled: false)")
+    elif stt_provider == "local":
         details_parts.append("STT provider: OK (local faster-whisper)")
     elif stt_provider == "groq":
         details_parts.append("STT provider: OK (Groq)")