diff --git a/cli.py b/cli.py index 351e23a6..cc9f522a 100755 --- a/cli.py +++ b/cli.py @@ -3555,12 +3555,12 @@ class HermesCLI: "Install with: pip install sounddevice numpy\n" "Or: pip install hermes-agent[voice]" ) - if not reqs["stt_key_set"]: + if not reqs.get("stt_available", reqs.get("stt_key_set")): raise RuntimeError( - "Voice mode requires an STT API key for transcription.\n" - "Set GROQ_API_KEY (free) or VOICE_TOOLS_OPENAI_KEY.\n" - "Groq: https://console.groq.com/keys\n" - "OpenAI: https://platform.openai.com/api-keys" + "Voice mode requires an STT provider for transcription.\n" + "Option 1: pip install faster-whisper (free, local)\n" + "Option 2: Set GROQ_API_KEY (free tier)\n" + "Option 3: Set VOICE_TOOLS_OPENAI_KEY (paid)" ) # Prevent double-start from concurrent threads (atomic check-and-set) diff --git a/tests/tools/test_voice_mode.py b/tests/tools/test_voice_mode.py index f92bf6f2..70424fee 100644 --- a/tests/tools/test_voice_mode.py +++ b/tests/tools/test_voice_mode.py @@ -65,14 +65,14 @@ class TestCheckVoiceRequirements: monkeypatch.setattr("tools.voice_mode._audio_available", lambda: True) monkeypatch.setattr("tools.voice_mode.detect_audio_environment", lambda: {"available": True, "warnings": []}) - monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test-key") + monkeypatch.setattr("tools.transcription_tools._get_provider", lambda cfg: "openai") from tools.voice_mode import check_voice_requirements result = check_voice_requirements() assert result["available"] is True assert result["audio_available"] is True - assert result["stt_key_set"] is True + assert result["stt_available"] is True assert result["missing_packages"] == [] def test_missing_audio_packages(self, monkeypatch): @@ -89,19 +89,18 @@ class TestCheckVoiceRequirements: assert "sounddevice" in result["missing_packages"] assert "numpy" in result["missing_packages"] - def test_missing_stt_key(self, monkeypatch): + def test_missing_stt_provider(self, monkeypatch): monkeypatch.setattr("tools.voice_mode._audio_available", lambda: True) monkeypatch.setattr("tools.voice_mode.detect_audio_environment", lambda: {"available": True, "warnings": []}) - monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) - monkeypatch.delenv("GROQ_API_KEY", raising=False) + monkeypatch.setattr("tools.transcription_tools._get_provider", lambda cfg: "none") from tools.voice_mode import check_voice_requirements result = check_voice_requirements() assert result["available"] is False - assert result["stt_key_set"] is False - assert "STT API key: MISSING" in result["details"] + assert result["stt_available"] is False + assert "STT provider: MISSING" in result["details"] # ============================================================================ diff --git a/tools/voice_mode.py b/tools/voice_mode.py index a108ed84..93e044eb 100644 --- a/tools/voice_mode.py +++ b/tools/voice_mode.py @@ -676,12 +676,15 @@ def check_voice_requirements() -> Dict[str, Any]: """Check if all voice mode requirements are met. Returns: - Dict with ``available``, ``audio_available``, ``stt_key_set``, + Dict with ``available``, ``audio_available``, ``stt_available``, ``missing_packages``, and ``details``. """ - openai_key = bool(os.getenv("VOICE_TOOLS_OPENAI_KEY")) - groq_key = bool(os.getenv("GROQ_API_KEY")) - stt_key_set = openai_key or groq_key + # Determine STT provider availability + from tools.transcription_tools import _get_provider, _load_stt_config, _HAS_FASTER_WHISPER + stt_config = _load_stt_config() + stt_provider = _get_provider(stt_config) + stt_available = stt_provider != "none" + missing: List[str] = [] has_audio = _audio_available() @@ -691,7 +694,7 @@ def check_voice_requirements() -> Dict[str, Any]: # Environment detection env_check = detect_audio_environment() - available = has_audio and stt_key_set and env_check["available"] + available = has_audio and stt_available and env_check["available"] details_parts = [] if has_audio: @@ -699,12 +702,17 @@ def check_voice_requirements() -> Dict[str, Any]: else: details_parts.append("Audio capture: MISSING (pip install sounddevice numpy)") - if openai_key: - details_parts.append("STT API key: OK (OpenAI)") - elif groq_key: - details_parts.append("STT API key: OK (Groq)") + if stt_provider == "local": + details_parts.append("STT provider: OK (local faster-whisper)") + elif stt_provider == "groq": + details_parts.append("STT provider: OK (Groq)") + elif stt_provider == "openai": + details_parts.append("STT provider: OK (OpenAI)") else: - details_parts.append("STT API key: MISSING (set GROQ_API_KEY or VOICE_TOOLS_OPENAI_KEY)") + details_parts.append( + "STT provider: MISSING (pip install faster-whisper, " + "or set GROQ_API_KEY / VOICE_TOOLS_OPENAI_KEY)" + ) for warning in env_check["warnings"]: details_parts.append(f"Environment: {warning}") @@ -712,7 +720,7 @@ def check_voice_requirements() -> Dict[str, Any]: return { "available": available, "audio_available": has_audio, - "stt_key_set": stt_key_set, + "stt_available": stt_available, "missing_packages": missing, "details": "\n".join(details_parts), "environment": env_check,