fix: allow voice mode in WSL when PulseAudio bridge is configured

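WSL detection was treated as a hard failure, blocking voice mode even when audio worked through a PulseAudio bridge. Now the presence of the PULSE_SERVER environment variable turns the WSL check into a non-blocking notice (returned in a new 'notices' list) instead of a blocking warning. Audio device query failures are likewise treated as non-blocking whenever PULSE_SERVER is set; note that this check looks only at PULSE_SERVER and is not itself restricted to WSL.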
2026-03-30 11:30:26 +03:00
parent 7f670a06cf
commit 08171c1c31
2 changed files with 153 additions and 5 deletions
--- a/tests/tools/test_voice_mode.py
+++ b/tests/tools/test_voice_mode.py
@@ -56,6 +56,134 @@ def mock_sd(monkeypatch):
    return mock


+# ============================================================================
+# detect_audio_environment — WSL / SSH / Docker detection
+# ============================================================================
+
+class TestDetectAudioEnvironment:
+    def test_clean_environment_is_available(self, monkeypatch):
+        """No SSH, Docker, or WSL — should be available."""
+        monkeypatch.delenv("SSH_CLIENT", raising=False)
+        monkeypatch.delenv("SSH_TTY", raising=False)
+        monkeypatch.delenv("SSH_CONNECTION", raising=False)
+        monkeypatch.setattr("tools.voice_mode._import_audio",
+                            lambda: (MagicMock(), MagicMock()))
+
+        from tools.voice_mode import detect_audio_environment
+        result = detect_audio_environment()
+        assert result["available"] is True
+        assert result["warnings"] == []
+
+    def test_ssh_blocks_voice(self, monkeypatch):
+        """SSH environment should block voice mode."""
+        monkeypatch.setenv("SSH_CLIENT", "1.2.3.4 54321 22")
+        monkeypatch.setattr("tools.voice_mode._import_audio",
+                            lambda: (MagicMock(), MagicMock()))
+
+        from tools.voice_mode import detect_audio_environment
+        result = detect_audio_environment()
+        assert result["available"] is False
+        assert any("SSH" in w for w in result["warnings"])
+
+    def test_wsl_without_pulse_blocks_voice(self, monkeypatch, tmp_path):
+        """WSL without PULSE_SERVER should block voice mode."""
+        monkeypatch.delenv("SSH_CLIENT", raising=False)
+        monkeypatch.delenv("SSH_TTY", raising=False)
+        monkeypatch.delenv("SSH_CONNECTION", raising=False)
+        monkeypatch.delenv("PULSE_SERVER", raising=False)
+        monkeypatch.setattr("tools.voice_mode._import_audio",
+                            lambda: (MagicMock(), MagicMock()))
+
+        proc_version = tmp_path / "proc_version"
+        proc_version.write_text("Linux 5.15.0-microsoft-standard-WSL2")
+
+        _real_open = open
+        def _fake_open(f, *a, **kw):
+            if f == "/proc/version":
+                return _real_open(str(proc_version), *a, **kw)
+            return _real_open(f, *a, **kw)
+
+        with patch("builtins.open", side_effect=_fake_open):
+            from tools.voice_mode import detect_audio_environment
+            result = detect_audio_environment()
+
+        assert result["available"] is False
+        assert any("WSL" in w for w in result["warnings"])
+        assert any("PulseAudio" in w for w in result["warnings"])
+
+    def test_wsl_with_pulse_allows_voice(self, monkeypatch, tmp_path):
+        """WSL with PULSE_SERVER set should NOT block voice mode."""
+        monkeypatch.delenv("SSH_CLIENT", raising=False)
+        monkeypatch.delenv("SSH_TTY", raising=False)
+        monkeypatch.delenv("SSH_CONNECTION", raising=False)
+        monkeypatch.setenv("PULSE_SERVER", "unix:/mnt/wslg/PulseServer")
+        monkeypatch.setattr("tools.voice_mode._import_audio",
+                            lambda: (MagicMock(), MagicMock()))
+
+        proc_version = tmp_path / "proc_version"
+        proc_version.write_text("Linux 5.15.0-microsoft-standard-WSL2")
+
+        _real_open = open
+        def _fake_open(f, *a, **kw):
+            if f == "/proc/version":
+                return _real_open(str(proc_version), *a, **kw)
+            return _real_open(f, *a, **kw)
+
+        with patch("builtins.open", side_effect=_fake_open):
+            from tools.voice_mode import detect_audio_environment
+            result = detect_audio_environment()
+
+        assert result["available"] is True
+        assert result["warnings"] == []
+        assert any("WSL" in n for n in result.get("notices", []))
+
+    def test_wsl_device_query_fails_with_pulse_continues(self, monkeypatch, tmp_path):
+        """WSL device query failure should not block if PULSE_SERVER is set."""
+        monkeypatch.delenv("SSH_CLIENT", raising=False)
+        monkeypatch.delenv("SSH_TTY", raising=False)
+        monkeypatch.delenv("SSH_CONNECTION", raising=False)
+        monkeypatch.setenv("PULSE_SERVER", "unix:/mnt/wslg/PulseServer")
+
+        mock_sd = MagicMock()
+        mock_sd.query_devices.side_effect = Exception("device query failed")
+        monkeypatch.setattr("tools.voice_mode._import_audio",
+                            lambda: (mock_sd, MagicMock()))
+
+        proc_version = tmp_path / "proc_version"
+        proc_version.write_text("Linux 5.15.0-microsoft-standard-WSL2")
+
+        _real_open = open
+        def _fake_open(f, *a, **kw):
+            if f == "/proc/version":
+                return _real_open(str(proc_version), *a, **kw)
+            return _real_open(f, *a, **kw)
+
+        with patch("builtins.open", side_effect=_fake_open):
+            from tools.voice_mode import detect_audio_environment
+            result = detect_audio_environment()
+
+        assert result["available"] is True
+        assert any("device query failed" in n for n in result.get("notices", []))
+
+    def test_device_query_fails_without_pulse_blocks(self, monkeypatch):
+        """Device query failure without PULSE_SERVER should block."""
+        monkeypatch.delenv("SSH_CLIENT", raising=False)
+        monkeypatch.delenv("SSH_TTY", raising=False)
+        monkeypatch.delenv("SSH_CONNECTION", raising=False)
+        monkeypatch.delenv("PULSE_SERVER", raising=False)
+
+        mock_sd = MagicMock()
+        mock_sd.query_devices.side_effect = Exception("device query failed")
+        monkeypatch.setattr("tools.voice_mode._import_audio",
+                            lambda: (mock_sd, MagicMock()))
+
+        from tools.voice_mode import detect_audio_environment
+        result = detect_audio_environment()
+
+        assert result["available"] is False
+        assert any("PortAudio" in w for w in result["warnings"])
+
+
 # ============================================================================
 # check_voice_requirements
 # ============================================================================
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -51,9 +51,12 @@ def _audio_available() -> bool:
 def detect_audio_environment() -> dict:
    """Detect if the current environment supports audio I/O.

-    Returns dict with 'available' (bool) and 'warnings' (list of strings).
+    Returns dict with 'available' (bool), 'warnings' (list of hard-fail
+    reasons that block voice mode), and 'notices' (list of informational
+    messages that do NOT block voice mode).
    """
-    warnings = []
+    warnings = []   # hard-fail: these block voice mode
+    notices = []     # informational: logged but don't block

    # SSH detection
    if any(os.environ.get(v) for v in ('SSH_CLIENT', 'SSH_TTY', 'SSH_CONNECTION')):
@@ -63,11 +66,20 @@ def detect_audio_environment() -> dict:
    if os.path.exists('/.dockerenv'):
        warnings.append("Running inside Docker container -- no audio devices")

-    # WSL detection
+    # WSL detection — PulseAudio bridge makes audio work in WSL.
+    # Only block if PULSE_SERVER is not configured.
    try:
        with open('/proc/version', 'r') as f:
            if 'microsoft' in f.read().lower():
-                warnings.append("Running in WSL -- audio requires PulseAudio bridge to Windows")
+                if os.environ.get('PULSE_SERVER'):
+                    notices.append("Running in WSL with PulseAudio bridge")
+                else:
+                    warnings.append(
+                        "Running in WSL -- audio requires PulseAudio bridge.\n"
+                        "  1. Set PULSE_SERVER=unix:/mnt/wslg/PulseServer\n"
+                        "  2. Create ~/.asoundrc pointing ALSA at PulseAudio\n"
+                        "  3. Verify with: arecord -d 3 /tmp/test.wav && aplay /tmp/test.wav"
+                    )
    except (FileNotFoundError, PermissionError, OSError):
        pass

@@ -79,7 +91,12 @@ def detect_audio_environment() -> dict:
            if not devices:
                warnings.append("No audio input/output devices detected")
        except Exception:
-            warnings.append("Audio subsystem error (PortAudio cannot query devices)")
+            # In WSL with PulseAudio, device queries can fail even though
+            # recording/playback works fine. Don't block if PULSE_SERVER is set.
+            if os.environ.get('PULSE_SERVER'):
+                notices.append("Audio device query failed but PULSE_SERVER is set -- continuing")
+            else:
+                warnings.append("Audio subsystem error (PortAudio cannot query devices)")
    except ImportError:
        warnings.append("Audio libraries not installed (pip install sounddevice numpy)")
    except OSError:
@@ -93,6 +110,7 @@ def detect_audio_environment() -> dict:
    return {
        "available": len(warnings) == 0,
        "warnings": warnings,
+        "notices": notices,
    }

 # ---------------------------------------------------------------------------
@@ -748,6 +766,8 @@ def check_voice_requirements() -> Dict[str, Any]:

    for warning in env_check["warnings"]:
        details_parts.append(f"Environment: {warning}")
+    for notice in env_check.get("notices", []):
+        details_parts.append(f"Environment: {notice}")

    return {
        "available": available,