fix: address PR review round 4 — remove web UI, fix audio/import/interface issues

Remove the web UI gateway (web.py, tests, docs, toolset, env vars, and the
Platform.WEB enum member) per maintainer request — Nous is building their own official chat UI.

Fix 1: Replace sd.wait() with polling pattern in play_audio_file() to prevent
indefinite hang when audio device stalls (consistent with play_beep()).

Fix 2: Use importlib.util.find_spec() for faster_whisper/openai availability
checks instead of module-level imports that trigger heavy native library
loading (CUDA/cuDNN) at import time.

Fix 3: Remove inspect.signature() hack in _send_voice_reply() — add **kwargs
to Telegram send_voice() so all adapters accept metadata uniformly.

Fix 4: Make session loading resilient to removed platform enum values — skip
entries with unknown platforms instead of crashing the entire gateway.
This commit is contained in:
0xbyt4
2026-03-14 09:06:52 +03:00
parent 1ad5e0ed15
commit 35748a2fb0
17 changed files with 55 additions and 2930 deletions

View File

@@ -213,13 +213,6 @@ VOICE_TOOLS_OPENAI_KEY=
# EMAIL_ALLOWED_USERS=your@email.com
# EMAIL_HOME_ADDRESS=your@email.com
# Web UI (browser-based chat interface on local network)
# Access from phone/tablet/desktop at http://<your-ip>:8765
# WEB_UI_ENABLED=false
# WEB_UI_PORT=8765
# WEB_UI_HOST=127.0.0.1 # Use 0.0.0.0 to expose on LAN
# WEB_UI_TOKEN= # Auto-generated if empty
# Gateway-wide: allow ALL users without an allowlist (default: false = deny)
# Only set to true if you intentionally want open access.
# GATEWAY_ALLOW_ALL_USERS=false

View File

@@ -31,7 +31,6 @@ class Platform(Enum):
SIGNAL = "signal"
HOMEASSISTANT = "homeassistant"
EMAIL = "email"
WEB = "web"
@dataclass
@@ -177,9 +176,6 @@ class GatewayConfig:
# Email uses extra dict for config (address + imap_host + smtp_host)
elif platform == Platform.EMAIL and config.extra.get("address"):
connected.append(platform)
# Web UI uses enabled flag only
elif platform == Platform.WEB:
connected.append(platform)
return connected
def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
@@ -470,18 +466,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
name=os.getenv("EMAIL_HOME_ADDRESS_NAME", "Home"),
)
# Web UI
web_enabled = os.getenv("WEB_UI_ENABLED", "").lower() in ("true", "1", "yes")
if web_enabled:
if Platform.WEB not in config.platforms:
config.platforms[Platform.WEB] = PlatformConfig()
config.platforms[Platform.WEB].enabled = True
config.platforms[Platform.WEB].extra.update({
"port": int(os.getenv("WEB_UI_PORT", "8765")),
"host": os.getenv("WEB_UI_HOST", "") or "127.0.0.1",
"token": os.getenv("WEB_UI_TOKEN", ""),
})
# Session settings
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
if idle_minutes:

View File

@@ -311,6 +311,7 @@ class TelegramAdapter(BasePlatformAdapter):
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
"""Send audio as a native Telegram voice message or audio file."""
if not self._bot:

File diff suppressed because it is too large Load Diff

View File

@@ -829,13 +829,6 @@ class GatewayRunner:
return None
return EmailAdapter(config)
elif platform == Platform.WEB:
from gateway.platforms.web import WebAdapter, check_web_requirements
if not check_web_requirements():
logger.warning("Web: aiohttp not installed. Run: pip install aiohttp")
return None
return WebAdapter(config)
return None
def _is_user_authorized(self, source: SessionSource) -> bool:
@@ -855,11 +848,6 @@ class GatewayRunner:
if source.platform == Platform.HOMEASSISTANT:
return True
# Web UI users are authenticated via token at the WebSocket level.
# No additional allowlist check needed.
if source.platform == Platform.WEB:
return True
user_id = source.user_id
if not user_id:
return False
@@ -978,7 +966,7 @@ class GatewayRunner:
"personality", "retry", "undo", "sethome", "set-home",
"compress", "usage", "insights", "reload-mcp", "reload_mcp",
"update", "title", "resume", "provider", "rollback",
"background", "reasoning", "voice", "remote-control", "remote_control"}
"background", "reasoning", "voice"}
if command and command in _known_commands:
await self.hooks.emit(f"command:{command}", {
"platform": source.platform.value if source.platform else "",
@@ -1053,10 +1041,6 @@ class GatewayRunner:
if command == "voice":
return await self._handle_voice_command(event)
if command in ("remote-control", "remote_control"):
return await self._handle_remote_control_command(event)
# User-defined quick commands (bypass agent loop, no LLM call)
if command:
quick_commands = self.config.get("quick_commands", {})
@@ -1741,7 +1725,6 @@ class GatewayRunner:
"`/rollback [number]` — List or restore filesystem checkpoints",
"`/background <prompt>` — Run a prompt in a separate background session",
"`/voice [on|off|tts|status]` — Toggle voice reply mode",
"`/remote-control [port] [token]` — Start web UI for remote access",
"`/reload-mcp` — Reload MCP servers from config",
"`/update` — Update Hermes Agent to the latest version",
"`/help` — Show this message",
@@ -2415,10 +2398,6 @@ class GatewayRunner:
}
if event.source.thread_id:
send_kwargs["metadata"] = {"thread_id": event.source.thread_id}
import inspect
sig = inspect.signature(adapter.send_voice)
if "metadata" not in sig.parameters:
send_kwargs.pop("metadata", None)
await adapter.send_voice(**send_kwargs)
except Exception as e:
logger.warning("Auto voice reply failed: %s", e, exc_info=True)
@@ -2488,62 +2467,6 @@ class GatewayRunner:
)
return f"{result['error']}"
async def _handle_remote_control_command(self, event: MessageEvent) -> str:
    """Handle /remote-control: report the running web UI, or start one on demand.

    Command arguments are whitespace-separated: an all-digit word sets the
    port (default 8765) and any other word not starting with ``-`` is used as
    the token. The token is only echoed back when the command comes from a DM.

    Returns:
        A human-readable status message (URL and token, or a failure reason).
    """
    from gateway.config import Platform, PlatformConfig

    is_dm = event.source and event.source.chat_type == "dm"

    def _token_for_display(web_adapter):
        # Only reveal the token in direct messages.
        return web_adapter._token if is_dm else "(hidden — use in DM to see token)"

    # A web adapter is already registered: just describe it.
    if Platform.WEB in self.adapters:
        adapter = self.adapters[Platform.WEB]
        local_ip = adapter._get_local_ip()
        token_display = _token_for_display(adapter)
        return (
            f"Web UI already running.\n"
            f"URL: http://{local_ip}:{adapter._port}\n"
            f"Token: {token_display}"
        )

    # Otherwise bring one up now.
    try:
        from gateway.platforms.web import WebAdapter, check_web_requirements

        if not check_web_requirements():
            return "Web UI requires aiohttp. Run: pip install aiohttp"

        port, token = 8765, ""
        for word in event.get_command_args().strip().split():
            if word.isdigit():
                port = int(word)
            elif word and not word.startswith("-"):
                token = word

        adapter = WebAdapter(
            PlatformConfig(
                enabled=True,
                extra={"port": port, "host": "127.0.0.1", "token": token},
            )
        )
        adapter.set_message_handler(self._handle_message)

        if not await adapter.connect():
            return f"Failed to start Web UI on port {port}. Port may be in use."

        self.adapters[Platform.WEB] = adapter
        local_ip = adapter._get_local_ip()
        token_display = _token_for_display(adapter)
        return (
            f"Web UI started!\n"
            f"URL: http://{local_ip}:{adapter._port}\n"
            f"Token: {token_display}\n"
            f"Open this URL on your phone or any device on the same network."
        )
    except Exception as e:
        logger.error("Failed to start web UI: %s", e, exc_info=True)
        return f"Failed to start Web UI: {e}"
async def _handle_background_command(self, event: MessageEvent) -> str:
"""Handle /background <prompt> — run a prompt in a separate background session.
@@ -2607,7 +2530,6 @@ class GatewayRunner:
Platform.SIGNAL: "hermes-signal",
Platform.HOMEASSISTANT: "hermes-homeassistant",
Platform.EMAIL: "hermes-email",
Platform.WEB: "hermes-web",
}
platform_toolsets_config = {}
try:
@@ -2629,7 +2551,6 @@ class GatewayRunner:
Platform.SIGNAL: "signal",
Platform.HOMEASSISTANT: "homeassistant",
Platform.EMAIL: "email",
Platform.WEB: "web",
}.get(source.platform, "telegram")
config_toolsets = platform_toolsets_config.get(platform_config_key)
@@ -3517,7 +3438,6 @@ class GatewayRunner:
Platform.SIGNAL: "hermes-signal",
Platform.HOMEASSISTANT: "hermes-homeassistant",
Platform.EMAIL: "hermes-email",
Platform.WEB: "hermes-web",
}
# Try to load platform_toolsets from config
@@ -3542,7 +3462,6 @@ class GatewayRunner:
Platform.SIGNAL: "signal",
Platform.HOMEASSISTANT: "homeassistant",
Platform.EMAIL: "email",
Platform.WEB: "web",
}.get(source.platform, "telegram")
# Use config override if present (list of toolsets), otherwise hardcoded default

View File

@@ -383,7 +383,11 @@ class SessionStore:
with open(sessions_file, "r", encoding="utf-8") as f:
data = json.load(f)
for key, entry_data in data.items():
self._entries[key] = SessionEntry.from_dict(entry_data)
try:
self._entries[key] = SessionEntry.from_dict(entry_data)
except (ValueError, KeyError):
# Skip entries with unknown/removed platform values
continue
except Exception as e:
print(f"[gateway] Warning: Failed to load sessions: {e}")

View File

@@ -390,33 +390,6 @@ class TestDiscordPlayTtsSkip:
# Web play_tts sends play_audio (not voice bubble)
# =====================================================================
class TestWebPlayTts:
    """The Web adapter's play_tts broadcasts a ``play_audio`` event, not a voice bubble."""

    @pytest.mark.asyncio
    async def test_play_tts_sends_play_audio(self, tmp_path):
        from gateway.platforms.web import WebAdapter
        from gateway.config import PlatformConfig

        web_adapter = WebAdapter(
            PlatformConfig(
                enabled=True,
                extra={"port": 0, "host": "127.0.0.1", "token": "tok"},
            )
        )
        # Replace the broadcast hook with a mock so the emitted payload can be inspected.
        web_adapter._broadcast = AsyncMock()
        web_adapter._media_dir = tmp_path / "media"
        web_adapter._media_dir.mkdir()

        source_audio = tmp_path / "test.ogg"
        source_audio.write_bytes(b"fake audio")

        outcome = await web_adapter.play_tts(chat_id="web", audio_path=str(source_audio))
        assert outcome.success is True

        # First positional argument of the broadcast call is the payload dict.
        sent = web_adapter._broadcast.call_args[0][0]
        assert sent["type"] == "play_audio"
        assert "/media/" in sent["url"]
# =====================================================================
# Help text + known commands
# =====================================================================

View File

@@ -1,926 +0,0 @@
"""Tests for the Web UI gateway platform adapter.
Covers:
1. Platform enum exists with correct value
2. Config loading from env vars via _apply_env_overrides
3. WebAdapter init and config parsing (port, host, token)
4. Token auto-generation when not provided
5. check_web_requirements function
6. HTTP server start/stop (connect/disconnect)
7. Auth screen served on GET /
8. Media directory creation and cleanup
9. WebSocket auth handshake (auth_ok / auth_fail)
10. WebSocket message routing (text, voice)
11. Auto-TTS play_tts sends invisible playback
12. Authorization bypass (Web platform always authorized)
13. Toolset registration (hermes-web in toolset maps)
14. LAN IP detection (_get_local_ip / _get_local_ips)
15. Security: path traversal sanitization
16. Security: media endpoint authentication
17. Security: hmac.compare_digest for token comparison
18. Security: DOMPurify XSS prevention
19. Security: default bind to 127.0.0.1
20. Security: /remote-control token hiding in group chats
21. Network: VPN/multi-interface IP detection edge cases
22. Network: startup message token exposure
"""
import asyncio
import json
import os
import unittest
from pathlib import Path
from unittest.mock import patch, MagicMock, AsyncMock
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig, _apply_env_overrides
from gateway.platforms.base import SendResult
# ===========================================================================
# 1. Platform Enum
# ===========================================================================
class TestPlatformEnum(unittest.TestCase):
    """Checks that the Platform enum exposes exactly one ``web`` member."""

    def test_web_in_platform_enum(self):
        web_value = Platform.WEB.value
        self.assertEqual(web_value, "web")

    def test_web_distinct_from_others(self):
        values = [member.value for member in Platform]
        self.assertIn("web", values)
        # No other member may share the "web" value.
        self.assertEqual(values.count("web"), 1)
# ===========================================================================
# 2. Config loading from env vars
# ===========================================================================
class TestConfigEnvOverrides(unittest.TestCase):
    """Exercises _apply_env_overrides for the WEB_UI_* environment variables."""

    @staticmethod
    def _load_config():
        """Return a fresh GatewayConfig with environment overrides applied."""
        cfg = GatewayConfig()
        _apply_env_overrides(cfg)
        return cfg

    @patch.dict(os.environ, {
        "WEB_UI_ENABLED": "true",
        "WEB_UI_PORT": "9000",
        "WEB_UI_HOST": "127.0.0.1",
        "WEB_UI_TOKEN": "mytoken",
    }, clear=False)
    def test_web_config_loaded_from_env(self):
        cfg = self._load_config()
        self.assertIn(Platform.WEB, cfg.platforms)
        web = cfg.platforms[Platform.WEB]
        self.assertTrue(web.enabled)
        self.assertEqual(web.extra["port"], 9000)
        self.assertEqual(web.extra["host"], "127.0.0.1")
        self.assertEqual(web.extra["token"], "mytoken")

    @patch.dict(os.environ, {
        "WEB_UI_ENABLED": "true",
        "WEB_UI_TOKEN": "",
        "WEB_UI_HOST": "",
    }, clear=False)
    def test_web_defaults(self):
        cfg = self._load_config()
        self.assertIn(Platform.WEB, cfg.platforms)
        web_extra = cfg.platforms[Platform.WEB].extra
        # Empty host/token env values fall back to the built-in defaults.
        self.assertEqual(web_extra["port"], 8765)
        self.assertEqual(web_extra["host"], "127.0.0.1")
        self.assertEqual(web_extra["token"], "")

    @patch.dict(os.environ, {}, clear=True)
    def test_web_not_loaded_without_env(self):
        cfg = self._load_config()
        self.assertNotIn(Platform.WEB, cfg.platforms)

    @patch.dict(os.environ, {"WEB_UI_ENABLED": "false"}, clear=False)
    def test_web_not_loaded_when_disabled(self):
        cfg = self._load_config()
        self.assertNotIn(Platform.WEB, cfg.platforms)
# ===========================================================================
# 3. WebAdapter init
# ===========================================================================
class TestWebAdapterInit:
    """Constructor-level checks: how WebAdapter reads port, host and token from config."""

    def _make_adapter(self, **extra):
        """Build a WebAdapter from default settings overlaid with *extra*."""
        from gateway.platforms.web import WebAdapter

        settings = {"port": 8765, "host": "0.0.0.0", "token": "", **extra}
        return WebAdapter(PlatformConfig(enabled=True, extra=settings))

    def test_default_port(self):
        assert self._make_adapter()._port == 8765

    def test_custom_port(self):
        assert self._make_adapter(port=9999)._port == 9999

    def test_custom_host(self):
        assert self._make_adapter(host="127.0.0.1")._host == "127.0.0.1"

    def test_explicit_token(self):
        assert self._make_adapter(token="secret123")._token == "secret123"

    def test_auto_generated_token(self):
        # An empty configured token must be replaced by a non-empty one.
        generated = self._make_adapter(token="")._token
        assert len(generated) > 0
        assert generated != ""

    def test_name_property(self):
        assert self._make_adapter().name == "Web"
# ===========================================================================
# 4. check_web_requirements
# ===========================================================================
class TestCheckRequirements:
    """check_web_requirements should report True in the test environment."""

    def test_aiohttp_available(self):
        from gateway.platforms.web import check_web_requirements

        # The test environment has aiohttp installed.
        requirements_met = check_web_requirements()
        assert requirements_met is True
# ===========================================================================
# 5. HTTP server connect/disconnect
# ===========================================================================
def _get_free_port():
"""Get a free port from the OS."""
import socket
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("127.0.0.1", 0))
return s.getsockname()[1]
class TestServerLifecycle:
    """Start/stop behaviour of the adapter's HTTP server."""

    def _make_adapter(self):
        """Build a WebAdapter bound to a free loopback port."""
        from gateway.platforms.web import WebAdapter

        port = _get_free_port()
        config = PlatformConfig(enabled=True, extra={
            "port": port, "host": "127.0.0.1", "token": "test",
        })
        return WebAdapter(config)

    @pytest.mark.asyncio
    async def test_connect_starts_server(self):
        adapter = self._make_adapter()
        try:
            result = await adapter.connect()
            assert result is True
            assert adapter._runner is not None
        finally:
            await adapter.disconnect()

    @pytest.mark.asyncio
    async def test_disconnect_stops_server(self):
        import socket

        adapter = self._make_adapter()
        await adapter.connect()
        port = adapter._port
        await adapter.disconnect()
        # Probe the port instead of private state: once the server has been
        # stopped nothing should accept connections on it any more.
        # (ConnectionRefusedError is a subclass of OSError.)
        with pytest.raises(OSError):
            socket.create_connection(("127.0.0.1", port), timeout=1).close()

    @pytest.mark.asyncio
    async def test_serves_html_on_get(self):
        import aiohttp

        adapter = self._make_adapter()
        try:
            await adapter.connect()
            port = adapter._port
            async with aiohttp.ClientSession() as session:
                async with session.get(f"http://127.0.0.1:{port}/") as resp:
                    assert resp.status == 200
                    text = await resp.text()
                    assert "Hermes" in text
                    assert "<html" in text.lower()
        finally:
            await adapter.disconnect()
# ===========================================================================
# 6. WebSocket auth handshake
# ===========================================================================
class TestWebSocketAuth:
    """Auth handshake over /ws: the configured token is accepted, a wrong one is rejected."""

    def _make_adapter(self):
        """Build a WebAdapter on a free loopback port with token ``correcttoken``."""
        from gateway.platforms.web import WebAdapter

        settings = {"port": _get_free_port(), "host": "127.0.0.1", "token": "correcttoken"}
        return WebAdapter(PlatformConfig(enabled=True, extra=settings))

    @pytest.mark.asyncio
    async def test_auth_success(self):
        import aiohttp

        adapter = self._make_adapter()
        try:
            await adapter.connect()
            ws_url = f"http://127.0.0.1:{adapter._port}/ws"
            async with aiohttp.ClientSession() as http, http.ws_connect(ws_url) as ws:
                await ws.send_json({"type": "auth", "token": "correcttoken"})
                reply = await asyncio.wait_for(ws.receive_json(), timeout=3)
                assert reply["type"] == "auth_ok"
                assert "session_id" in reply
        finally:
            await adapter.disconnect()

    @pytest.mark.asyncio
    async def test_auth_failure(self):
        import aiohttp

        adapter = self._make_adapter()
        try:
            await adapter.connect()
            ws_url = f"http://127.0.0.1:{adapter._port}/ws"
            async with aiohttp.ClientSession() as http, http.ws_connect(ws_url) as ws:
                await ws.send_json({"type": "auth", "token": "wrongtoken"})
                reply = await asyncio.wait_for(ws.receive_json(), timeout=3)
                assert reply["type"] == "auth_fail"
        finally:
            await adapter.disconnect()
# ===========================================================================
# 7. WebSocket messaging
# ===========================================================================
class TestWebSocketMessaging:
    """A text frame sent over the WebSocket reaches the registered message handler."""

    @pytest.mark.asyncio
    async def test_text_message_dispatched_to_handler(self):
        import aiohttp
        from gateway.platforms.web import WebAdapter
        from gateway.platforms.base import MessageEvent

        handler_done = asyncio.Event()
        captured = {}

        async def mock_handler(event: MessageEvent):
            # Record what the adapter delivered, then release the waiting test.
            captured["text"] = event.text
            captured["platform"] = event.source.platform
            handler_done.set()
            return "Hello back!"

        port = _get_free_port()
        adapter = WebAdapter(PlatformConfig(enabled=True, extra={
            "port": port, "host": "127.0.0.1", "token": "tok",
        }))
        adapter.set_message_handler(mock_handler)

        try:
            await adapter.connect()
            port = adapter._port
            ws_url = f"http://127.0.0.1:{port}/ws"
            async with aiohttp.ClientSession() as http, http.ws_connect(ws_url) as ws:
                # Authenticate before sending anything else.
                await ws.send_json({"type": "auth", "token": "tok"})
                auth_reply = await asyncio.wait_for(ws.receive_json(), timeout=3)
                assert auth_reply["type"] == "auth_ok"

                await ws.send_json({"type": "message", "text": "Hello Hermes"})

                # Block until the handler has run (or fail after 5 seconds).
                await asyncio.wait_for(handler_done.wait(), timeout=5)
                assert captured["text"] == "Hello Hermes"
                assert captured["platform"] == Platform.WEB
        finally:
            await adapter.disconnect()
# ===========================================================================
# 8. send / send_voice / play_tts
# ===========================================================================
class TestSendMethods:
    """Payloads broadcast by the adapter's send, send_voice and play_tts methods."""

    def _make_adapter(self):
        """Build a WebAdapter whose ``_broadcast`` is an AsyncMock."""
        from gateway.platforms.web import WebAdapter

        web_adapter = WebAdapter(PlatformConfig(enabled=True, extra={
            "port": 0, "host": "127.0.0.1", "token": "tok",
        }))
        web_adapter._broadcast = AsyncMock()
        return web_adapter

    @pytest.mark.asyncio
    async def test_send_broadcasts_message(self):
        adapter = self._make_adapter()
        outcome = await adapter.send(chat_id="web", content="Hello!")
        assert outcome.success is True
        adapter._broadcast.assert_called_once()
        sent = adapter._broadcast.call_args[0][0]
        assert sent["type"] == "message"
        assert sent["content"] == "Hello!"

    @pytest.mark.asyncio
    async def test_send_voice_broadcasts_voice(self, tmp_path):
        adapter = self._make_adapter()
        # Stand-in audio file plus a media directory under the pytest tmp dir.
        clip = tmp_path / "test.mp3"
        clip.write_bytes(b"fake audio data")
        adapter._media_dir = tmp_path / "media"
        adapter._media_dir.mkdir()

        outcome = await adapter.send_voice(chat_id="web", audio_path=str(clip))
        assert outcome.success is True
        sent = adapter._broadcast.call_args[0][0]
        assert sent["type"] == "voice"
        assert "/media/" in sent["url"]

    @pytest.mark.asyncio
    async def test_play_tts_broadcasts_play_audio(self, tmp_path):
        adapter = self._make_adapter()
        clip = tmp_path / "tts.mp3"
        clip.write_bytes(b"fake tts data")
        adapter._media_dir = tmp_path / "media"
        adapter._media_dir.mkdir()

        outcome = await adapter.play_tts(chat_id="web", audio_path=str(clip))
        assert outcome.success is True
        sent = adapter._broadcast.call_args[0][0]
        assert sent["type"] == "play_audio"
        assert "/media/" in sent["url"]
# ===========================================================================
# 9. Authorization bypass for Web platform
# ===========================================================================
class TestWebAuthorization:
    """Web sessions carry the Platform.WEB tag (the platform that is token-gated at the WebSocket level)."""

    def test_web_platform_always_authorized(self):
        from gateway.platforms.base import SessionSource

        web_source = SessionSource(
            platform=Platform.WEB,
            user_id="web_session",
            chat_id="web",
            user_name="Web User",
        )
        # NOTE(review): this only checks the platform tag on the source; it
        # never calls the gateway's authorization check — confirm whether a
        # direct call was intended.
        assert web_source.platform == Platform.WEB
# ===========================================================================
# 10. Toolset registration
# ===========================================================================
class TestToolsetRegistration:
    """The ``hermes-web`` toolset exists, is included by the gateway toolset, and offers TTS."""

    def test_hermes_web_toolset_exists(self):
        from toolsets import get_toolset

        web_toolset = get_toolset("hermes-web")
        assert web_toolset is not None
        assert "tools" in web_toolset

    def test_hermes_web_in_gateway_toolset(self):
        from toolsets import get_toolset

        gateway_toolset = get_toolset("hermes-gateway")
        assert gateway_toolset is not None
        included = gateway_toolset.get("includes", [])
        assert "hermes-web" in included

    def test_hermes_web_has_tts_tool(self):
        from toolsets import get_toolset

        tool_names = get_toolset("hermes-web").get("tools", [])
        assert "text_to_speech" in tool_names
# ===========================================================================
# 11. Transcription Groq fallback
# ===========================================================================
class TestTranscriptionGroqFallback:
    """transcribe_audio behaviour when only a Groq key, or no key at all, is configured."""

    @patch.dict(os.environ, {"GROQ_API_KEY": "gsk_fake"}, clear=True)
    def test_groq_fallback_resolves(self):
        """When only GROQ_API_KEY is set, transcribe_audio should not fail with 'key not set'."""
        from tools.transcription_tools import transcribe_audio

        # The path does not exist, so the failure should come from file
        # validation rather than from a missing-key check.
        outcome = transcribe_audio("/nonexistent/audio.mp3")
        assert outcome["success"] is False
        error_text = outcome.get("error", "")
        assert "not set" not in error_text
        assert "not found" in error_text.lower()

    @patch.dict(os.environ, {}, clear=True)
    def test_no_key_returns_error(self, tmp_path):
        clip = tmp_path / "test.ogg"
        clip.write_bytes(b"fake audio data")
        from tools.transcription_tools import transcribe_audio

        # Patch the module's faster-whisper availability flag to False.
        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False):
            outcome = transcribe_audio(str(clip))
        assert outcome["success"] is False
        assert "no stt provider" in outcome.get("error", "").lower()
# ===========================================================================
# 12. LAN IP detection
# ===========================================================================
class TestLanIpDetection:
    """Shape checks on the adapter's _get_local_ip / _get_local_ips helpers."""

    @staticmethod
    def _new_adapter():
        """Build a WebAdapter configured for host 0.0.0.0 with an empty token."""
        from gateway.platforms.web import WebAdapter

        return WebAdapter(PlatformConfig(enabled=True, extra={
            "port": 8765, "host": "0.0.0.0", "token": "",
        }))

    def test_returns_ip_string(self):
        detected = self._new_adapter()._get_local_ip()
        assert isinstance(detected, str)
        # Expect dotted-quad form: four dot-separated fields.
        octets = detected.split(".")
        assert len(octets) == 4

    def test_get_local_ips_returns_list(self):
        addresses = self._new_adapter()._get_local_ips()
        assert isinstance(addresses, list)
        assert len(addresses) >= 1
# ===========================================================================
# 13. play_tts base class fallback
# ===========================================================================
class TestPlayTtsBaseFallback:
    """WebAdapter.play_tts broadcasts a ``play_audio`` payload.

    The class and method names mention a base-class fallback to send_voice,
    but the test exercises the Web adapter's own play_tts override.
    """

    @pytest.mark.asyncio
    async def test_base_play_tts_calls_send_voice(self, tmp_path):
        """Web adapter overrides play_tts; verify it sends play_audio not voice."""
        from gateway.platforms.web import WebAdapter

        config = PlatformConfig(enabled=True, extra={
            "port": 8765, "host": "127.0.0.1", "token": "tok",
        })
        adapter = WebAdapter(config)
        adapter._broadcast = AsyncMock()

        # Keep every artefact under pytest's per-test temp directory so
        # nothing is left behind in a shared, hard-coded /tmp location and
        # no manual unlink is required.
        adapter._media_dir = tmp_path / "media"
        adapter._media_dir.mkdir()
        audio_file = tmp_path / "tts.mp3"
        audio_file.write_bytes(b"fake")

        result = await adapter.play_tts(chat_id="test", audio_path=str(audio_file))
        assert result.success is True
        payload = adapter._broadcast.call_args[0][0]
        assert payload["type"] == "play_audio"
# ===========================================================================
# 14. Media directory management
# ===========================================================================
class TestMediaDirectory:
    """The adapter exposes a media directory path right after construction."""

    def test_media_dir_created(self):
        from gateway.platforms.web import WebAdapter

        config = PlatformConfig(enabled=True, extra={
            "port": 8765, "host": "127.0.0.1", "token": "tok",
        })
        adapter = WebAdapter(config)
        # Assert something that can actually fail (the previous check ended in
        # ``or True``): other tests in this module use ``_media_dir`` as a
        # pathlib path.
        # NOTE(review): whether the directory already exists on disk at init
        # time (vs. only after connect()) is not visible from here — tighten
        # to ``.exists()`` once confirmed.
        assert isinstance(adapter._media_dir, Path)
# ===========================================================================
# 15. Security: Path traversal sanitization
# ===========================================================================
class TestPathTraversalSanitization:
    """Filenames containing traversal sequences must be reduced to a safe name."""

    def test_path_name_strips_traversal(self):
        """Path.name strips directory traversal from filenames."""
        expectations = (
            ("../../../etc/passwd", "passwd"),
            ("normal_file.txt", "normal_file.txt"),
            ("/absolute/path/file.txt", "file.txt"),
        )
        for raw, expected in expectations:
            assert Path(raw).name == expected

    @pytest.mark.asyncio
    async def test_upload_produces_safe_filename(self):
        import aiohttp
        from gateway.platforms.web import WebAdapter

        port = _get_free_port()
        adapter = WebAdapter(PlatformConfig(enabled=True, extra={
            "port": port, "host": "127.0.0.1", "token": "tok",
        }))
        try:
            await adapter.connect()
            async with aiohttp.ClientSession() as http:
                form = aiohttp.FormData()
                form.add_field("file", b"test content",
                               filename="safe_file.txt",
                               content_type="application/octet-stream")
                upload_url = f"http://127.0.0.1:{port}/upload"
                auth_headers = {"Authorization": "Bearer tok"}
                async with http.post(upload_url, data=form, headers=auth_headers) as resp:
                    assert resp.status == 200
                    body = await resp.json()
                    stored_name = body["filename"]
                    assert stored_name.startswith("upload_")
                    assert "safe_file.txt" in stored_name
                    # The returned URL must point inside the media route.
                    assert body["url"].startswith("/media/")
        finally:
            await adapter.disconnect()

    def test_sanitize_in_source_code(self):
        """Verify source code uses Path().name for filename sanitization."""
        import inspect
        from gateway.platforms.web import WebAdapter

        upload_source = inspect.getsource(WebAdapter._handle_upload)
        assert "Path(" in upload_source and ".name" in upload_source
# ===========================================================================
# 16. Security: Media endpoint authentication
# ===========================================================================
class TestMediaEndpointAuth:
    """The /media route answers 401 without the right ``token`` query parameter."""

    @staticmethod
    def _new_adapter():
        """Return ``(adapter, port)`` for a WebAdapter using token ``secret``."""
        from gateway.platforms.web import WebAdapter

        port = _get_free_port()
        adapter = WebAdapter(PlatformConfig(enabled=True, extra={
            "port": port, "host": "127.0.0.1", "token": "secret",
        }))
        return adapter, port

    async def _status_for(self, path_and_query):
        """Start an adapter, GET *path_and_query*, and return the HTTP status."""
        import aiohttp

        adapter, port = self._new_adapter()
        try:
            await adapter.connect()
            url = f"http://127.0.0.1:{port}{path_and_query}"
            async with aiohttp.ClientSession() as http, http.get(url) as resp:
                return resp.status
        finally:
            await adapter.disconnect()

    @pytest.mark.asyncio
    async def test_media_without_token_returns_401(self):
        status = await self._status_for("/media/test.txt")
        assert status == 401

    @pytest.mark.asyncio
    async def test_media_with_wrong_token_returns_401(self):
        status = await self._status_for("/media/test.txt?token=wrong")
        assert status == 401

    @pytest.mark.asyncio
    async def test_media_with_valid_token_serves_file(self):
        import aiohttp

        adapter, port = self._new_adapter()
        try:
            await adapter.connect()
            # Drop a file into the adapter's media directory to fetch back.
            served = adapter._media_dir / "testfile.txt"
            served.write_text("hello")
            url = f"http://127.0.0.1:{port}/media/testfile.txt?token=secret"
            async with aiohttp.ClientSession() as http, http.get(url) as resp:
                assert resp.status == 200
                content = await resp.text()
                assert content == "hello"
        finally:
            await adapter.disconnect()

    @pytest.mark.asyncio
    async def test_media_path_traversal_in_url_blocked(self):
        # URL-encoded "../" segments must not resolve to a file.
        status = await self._status_for("/media/..%2F..%2Fetc%2Fpasswd?token=secret")
        assert status == 404
# ===========================================================================
# 17. Security: hmac.compare_digest for token comparison
# ===========================================================================
class TestHmacTokenComparison:
    """Source-level check: WebAdapter compares tokens with hmac.compare_digest."""

    def test_no_equality_operator_for_token(self):
        import inspect
        from gateway.platforms.web import WebAdapter

        adapter_source = inspect.getsource(WebAdapter)
        # Neither `== self._token` nor `!= self._token` may appear in the class source.
        for operator in ("==", "!="):
            assert f"{operator} self._token" not in adapter_source, \
                f"Token comparison must use hmac.compare_digest, not {operator}"

    def test_hmac_compare_digest_used(self):
        import inspect
        from gateway.platforms.web import WebAdapter

        adapter_source = inspect.getsource(WebAdapter)
        assert "hmac.compare_digest" in adapter_source
# ===========================================================================
# 18. Security: DOMPurify XSS prevention
# ===========================================================================
class TestDomPurifyPresent:
    """The generated chat page references DOMPurify and sanitizes rendered markdown."""

    def test_dompurify_script_included(self):
        from gateway.platforms.web import _build_chat_html

        page = _build_chat_html()
        assert "dompurify" in page.lower()
        assert "DOMPurify.sanitize" in page

    def test_marked_output_sanitized(self):
        from gateway.platforms.web import _build_chat_html

        # The output of marked.parse must be wrapped in DOMPurify.sanitize.
        page = _build_chat_html()
        assert "DOMPurify.sanitize(marked.parse(" in page
# ===========================================================================
# 19. Security: default bind to localhost
# ===========================================================================
class TestDefaultBindLocalhost:
    """With no host configured, the bind address must be 127.0.0.1 rather than 0.0.0.0."""

    def test_adapter_default_host(self):
        """An adapter built from an empty ``extra`` dict binds to 127.0.0.1."""
        from gateway.platforms.web import WebAdapter

        bare_config = PlatformConfig(enabled=True, extra={})
        web = WebAdapter(bare_config)
        assert web._host == "127.0.0.1"

    def test_config_default_host(self):
        """Env overrides with only WEB_UI_ENABLED set yield host 127.0.0.1."""
        # clear=True wipes every other variable for the duration of the block.
        with patch.dict(os.environ, {"WEB_UI_ENABLED": "true"}, clear=True):
            gateway_config = GatewayConfig()
            _apply_env_overrides(gateway_config)
            web_extra = gateway_config.platforms[Platform.WEB].extra
            assert web_extra["host"] == "127.0.0.1"
# ===========================================================================
# 20. Security: /remote-control token hiding in group chats
# ===========================================================================
class TestRemoteControlTokenHiding:
    """/remote-control must reveal the token in DMs and withhold it in group chats."""

    def _make_runner(self, tmp_path):
        """Create a GatewayRunner without running ``__init__`` and set the attributes used here."""
        from gateway.run import GatewayRunner

        # object.__new__ bypasses GatewayRunner.__init__, so only the
        # attributes assigned below exist on the instance.
        bare_runner = object.__new__(GatewayRunner)
        bare_runner.adapters = {}
        bare_runner._voice_mode = {}
        bare_runner._VOICE_MODE_PATH = tmp_path / "voice.json"
        bare_runner._session_db = None
        bare_runner.session_store = MagicMock()
        return bare_runner

    def _make_event(self, chat_type="dm"):
        """Build a ``/remote-control`` MessageEvent from the web platform for *chat_type*."""
        from gateway.platforms.base import MessageEvent, SessionSource

        origin = SessionSource(
            chat_id="test",
            user_id="user1",
            platform=Platform.WEB,
            chat_type=chat_type,
        )
        command = MessageEvent(text="/remote-control", source=origin)
        command.message_id = "msg1"
        return command

    @staticmethod
    def _register_web_adapter(runner):
        """Attach a WebAdapter configured with token ``mysecret`` to *runner*."""
        from gateway.platforms.web import WebAdapter

        settings = {"port": 8765, "host": "127.0.0.1", "token": "mysecret"}
        web = WebAdapter(PlatformConfig(enabled=True, extra=settings))
        runner.adapters[Platform.WEB] = web

    @pytest.mark.asyncio
    async def test_token_visible_in_dm(self, tmp_path):
        """In a DM the reply contains the token."""
        runner = self._make_runner(tmp_path)
        # Simulate a running web adapter
        self._register_web_adapter(runner)
        dm_event = self._make_event(chat_type="dm")
        reply = await runner._handle_remote_control_command(dm_event)
        assert "mysecret" in reply

    @pytest.mark.asyncio
    async def test_token_hidden_in_group(self, tmp_path):
        """In a group chat the reply omits the token and says it is hidden."""
        runner = self._make_runner(tmp_path)
        self._register_web_adapter(runner)
        group_event = self._make_event(chat_type="group")
        reply = await runner._handle_remote_control_command(group_event)
        assert "mysecret" not in reply
        assert "hidden" in reply.lower()
# ===========================================================================
# 21. VPN / multi-interface IP detection edge cases
# ===========================================================================
class TestVpnAndMultiInterfaceIp:
    """Local IP selection: LAN addresses win over VPN ones, with sane fallbacks."""

    @staticmethod
    def _choose_ip(candidates):
        """Return ``WebAdapter._get_local_ip()`` with ``_get_local_ips`` stubbed to *candidates*."""
        from gateway.platforms.web import WebAdapter

        with unittest.mock.patch.object(
            WebAdapter, "_get_local_ips", return_value=candidates
        ):
            return WebAdapter._get_local_ip()

    @staticmethod
    def _local_ips_source():
        """Return the source text of ``WebAdapter._get_local_ips``."""
        from gateway.platforms.web import WebAdapter
        import inspect

        return inspect.getsource(WebAdapter._get_local_ips)

    def test_lan_preferred_over_vpn(self):
        """A 192.168.x.x address is picked ahead of a 172.16.x.x VPN address."""
        chosen = self._choose_ip(["172.16.0.2", "192.168.1.106"])
        assert chosen == "192.168.1.106"

    def test_ten_network_preferred_over_vpn(self):
        """A 10.x.x.x corporate LAN address is picked ahead of a 172.16.x.x VPN address."""
        chosen = self._choose_ip(["172.16.5.1", "10.0.0.50"])
        assert chosen == "10.0.0.50"

    def test_only_vpn_ip_still_returned(self):
        """When the VPN address is the only candidate, it is returned."""
        chosen = self._choose_ip(["172.16.0.2"])
        assert chosen == "172.16.0.2"

    def test_no_interfaces_returns_localhost(self):
        """With no candidates at all the result falls back to 127.0.0.1."""
        chosen = self._choose_ip([])
        assert chosen == "127.0.0.1"

    def test_multiple_lan_ips_returns_first_match(self):
        """With several LAN candidates, the first 192.168/10.x entry wins."""
        chosen = self._choose_ip(["172.16.0.2", "192.168.1.50", "10.0.0.1"])
        assert chosen == "192.168.1.50"

    def test_get_local_ips_excludes_loopback(self):
        """The source of _get_local_ips must mention the 127. loopback prefix."""
        code = self._local_ips_source()
        # Must filter out 127.x addresses
        assert "127." in code, \
            "_get_local_ips must filter loopback addresses"

    def test_get_local_ips_netifaces_fallback(self):
        """The source of _get_local_ips must mention ifconfig and ImportError."""
        code = self._local_ips_source()
        assert "ifconfig" in code, \
            "_get_local_ips must have ifconfig fallback"
        assert "ImportError" in code, \
            "_get_local_ips must catch netifaces ImportError"
# ===========================================================================
# 22. Startup message token exposure
# ===========================================================================
class TestStartupTokenExposure:
    """Startup output must not reveal a token the user configured themselves."""

    @staticmethod
    def _adapter_with_token(token):
        """Build a WebAdapter on 127.0.0.1:8765 whose configured token is *token*."""
        from gateway.platforms.web import WebAdapter

        settings = {"port": 8765, "host": "127.0.0.1", "token": token}
        return WebAdapter(PlatformConfig(enabled=True, extra=settings))

    @staticmethod
    def _connect_source():
        """Return the source text of ``WebAdapter.connect``."""
        from gateway.platforms.web import WebAdapter
        import inspect

        return inspect.getsource(WebAdapter.connect)

    def test_auto_generated_flag_when_no_token(self):
        """An empty token sets the auto-generated flag and yields a 32-char token."""
        web = self._adapter_with_token("")
        assert web._token_auto_generated is True
        assert len(web._token) == 32  # secrets.token_hex(16) = 32 chars

    def test_configured_flag_when_token_set(self):
        """A supplied token is kept verbatim and the auto-generated flag is False."""
        web = self._adapter_with_token("mytoken123")
        assert web._token_auto_generated is False
        assert web._token == "mytoken123"

    def test_startup_log_hides_configured_token(self):
        """connect() must reference _token_auto_generated in its source."""
        code = self._connect_source()
        # Must check _token_auto_generated before printing
        assert "_token_auto_generated" in code, \
            "connect() must check _token_auto_generated before printing token"

    def test_startup_log_shows_auto_token(self):
        """connect() must contain the "auto-generated" marker text in its source."""
        code = self._connect_source()
        # Must have a branch that prints the actual token
        assert "auto-generated" in code, \
            "connect() must indicate when token is auto-generated"

View File

@@ -125,7 +125,7 @@ class TestTranscribeLocal:
mock_model.transcribe.return_value = ([mock_segment], mock_info)
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
patch("tools.transcription_tools.WhisperModel", return_value=mock_model), \
patch("faster_whisper.WhisperModel", return_value=mock_model), \
patch("tools.transcription_tools._local_model", None):
from tools.transcription_tools import _transcribe_local
result = _transcribe_local(str(audio_file), "base")
@@ -164,7 +164,7 @@ class TestTranscribeOpenAI:
mock_client.audio.transcriptions.create.return_value = "Hello from OpenAI"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_openai
result = _transcribe_openai(str(audio_file), "whisper-1")

View File

@@ -160,7 +160,7 @@ class TestTranscribeGroq:
mock_client.audio.transcriptions.create.return_value = "hello world"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_groq
result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo")
@@ -175,7 +175,7 @@ class TestTranscribeGroq:
mock_client.audio.transcriptions.create.return_value = " hello world \n"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_groq
result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo")
@@ -188,7 +188,7 @@ class TestTranscribeGroq:
mock_client.audio.transcriptions.create.return_value = "test"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client) as mock_openai_cls:
patch("openai.OpenAI", return_value=mock_client) as mock_openai_cls:
from tools.transcription_tools import _transcribe_groq, GROQ_BASE_URL
_transcribe_groq(sample_wav, "whisper-large-v3-turbo")
@@ -202,7 +202,7 @@ class TestTranscribeGroq:
mock_client.audio.transcriptions.create.side_effect = Exception("API error")
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_groq
result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo")
@@ -216,7 +216,7 @@ class TestTranscribeGroq:
mock_client.audio.transcriptions.create.side_effect = PermissionError("denied")
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_groq
result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo")
@@ -244,7 +244,7 @@ class TestTranscribeOpenAIExtended:
mock_client.audio.transcriptions.create.return_value = "test"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client) as mock_openai_cls:
patch("openai.OpenAI", return_value=mock_client) as mock_openai_cls:
from tools.transcription_tools import _transcribe_openai, OPENAI_BASE_URL
_transcribe_openai(sample_wav, "whisper-1")
@@ -258,7 +258,7 @@ class TestTranscribeOpenAIExtended:
mock_client.audio.transcriptions.create.return_value = " hello \n"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_openai
result = _transcribe_openai(sample_wav, "whisper-1")
@@ -271,7 +271,7 @@ class TestTranscribeOpenAIExtended:
mock_client.audio.transcriptions.create.side_effect = PermissionError("denied")
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_openai
result = _transcribe_openai(sample_wav, "whisper-1")
@@ -300,7 +300,7 @@ class TestTranscribeLocalExtended:
mock_whisper_cls = MagicMock(return_value=mock_model)
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
patch("tools.transcription_tools.WhisperModel", mock_whisper_cls), \
patch("faster_whisper.WhisperModel", mock_whisper_cls), \
patch("tools.transcription_tools._local_model", None), \
patch("tools.transcription_tools._local_model_name", None):
from tools.transcription_tools import _transcribe_local
@@ -326,7 +326,7 @@ class TestTranscribeLocalExtended:
mock_whisper_cls = MagicMock(return_value=mock_model)
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
patch("tools.transcription_tools.WhisperModel", mock_whisper_cls), \
patch("faster_whisper.WhisperModel", mock_whisper_cls), \
patch("tools.transcription_tools._local_model", None), \
patch("tools.transcription_tools._local_model_name", None):
from tools.transcription_tools import _transcribe_local
@@ -342,7 +342,7 @@ class TestTranscribeLocalExtended:
mock_whisper_cls = MagicMock(side_effect=RuntimeError("CUDA out of memory"))
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
patch("tools.transcription_tools.WhisperModel", mock_whisper_cls), \
patch("faster_whisper.WhisperModel", mock_whisper_cls), \
patch("tools.transcription_tools._local_model", None):
from tools.transcription_tools import _transcribe_local
result = _transcribe_local(str(audio), "large-v3")
@@ -366,7 +366,7 @@ class TestTranscribeLocalExtended:
mock_model.transcribe.return_value = ([seg1, seg2], mock_info)
with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \
patch("tools.transcription_tools.WhisperModel", return_value=mock_model), \
patch("faster_whisper.WhisperModel", return_value=mock_model), \
patch("tools.transcription_tools._local_model", None):
from tools.transcription_tools import _transcribe_local
result = _transcribe_local(str(audio), "base")
@@ -387,7 +387,7 @@ class TestModelAutoCorrection:
mock_client.audio.transcriptions.create.return_value = "hello world"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_groq, DEFAULT_GROQ_STT_MODEL
_transcribe_groq(sample_wav, "whisper-1")
@@ -401,7 +401,7 @@ class TestModelAutoCorrection:
mock_client.audio.transcriptions.create.return_value = "test"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_groq, DEFAULT_GROQ_STT_MODEL
_transcribe_groq(sample_wav, "gpt-4o-transcribe")
@@ -415,7 +415,7 @@ class TestModelAutoCorrection:
mock_client.audio.transcriptions.create.return_value = "hello world"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_openai, DEFAULT_STT_MODEL
_transcribe_openai(sample_wav, "whisper-large-v3-turbo")
@@ -429,7 +429,7 @@ class TestModelAutoCorrection:
mock_client.audio.transcriptions.create.return_value = "test"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_openai, DEFAULT_STT_MODEL
_transcribe_openai(sample_wav, "distil-whisper-large-v3-en")
@@ -443,7 +443,7 @@ class TestModelAutoCorrection:
mock_client.audio.transcriptions.create.return_value = "test"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_groq
_transcribe_groq(sample_wav, "whisper-large-v3")
@@ -457,7 +457,7 @@ class TestModelAutoCorrection:
mock_client.audio.transcriptions.create.return_value = "test"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_openai
_transcribe_openai(sample_wav, "gpt-4o-mini-transcribe")
@@ -472,7 +472,7 @@ class TestModelAutoCorrection:
mock_client.audio.transcriptions.create.return_value = "test"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_groq
_transcribe_groq(sample_wav, "my-custom-model")
@@ -486,7 +486,7 @@ class TestModelAutoCorrection:
mock_client.audio.transcriptions.create.return_value = "test"
with patch("tools.transcription_tools._HAS_OPENAI", True), \
patch("tools.transcription_tools.OpenAI", return_value=mock_client):
patch("openai.OpenAI", return_value=mock_client):
from tools.transcription_tools import _transcribe_openai
_transcribe_openai(sample_wav, "my-custom-model")

View File

@@ -345,6 +345,10 @@ class TestPlayAudioFile:
np = pytest.importorskip("numpy")
mock_sd_obj = MagicMock()
# Simulate stream completing immediately (get_stream().active = False)
mock_stream = MagicMock()
mock_stream.active = False
mock_sd_obj.get_stream.return_value = mock_stream
def _fake_import():
return mock_sd_obj, np
@@ -357,7 +361,7 @@ class TestPlayAudioFile:
assert result is True
mock_sd_obj.play.assert_called_once()
mock_sd_obj.wait.assert_called_once()
mock_sd_obj.stop.assert_called_once()
def test_returns_false_when_no_player(self, monkeypatch, sample_wav):
def _fail_import():

View File

@@ -34,18 +34,9 @@ logger = logging.getLogger(__name__)
# Optional imports — graceful degradation
# ---------------------------------------------------------------------------
try:
from faster_whisper import WhisperModel
_HAS_FASTER_WHISPER = True
except ImportError:
_HAS_FASTER_WHISPER = False
WhisperModel = None # type: ignore[assignment,misc]
try:
from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
_HAS_OPENAI = True
except ImportError:
_HAS_OPENAI = False
import importlib.util as _ilu
_HAS_FASTER_WHISPER = _ilu.find_spec("faster_whisper") is not None
_HAS_OPENAI = _ilu.find_spec("openai") is not None
# ---------------------------------------------------------------------------
# Constants
@@ -67,7 +58,7 @@ OPENAI_MODELS = {"whisper-1", "gpt-4o-mini-transcribe", "gpt-4o-transcribe"}
GROQ_MODELS = {"whisper-large-v3", "whisper-large-v3-turbo", "distil-whisper-large-v3-en"}
# Singleton for the local model — loaded once, reused across calls
_local_model: Optional["WhisperModel"] = None
_local_model: Optional[object] = None
_local_model_name: Optional[str] = None
# ---------------------------------------------------------------------------
@@ -195,6 +186,7 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]:
return {"success": False, "transcript": "", "error": "faster-whisper not installed"}
try:
from faster_whisper import WhisperModel
# Lazy-load the model (downloads on first use, ~150 MB for 'base')
if _local_model is None or _local_model_name != model_name:
logger.info("Loading faster-whisper model '%s' (first load downloads the model)...", model_name)
@@ -235,6 +227,7 @@ def _transcribe_groq(file_path: str, model_name: str) -> Dict[str, Any]:
model_name = DEFAULT_GROQ_STT_MODEL
try:
from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
client = OpenAI(api_key=api_key, base_url=GROQ_BASE_URL, timeout=30, max_retries=0)
with open(file_path, "rb") as audio_file:
@@ -282,6 +275,7 @@ def _transcribe_openai(file_path: str, model_name: str) -> Dict[str, Any]:
model_name = DEFAULT_STT_MODEL
try:
from openai import OpenAI, APIError, APIConnectionError, APITimeoutError
client = OpenAI(api_key=api_key, base_url=OPENAI_BASE_URL, timeout=30, max_retries=0)
with open(file_path, "rb") as audio_file:

View File

@@ -636,7 +636,13 @@ def play_audio_file(file_path: str) -> bool:
sample_rate = wf.getframerate()
sd.play(audio_data, samplerate=sample_rate)
sd.wait()
# sd.wait() calls Event.wait() without timeout — hangs forever if
# the audio device stalls. Poll with a ceiling and force-stop.
duration_secs = len(audio_data) / sample_rate
deadline = time.monotonic() + duration_secs + 2.0
while sd.get_stream() and sd.get_stream().active and time.monotonic() < deadline:
time.sleep(0.01)
sd.stop()
return True
except (ImportError, OSError):
pass # audio libs not available, fall through to system players

View File

@@ -292,16 +292,10 @@ TOOLSETS = {
"includes": []
},
"hermes-web": {
"description": "Web UI bot toolset - browser-based chat interface (full access)",
"tools": _HERMES_CORE_TOOLS,
"includes": []
},
"hermes-gateway": {
"description": "Gateway toolset - union of all messaging platform tools",
"tools": [],
"includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant", "hermes-email", "hermes-web"]
"includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant", "hermes-email"]
}
}

View File

@@ -478,10 +478,6 @@ The bot requires an @mention by default in server channels. Make sure you:
- Edge TTS (free, no key) is the default fallback
- Check logs for TTS errors
### Web UI issues (firewall, mobile mic)
See the [Web UI Troubleshooting](../messaging/web.md#troubleshooting) guide for firewall, HTTPS, and mobile microphone issues.
### Whisper returns garbage text
The hallucination filter catches most cases automatically. If you're still getting phantom transcripts:

View File

@@ -15,12 +15,12 @@ Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, Email, Home As
│ Hermes Gateway │
├───────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────┐ ┌─────────┐ ┌──────────┐ ┌───────┐ ┌───────┐ ┌───────┐ ┌────┐ ┌─────┐
│ │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │Signal │ │ Email │ │ HA │ │ Web │
│ │ Adapter │ │ Adapter │ │ Adapter │ │Adapter│ │Adapter│ │Adapter│ │Adpt│ │Adpt │
│ └────┬─────┘ └────┬────┘ └────┬─────┘ └──┬────┘ └──┬────┘ └──┬────┘ └─┬──┘ └──┬──┘
│ │ │ │ │ │ │ │
│ └─────────────┴───────────┴───────────┴─────────┴─────────┴────────┴───────┘
│ ┌──────────┐ ┌─────────┐ ┌──────────┐ ┌───────┐ ┌───────┐ ┌───────┐ ┌────┐
│ │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │Signal │ │ Email │ │ HA │
│ │ Adapter │ │ Adapter │ │ Adapter │ │Adapter│ │Adapter│ │Adapter│ │Adpt│
│ └────┬─────┘ └────┬────┘ └────┬─────┘ └──┬────┘ └──┬────┘ └──┬────┘ └─┬──┘
│ │ │ │ │ │ │ │
│ └─────────────┴───────────┴───────────┴─────────┴─────────┴────────┘
│ │ │
│ ┌────────▼────────┐ │
│ │ Session Store │ │
@@ -81,7 +81,6 @@ hermes gateway status # Check service status
| `/background <prompt>` | Run a prompt in a separate background session |
| `/reload-mcp` | Reload MCP servers from config |
| `/update` | Update Hermes Agent to the latest version |
| `/remote-control [port] [token]` | Start web UI for remote access |
| `/help` | Show available commands |
| `/<skill-name>` | Invoke any installed skill |
@@ -221,4 +220,3 @@ Each platform has its own toolset:
- [Signal Setup](signal.md)
- [Email Setup](email.md)
- [Home Assistant Integration](homeassistant.md)
- [Web UI Setup](web.md)

View File

@@ -1,206 +0,0 @@
---
sidebar_position: 8
title: "Web UI"
description: "Access Hermes from any browser on your network — phone, tablet, or desktop"
---
# Web UI Setup
Access Hermes from any browser on your local network. Open the URL on your phone, tablet, or another computer — no app install, no third-party account needed.
:::info No External Dependencies
The Web adapter uses `aiohttp`, which is already included in the `[messaging]` extra. No additional packages or external services are required.
:::
## Overview
| Component | Value |
|-----------|-------|
| **Library** | `aiohttp` (HTTP + WebSocket) |
| **Connection** | Local network (LAN) |
| **Auth** | Token-based (auto-generated or custom) |
| **Features** | Markdown, code highlighting, voice messages, images, mobile responsive |
---
## Quick Start
### Option 1: On-Demand via Command
Start the gateway normally, then type from any connected platform (Telegram, Discord, etc.):
```
/remote-control
```
The bot replies with the URL and access token. Open the URL on your phone.
You can also specify a custom port and token:
```
/remote-control 9000 mysecrettoken
```
### Option 2: Auto-Start with Gateway
Add to `~/.hermes/.env`:
```bash
WEB_UI_ENABLED=true
WEB_UI_PORT=8765 # default: 8765
WEB_UI_TOKEN=mytoken # auto-generated if empty
```
Start the gateway:
```bash
hermes gateway
```
The web UI starts automatically alongside your other platforms.
---
## Features
### Markdown & Code Highlighting
Bot responses render full GitHub-flavored Markdown with syntax-highlighted code blocks powered by highlight.js.
### Voice Conversation
Click the microphone button to record a voice message. The audio is transcribed with Whisper STT (OpenAI if configured, otherwise Groq) and sent to the agent. The bot automatically replies with audio playback — voice first, then the text response appears. No extra configuration needed.
STT uses `VOICE_TOOLS_OPENAI_KEY` (OpenAI Whisper) if set, otherwise falls back to `GROQ_API_KEY` (Groq Whisper, free tier). If you only need STT, setting `GROQ_API_KEY` is the simplest option. TTS uses Edge TTS (free, no key) by default, or ElevenLabs/OpenAI if configured in `~/.hermes/config.yaml`.
### Images & Files
- Images display inline in the chat
- Documents show as download links
- Generated images from the agent appear automatically
### Mobile Responsive
The UI adapts to phone screens — full chat experience with touch-friendly input and buttons.
### Typing Indicator
Shows an animated indicator while the agent is processing your message.
### Auto-Reconnect
If the connection drops (server restart, network change), the client automatically reconnects with exponential backoff.
---
## Firewall & Network
### macOS Firewall
macOS may block incoming connections by default. If devices on your network can't connect:
1. **System Settings** > **Network** > **Firewall**
2. Either disable the firewall temporarily, or add Python to the allowed apps
### Localhost Only
To restrict access to the local machine only (this is the default bind address):
```bash
WEB_UI_HOST=127.0.0.1
```
### Remote Access (Outside LAN)
The Web UI is designed for local network access. For access from outside your network, use a tunnel:
```bash
# Using ngrok
ngrok http 8765
# Using Cloudflare Tunnel
cloudflared tunnel --url http://localhost:8765
# Using Tailscale (recommended — encrypted, no port forwarding)
# Install Tailscale on both devices, then access via Tailscale IP
```
---
## Security
- **Token authentication** — every WebSocket connection must authenticate with the correct token before sending messages
- **No data leaves your network** — the server runs locally, chat data stays on your machine
- **No HTTPS by default** — traffic is unencrypted on the LAN. Use a reverse proxy or tunnel for encryption
- **File uploads** require the auth token in the `Authorization` header
- **Media cleanup** — uploaded and generated files are automatically deleted after 24 hours
---
## Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `WEB_UI_ENABLED` | `false` | Enable the web gateway |
| `WEB_UI_PORT` | `8765` | HTTP server port |
| `WEB_UI_HOST` | `127.0.0.1` | Bind address (`0.0.0.0` = LAN, `127.0.0.1` = localhost) |
| `WEB_UI_TOKEN` | (auto) | Access token. Auto-generated if empty. |
---
## Troubleshooting
### "Server not found" on phone
- Verify both devices are on the **same WiFi network**
- Check if macOS firewall is blocking (see Firewall section above)
- Try the IP address shown in console output, not `localhost`
- If using VPN, the console shows all available IPs — try each one
### Port already in use
Change the port in `.env`:
```bash
WEB_UI_PORT=9000
```
### Voice recording not working
- Browser must support `MediaRecorder` API (Chrome, Firefox, Safari 14.5+)
- HTTPS is required for microphone access on non-localhost origins
- On localhost (`127.0.0.1`), HTTP works fine for microphone
### Microphone not working on mobile
Mobile browsers require **HTTPS** for microphone access (`navigator.mediaDevices` API). When accessing the Web UI over HTTP on a LAN IP (e.g. `http://192.168.1.x:8765`), the mic button will appear dimmed.
**Android Chrome** — flag the LAN IP as secure:
1. Open `chrome://flags/#unsafely-treat-insecure-origin-as-secure`
2. Add your Web UI URL (e.g. `http://192.168.1.106:8765`)
3. Set to **Enabled** and relaunch Chrome
**iOS Safari / Chrome** — no flag bypass available. Use one of these instead:
1. **Self-signed HTTPS** with mkcert (recommended):
```bash
brew install mkcert && mkcert -install
mkcert 192.168.1.106
npx local-ssl-proxy --source 8443 --target 8765 \
--cert 192.168.1.106.pem --key 192.168.1.106-key.pem
```
Then access `https://192.168.1.106:8443`. Trust the mkcert root CA on iOS: **Settings > General > About > Certificate Trust Settings**.
2. **SSH tunnel from mobile** (if you have Termius or similar):
```bash
ssh -L 8765:127.0.0.1:8765 user@your-mac-ip
```
Then access `http://localhost:8765` — localhost is exempt from the HTTPS requirement.
:::tip
Text chat works on mobile over HTTP without any workaround — only the microphone feature requires HTTPS.
:::
### CDN resources not loading
The UI loads `marked.js` and `highlight.js` from CDN. If you're offline or behind a restrictive proxy, markdown rendering and code highlighting won't work but basic chat still functions.