From 35748a2fb02c6cf8016bcfe481f34abcc64cce0a Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Sat, 14 Mar 2026 09:06:52 +0300 Subject: [PATCH] =?UTF-8?q?fix:=20address=20PR=20review=20round=204=20?= =?UTF-8?q?=E2=80=94=20remove=20web=20UI,=20fix=20audio/import/interface?= =?UTF-8?q?=20issues?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove web UI gateway (web.py, tests, docs, toolset, env vars, Platform.WEB enum) per maintainer request — Nous is building their own official chat UI. Fix 1: Replace sd.wait() with polling pattern in play_audio_file() to prevent indefinite hang when audio device stalls (consistent with play_beep()). Fix 2: Use importlib.util.find_spec() for faster_whisper/openai availability checks instead of module-level imports that trigger heavy native library loading (CUDA/cuDNN) at import time. Fix 3: Remove inspect.signature() hack in _send_voice_reply() — add **kwargs to Telegram send_voice() so all adapters accept metadata uniformly. Fix 4: Make session loading resilient to removed platform enum values — skip entries with unknown platforms instead of crashing the entire gateway. --- .env.example | 7 - gateway/config.py | 16 - gateway/platforms/telegram.py | 1 + gateway/platforms/web.py | 1609 ----------------- gateway/run.py | 83 +- gateway/session.py | 6 +- tests/gateway/test_voice_command.py | 27 - tests/gateway/test_web.py | 926 ---------- tests/tools/test_transcription.py | 4 +- tests/tools/test_transcription_tools.py | 40 +- tests/tools/test_voice_mode.py | 6 +- tools/transcription_tools.py | 20 +- tools/voice_mode.py | 8 +- toolsets.py | 8 +- .../docs/user-guide/features/voice-mode.md | 4 - website/docs/user-guide/messaging/index.md | 14 +- website/docs/user-guide/messaging/web.md | 206 --- 17 files changed, 55 insertions(+), 2930 deletions(-) delete mode 100644 gateway/platforms/web.py delete mode 100644 tests/gateway/test_web.py delete mode 100644 website/docs/user-guide/messaging/web.md diff --git a/.env.example b/.env.example index 641f378d2..fb215afba 100644 --- a/.env.example +++ b/.env.example @@ -213,13 +213,6 @@ VOICE_TOOLS_OPENAI_KEY= # EMAIL_ALLOWED_USERS=your@email.com # EMAIL_HOME_ADDRESS=your@email.com -# Web UI (browser-based chat interface on local network) -# Access from phone/tablet/desktop at http://:8765 -# WEB_UI_ENABLED=false -# WEB_UI_PORT=8765 -# WEB_UI_HOST=127.0.0.1 # Use 0.0.0.0 to expose on LAN -# WEB_UI_TOKEN= # Auto-generated if empty - # Gateway-wide: allow ALL users without an allowlist (default: false = deny) # Only set to true if you intentionally want open access. # GATEWAY_ALLOW_ALL_USERS=false diff --git a/gateway/config.py b/gateway/config.py index 06a411661..e45eede7c 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -31,7 +31,6 @@ class Platform(Enum): SIGNAL = "signal" HOMEASSISTANT = "homeassistant" EMAIL = "email" - WEB = "web" @dataclass @@ -177,9 +176,6 @@ class GatewayConfig: # Email uses extra dict for config (address + imap_host + smtp_host) elif platform == Platform.EMAIL and config.extra.get("address"): connected.append(platform) - # Web UI uses enabled flag only - elif platform == Platform.WEB: - connected.append(platform) return connected def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]: @@ -470,18 +466,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None: name=os.getenv("EMAIL_HOME_ADDRESS_NAME", "Home"), ) - # Web UI - web_enabled = os.getenv("WEB_UI_ENABLED", "").lower() in ("true", "1", "yes") - if web_enabled: - if Platform.WEB not in config.platforms: - config.platforms[Platform.WEB] = PlatformConfig() - config.platforms[Platform.WEB].enabled = True - config.platforms[Platform.WEB].extra.update({ - "port": int(os.getenv("WEB_UI_PORT", "8765")), - "host": os.getenv("WEB_UI_HOST", "") or "127.0.0.1", - "token": os.getenv("WEB_UI_TOKEN", ""), - }) - # Session settings idle_minutes = os.getenv("SESSION_IDLE_MINUTES") if idle_minutes: diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 2a19fde9c..df44733e3 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -311,6 +311,7 @@ class TelegramAdapter(BasePlatformAdapter): caption: Optional[str] = None, reply_to: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, + **kwargs, ) -> SendResult: """Send audio as a native Telegram voice message or audio file.""" if not self._bot: diff --git a/gateway/platforms/web.py b/gateway/platforms/web.py deleted file mode 100644 index 9a5d39fa7..000000000 --- a/gateway/platforms/web.py +++ /dev/null @@ -1,1609 +0,0 @@ -""" -Web platform adapter. - -Provides a browser-based chat interface via HTTP + WebSocket. -Serves a single-page chat UI with markdown rendering, code highlighting, -voice messages, and mobile responsive design. - -No external dependencies beyond aiohttp (already in messaging extra). -""" - -import asyncio -import base64 -import hmac -import json -import logging -import os -import secrets -import shutil -import socket -import time -import uuid -from pathlib import Path -from typing import Dict, List, Optional, Any - -logger = logging.getLogger(__name__) - -try: - from aiohttp import web - AIOHTTP_AVAILABLE = True -except ImportError: - AIOHTTP_AVAILABLE = False - web = None - -import sys -from pathlib import Path as _Path -sys.path.insert(0, str(_Path(__file__).resolve().parents[2])) - -from gateway.config import Platform, PlatformConfig -from gateway.platforms.base import ( - BasePlatformAdapter, - MessageEvent, - MessageType, - SendResult, -) - - -def check_web_requirements() -> bool: - """Check if aiohttp is available.""" - return AIOHTTP_AVAILABLE - - -class WebAdapter(BasePlatformAdapter): - """ - Web-based chat adapter. - - Runs a local HTTP server serving a chat UI. Clients connect via - WebSocket for real-time bidirectional messaging. - """ - - def __init__(self, config: PlatformConfig): - super().__init__(config, Platform.WEB) - self._app: Optional[web.Application] = None - self._runner: Optional[web.AppRunner] = None - self._site: Optional[web.TCPSite] = None - - # Config - self._host: str = config.extra.get("host", "127.0.0.1") - self._port: int = config.extra.get("port", 8765) - configured_token = config.extra.get("token", "") - self._token: str = configured_token or secrets.token_hex(16) - self._token_auto_generated: bool = not configured_token - - # Connected WebSocket clients: session_id -> ws - self._clients: Dict[str, web.WebSocketResponse] = {} - - # Media directory for uploaded/generated files - self._media_dir = Path.home() / ".hermes" / "web_media" - - # Cleanup task handle - self._cleanup_task: Optional[asyncio.Task] = None - - async def connect(self) -> bool: - """Start the HTTP server and begin accepting connections.""" - if not AIOHTTP_AVAILABLE: - return False - - self._media_dir.mkdir(parents=True, exist_ok=True) - - self._app = web.Application(client_max_size=50 * 1024 * 1024) # 50MB upload limit - self._app.router.add_get("/", self._handle_index) - self._app.router.add_get("/ws", self._handle_websocket) - self._app.router.add_post("/upload", self._handle_upload) - self._app.router.add_get("/media/{filename}", self._handle_media) - - self._runner = web.AppRunner(self._app) - await self._runner.setup() - - try: - self._site = web.TCPSite(self._runner, self._host, self._port) - await self._site.start() - except OSError as e: - logger.error("Failed to start web server on %s:%s — %s", self._host, self._port, e) - await self._runner.cleanup() - return False - - self._running = True - self._cleanup_task = asyncio.ensure_future(self._media_cleanup_loop()) - - if self._host in ("0.0.0.0", "::"): - all_ips = self._get_local_ips() - primary_ip = self._get_local_ip() - print(f"[{self.name}] Web UI: http://{primary_ip}:{self._port}") - for ip in all_ips: - if ip != primary_ip: - print(f"[{self.name}] also: http://{ip}:{self._port}") - else: - print(f"[{self.name}] Web UI: http://{self._host}:{self._port}") - print(f"[{self.name}] Set WEB_UI_HOST=0.0.0.0 for phone/tablet access") - if self._token_auto_generated: - print(f"[{self.name}] Access token (auto-generated): {self._token}") - else: - print(f"[{self.name}] Access token: (set via WEB_UI_TOKEN)") - - return True - - async def disconnect(self) -> None: - """Stop the server and close all connections.""" - if self._cleanup_task: - self._cleanup_task.cancel() - self._cleanup_task = None - - for ws in list(self._clients.values()): - try: - await ws.close() - except Exception: - pass - self._clients.clear() - - if self._site: - await self._site.stop() - if self._runner: - await self._runner.cleanup() - - self._running = False - self._app = None - self._runner = None - self._site = None - print(f"[{self.name}] Disconnected") - - async def send( - self, - chat_id: str, - content: str, - reply_to: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, - ) -> SendResult: - """Send a text message to all connected clients.""" - msg_id = str(uuid.uuid4())[:8] - payload = { - "type": "message", - "id": msg_id, - "content": content, - "timestamp": time.time(), - } - await self._broadcast(payload) - return SendResult(success=True, message_id=msg_id) - - async def edit_message( - self, chat_id: str, message_id: str, content: str - ) -> SendResult: - """Edit a previously sent message (used for streaming updates).""" - payload = { - "type": "edit", - "id": message_id, - "content": content, - "timestamp": time.time(), - } - await self._broadcast(payload) - return SendResult(success=True, message_id=message_id) - - async def send_typing(self, chat_id: str, metadata=None) -> None: - """Send typing indicator to all clients.""" - await self._broadcast({"type": "typing"}) - - async def send_image( - self, - chat_id: str, - image_url: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - ) -> SendResult: - """Send an image to all connected clients.""" - msg_id = str(uuid.uuid4())[:8] - payload = { - "type": "image", - "id": msg_id, - "url": image_url, - "caption": caption or "", - "timestamp": time.time(), - } - await self._broadcast(payload) - return SendResult(success=True, message_id=msg_id) - - async def send_voice( - self, - chat_id: str, - audio_path: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - **kwargs, - ) -> SendResult: - """Send a voice message by copying audio to media dir and broadcasting URL.""" - filename = f"voice_{uuid.uuid4().hex[:8]}{Path(audio_path).suffix}" - dest = self._media_dir / filename - try: - shutil.copy2(audio_path, dest) - except Exception as e: - return SendResult(success=False, error=f"Failed to copy audio: {e}") - - msg_id = str(uuid.uuid4())[:8] - payload = { - "type": "voice", - "id": msg_id, - "url": f"/media/{filename}", - "caption": caption or "", - "timestamp": time.time(), - } - await self._broadcast(payload) - return SendResult(success=True, message_id=msg_id) - - async def play_tts( - self, - chat_id: str, - audio_path: str, - **kwargs, - ) -> SendResult: - """Play TTS audio invisibly — no bubble in chat, just audio playback.""" - filename = f"tts_{uuid.uuid4().hex[:8]}{Path(audio_path).suffix}" - dest = self._media_dir / filename - try: - shutil.copy2(audio_path, dest) - except Exception as e: - return SendResult(success=False, error=f"Failed to copy audio: {e}") - - payload = { - "type": "play_audio", - "url": f"/media/{filename}", - } - await self._broadcast(payload) - return SendResult(success=True) - - async def send_image_file( - self, - chat_id: str, - image_path: str, - caption: Optional[str] = None, - reply_to: Optional[str] = None, - ) -> SendResult: - """Send a local image file by copying to media dir.""" - filename = f"img_{uuid.uuid4().hex[:8]}{Path(image_path).suffix}" - dest = self._media_dir / filename - try: - shutil.copy2(image_path, dest) - except Exception as e: - return SendResult(success=False, error=f"Failed to copy image: {e}") - return await self.send_image(chat_id, f"/media/{filename}", caption, reply_to) - - async def send_document( - self, - chat_id: str, - file_path: str, - caption: Optional[str] = None, - file_name: Optional[str] = None, - reply_to: Optional[str] = None, - **kwargs, - ) -> SendResult: - """Send a document file by copying to media dir.""" - orig_name = file_name or Path(file_path).name - safe_name = f"{uuid.uuid4().hex[:8]}_{orig_name}" - dest = self._media_dir / safe_name - try: - shutil.copy2(file_path, dest) - except Exception as e: - return SendResult(success=False, error=f"Failed to copy file: {e}") - - msg_id = str(uuid.uuid4())[:8] - payload = { - "type": "document", - "id": msg_id, - "url": f"/media/{safe_name}", - "filename": orig_name, - "caption": caption or "", - "timestamp": time.time(), - } - await self._broadcast(payload) - return SendResult(success=True, message_id=msg_id) - - async def get_chat_info(self, chat_id: str) -> Dict[str, Any]: - """Return basic chat info for the web session.""" - return {"name": "Web Chat", "type": "dm"} - - # ---- HTTP Handlers ---- - - async def _handle_index(self, request: web.Request) -> web.Response: - """Serve the chat UI HTML page.""" - html = _build_chat_html() - return web.Response(text=html, content_type="text/html") - - async def _handle_websocket(self, request: web.Request) -> web.WebSocketResponse: - """Handle WebSocket connections for real-time chat.""" - ws = web.WebSocketResponse(max_msg_size=50 * 1024 * 1024) - await ws.prepare(request) - - session_id = uuid.uuid4().hex[:12] - authenticated = False - - try: - async for msg in ws: - if msg.type == web.WSMsgType.TEXT: - try: - data = json.loads(msg.data) - except json.JSONDecodeError: - continue - - msg_type = data.get("type", "") - - # Auth handshake - if msg_type == "auth": - if hmac.compare_digest(data.get("token", ""), self._token): - authenticated = True - self._clients[session_id] = ws - await ws.send_str(json.dumps({ - "type": "auth_ok", - "session_id": session_id, - })) - else: - await ws.send_str(json.dumps({ - "type": "auth_fail", - "error": "Invalid token", - })) - continue - - if not authenticated: - await ws.send_str(json.dumps({"type": "auth_required"})) - continue - - # Chat message - if msg_type == "message": - text = data.get("text", "").strip() - if text: - await self._process_user_message(session_id, text) - - # Voice message (base64 audio) - elif msg_type == "voice": - await self._process_voice_message(session_id, data) - - elif msg.type in (web.WSMsgType.ERROR, web.WSMsgType.CLOSE): - break - except Exception as e: - logger.debug("WebSocket session %s error: %s", session_id, e) - finally: - self._clients.pop(session_id, None) - - return ws - - async def _handle_upload(self, request: web.Request) -> web.Response: - """Handle file uploads (images, voice recordings).""" - token = request.headers.get("Authorization", "").replace("Bearer ", "") - if not hmac.compare_digest(token, self._token): - return web.json_response({"error": "Unauthorized"}, status=401) - - reader = await request.multipart() - field = await reader.next() - if not field: - return web.json_response({"error": "No file"}, status=400) - - # Sanitize filename to prevent path traversal attacks - orig_name = Path(field.filename or "file").name - filename = f"upload_{uuid.uuid4().hex[:8]}_{orig_name}" - dest = self._media_dir / filename - - with open(dest, "wb") as f: - while True: - chunk = await field.read_chunk() - if not chunk: - break - f.write(chunk) - - return web.json_response({"url": f"/media/{filename}", "filename": filename}) - - async def _handle_media(self, request: web.Request) -> web.Response: - """Serve media files with token authentication.""" - token = request.query.get("token", "") - if not hmac.compare_digest(token, self._token): - return web.Response(status=401, text="Unauthorized") - - filename = Path(request.match_info["filename"]).name - filepath = self._media_dir / filename - if not filepath.exists() or not filepath.is_file(): - return web.Response(status=404, text="Not found") - - return web.FileResponse(filepath) - - # ---- Message Processing ---- - - async def _process_user_message(self, session_id: str, text: str) -> None: - """Build MessageEvent from user text and feed to handler.""" - msg_type = MessageType.COMMAND if text.startswith("/") else MessageType.TEXT - - source = self.build_source( - chat_id=f"web_{session_id}", - chat_name="Web Chat", - chat_type="dm", - user_id=session_id, - user_name="Web User", - ) - - event = MessageEvent( - text=text, - message_type=msg_type, - source=source, - message_id=uuid.uuid4().hex[:8], - ) - - if self._message_handler: - await self.handle_message(event) - - async def _process_voice_message(self, session_id: str, data: dict) -> None: - """Decode base64 voice audio, transcribe via STT, and process as message.""" - import tempfile - - audio_b64 = data.get("audio", "") - if not audio_b64: - return - - audio_bytes = base64.b64decode(audio_b64) - fmt = data.get("format", "webm") - tmp_path = os.path.join( - tempfile.gettempdir(), - f"web_voice_{uuid.uuid4().hex[:8]}.{fmt}", - ) - - with open(tmp_path, "wb") as f: - f.write(audio_bytes) - - try: - from tools.transcription_tools import transcribe_audio, get_stt_model_from_config - stt_model = get_stt_model_from_config() - result = await asyncio.to_thread(transcribe_audio, tmp_path, model=stt_model) - - if not result.get("success"): - await self._send_to_session(session_id, { - "type": "error", - "error": f"Transcription failed: {result.get('error', 'Unknown')}", - }) - return - - transcript = result.get("transcript", "").strip() - if not transcript: - return - - # Show transcript to user - await self._send_to_session(session_id, { - "type": "transcript", - "text": transcript, - }) - - # Process as voice message - source = self.build_source( - chat_id=f"web_{session_id}", - chat_name="Web Chat", - chat_type="dm", - user_id=session_id, - user_name="Web User", - ) - event = MessageEvent( - text=transcript, - message_type=MessageType.VOICE, - source=source, - message_id=uuid.uuid4().hex[:8], - media_urls=[tmp_path], - media_types=[f"audio/{fmt}"], - ) - if self._message_handler: - await self.handle_message(event) - except Exception as e: - logger.warning("Voice processing failed: %s", e, exc_info=True) - finally: - try: - os.unlink(tmp_path) - except OSError: - pass - - # ---- Internal Utilities ---- - - async def _broadcast(self, payload: dict) -> None: - """Send JSON payload to all connected WebSocket clients.""" - data = json.dumps(payload) - dead: List[str] = [] - for sid, ws in self._clients.items(): - try: - await ws.send_str(data) - except Exception: - dead.append(sid) - for sid in dead: - self._clients.pop(sid, None) - - async def _send_to_session(self, session_id: str, payload: dict) -> None: - """Send a message to a specific client session.""" - ws = self._clients.get(session_id) - if ws: - try: - await ws.send_str(json.dumps(payload)) - except Exception: - self._clients.pop(session_id, None) - - async def _media_cleanup_loop(self) -> None: - """Periodically delete old media files (older than 24h).""" - try: - while self._running: - await asyncio.sleep(3600) - cutoff = time.time() - 86400 - removed = 0 - for f in self._media_dir.iterdir(): - if f.is_file() and f.stat().st_mtime < cutoff: - try: - f.unlink() - removed += 1 - except OSError: - pass - if removed: - logger.debug("Web media cleanup: removed %d old file(s)", removed) - except asyncio.CancelledError: - pass - - @staticmethod - def _get_local_ips() -> List[str]: - """Get all non-loopback IPv4 addresses on this machine.""" - ips = [] - try: - import netifaces - for iface in netifaces.interfaces(): - addrs = netifaces.ifaddresses(iface).get(netifaces.AF_INET, []) - for addr in addrs: - ip = addr.get("addr", "") - if ip and not ip.startswith("127."): - ips.append(ip) - except ImportError: - # Fallback: parse ifconfig output - import subprocess - try: - out = subprocess.check_output(["ifconfig"], text=True, timeout=5) - for line in out.splitlines(): - line = line.strip() - if line.startswith("inet ") and "127.0.0.1" not in line: - parts = line.split() - if len(parts) >= 2: - ips.append(parts[1]) - except Exception: - pass - if not ips: - # Last resort: UDP trick (may return VPN IP) - try: - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - s.connect(("8.8.8.8", 80)) - ips.append(s.getsockname()[0]) - s.close() - except Exception: - ips.append("127.0.0.1") - return ips - - @staticmethod - def _get_local_ip() -> str: - """Get the most likely LAN IP address.""" - ips = WebAdapter._get_local_ips() - # Prefer 192.168.x.x or 10.x.x.x over VPN ranges like 172.16.x.x - for ip in ips: - if ip.startswith("192.168.") or ip.startswith("10."): - return ip - return ips[0] if ips else "127.0.0.1" - - -# --------------------------------------------------------------------------- -# Chat UI HTML -# --------------------------------------------------------------------------- - -def _build_chat_html() -> str: - """Build the complete single-page chat UI as an HTML string.""" - return ''' - - - - -Hermes - - - - - - - - - -
-

Hermes

-

Enter access token to connect

- - -
Invalid token. Try again.
-
- - -
-
-
Hermes
- Connected -
-
-
-
-
- - - -
-
- - - -''' diff --git a/gateway/run.py b/gateway/run.py index 173b2551e..75449d629 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -829,13 +829,6 @@ class GatewayRunner: return None return EmailAdapter(config) - elif platform == Platform.WEB: - from gateway.platforms.web import WebAdapter, check_web_requirements - if not check_web_requirements(): - logger.warning("Web: aiohttp not installed. Run: pip install aiohttp") - return None - return WebAdapter(config) - return None def _is_user_authorized(self, source: SessionSource) -> bool: @@ -855,11 +848,6 @@ class GatewayRunner: if source.platform == Platform.HOMEASSISTANT: return True - # Web UI users are authenticated via token at the WebSocket level. - # No additional allowlist check needed. - if source.platform == Platform.WEB: - return True - user_id = source.user_id if not user_id: return False @@ -978,7 +966,7 @@ class GatewayRunner: "personality", "retry", "undo", "sethome", "set-home", "compress", "usage", "insights", "reload-mcp", "reload_mcp", "update", "title", "resume", "provider", "rollback", - "background", "reasoning", "voice", "remote-control", "remote_control"} + "background", "reasoning", "voice"} if command and command in _known_commands: await self.hooks.emit(f"command:{command}", { "platform": source.platform.value if source.platform else "", @@ -1053,10 +1041,6 @@ class GatewayRunner: if command == "voice": return await self._handle_voice_command(event) - if command in ("remote-control", "remote_control"): - return await self._handle_remote_control_command(event) - - # User-defined quick commands (bypass agent loop, no LLM call) if command: quick_commands = self.config.get("quick_commands", {}) @@ -1741,7 +1725,6 @@ class GatewayRunner: "`/rollback [number]` — List or restore filesystem checkpoints", "`/background ` — Run a prompt in a separate background session", "`/voice [on|off|tts|status]` — Toggle voice reply mode", - "`/remote-control [port] [token]` — Start web UI for remote access", "`/reload-mcp` — Reload MCP servers from config", "`/update` — Update Hermes Agent to the latest version", "`/help` — Show this message", @@ -2415,10 +2398,6 @@ class GatewayRunner: } if event.source.thread_id: send_kwargs["metadata"] = {"thread_id": event.source.thread_id} - import inspect - sig = inspect.signature(adapter.send_voice) - if "metadata" not in sig.parameters: - send_kwargs.pop("metadata", None) await adapter.send_voice(**send_kwargs) except Exception as e: logger.warning("Auto voice reply failed: %s", e, exc_info=True) @@ -2488,62 +2467,6 @@ class GatewayRunner: ) return f"❌ {result['error']}" - async def _handle_remote_control_command(self, event: MessageEvent) -> str: - """Handle /remote-control — start or show the web UI for remote access.""" - from gateway.config import Platform, PlatformConfig - - is_dm = event.source and event.source.chat_type == "dm" - - # Already running? - if Platform.WEB in self.adapters: - adapter = self.adapters[Platform.WEB] - local_ip = adapter._get_local_ip() - token_display = adapter._token if is_dm else "(hidden — use in DM to see token)" - return ( - f"Web UI already running.\n" - f"URL: http://{local_ip}:{adapter._port}\n" - f"Token: {token_display}" - ) - - # Start web adapter on the fly - try: - from gateway.platforms.web import WebAdapter, check_web_requirements - if not check_web_requirements(): - return "Web UI requires aiohttp. Run: pip install aiohttp" - - args = event.get_command_args().strip() - port = 8765 - token = "" - for part in args.split(): - if part.isdigit(): - port = int(part) - elif part and not part.startswith("-"): - token = part - - web_config = PlatformConfig( - enabled=True, - extra={"port": port, "host": "127.0.0.1", "token": token}, - ) - adapter = WebAdapter(web_config) - adapter.set_message_handler(self._handle_message) - - success = await adapter.connect() - if not success: - return f"Failed to start Web UI on port {port}. Port may be in use." - - self.adapters[Platform.WEB] = adapter - local_ip = adapter._get_local_ip() - token_display = adapter._token if is_dm else "(hidden — use in DM to see token)" - return ( - f"Web UI started!\n" - f"URL: http://{local_ip}:{adapter._port}\n" - f"Token: {token_display}\n" - f"Open this URL on your phone or any device on the same network." - ) - except Exception as e: - logger.error("Failed to start web UI: %s", e, exc_info=True) - return f"Failed to start Web UI: {e}" - async def _handle_background_command(self, event: MessageEvent) -> str: """Handle /background — run a prompt in a separate background session. @@ -2607,7 +2530,6 @@ class GatewayRunner: Platform.SIGNAL: "hermes-signal", Platform.HOMEASSISTANT: "hermes-homeassistant", Platform.EMAIL: "hermes-email", - Platform.WEB: "hermes-web", } platform_toolsets_config = {} try: @@ -2629,7 +2551,6 @@ class GatewayRunner: Platform.SIGNAL: "signal", Platform.HOMEASSISTANT: "homeassistant", Platform.EMAIL: "email", - Platform.WEB: "web", }.get(source.platform, "telegram") config_toolsets = platform_toolsets_config.get(platform_config_key) @@ -3517,7 +3438,6 @@ class GatewayRunner: Platform.SIGNAL: "hermes-signal", Platform.HOMEASSISTANT: "hermes-homeassistant", Platform.EMAIL: "hermes-email", - Platform.WEB: "hermes-web", } # Try to load platform_toolsets from config @@ -3542,7 +3462,6 @@ class GatewayRunner: Platform.SIGNAL: "signal", Platform.HOMEASSISTANT: "homeassistant", Platform.EMAIL: "email", - Platform.WEB: "web", }.get(source.platform, "telegram") # Use config override if present (list of toolsets), otherwise hardcoded default diff --git a/gateway/session.py b/gateway/session.py index 3e42db4fe..86e42b595 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -383,7 +383,11 @@ class SessionStore: with open(sessions_file, "r", encoding="utf-8") as f: data = json.load(f) for key, entry_data in data.items(): - self._entries[key] = SessionEntry.from_dict(entry_data) + try: + self._entries[key] = SessionEntry.from_dict(entry_data) + except (ValueError, KeyError): + # Skip entries with unknown/removed platform values + continue except Exception as e: print(f"[gateway] Warning: Failed to load sessions: {e}") diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py index 45595d35b..4925f2845 100644 --- a/tests/gateway/test_voice_command.py +++ b/tests/gateway/test_voice_command.py @@ -390,33 +390,6 @@ class TestDiscordPlayTtsSkip: # Web play_tts sends play_audio (not voice bubble) # ===================================================================== -class TestWebPlayTts: - """Web adapter play_tts sends invisible play_audio, not a voice bubble.""" - - @pytest.mark.asyncio - async def test_play_tts_sends_play_audio(self, tmp_path): - from gateway.platforms.web import WebAdapter - from gateway.config import PlatformConfig - - config = PlatformConfig(enabled=True, extra={ - "port": 0, "host": "127.0.0.1", "token": "tok", - }) - adapter = WebAdapter(config) - adapter._broadcast = AsyncMock() - adapter._media_dir = tmp_path / "media" - adapter._media_dir.mkdir() - - audio_file = tmp_path / "test.ogg" - audio_file.write_bytes(b"fake audio") - - result = await adapter.play_tts(chat_id="web", audio_path=str(audio_file)) - assert result.success is True - - payload = adapter._broadcast.call_args[0][0] - assert payload["type"] == "play_audio" - assert "/media/" in payload["url"] - - # ===================================================================== # Help text + known commands # ===================================================================== diff --git a/tests/gateway/test_web.py b/tests/gateway/test_web.py deleted file mode 100644 index 141b0fa10..000000000 --- a/tests/gateway/test_web.py +++ /dev/null @@ -1,926 +0,0 @@ -"""Tests for the Web UI gateway platform adapter. - -Covers: -1. Platform enum exists with correct value -2. Config loading from env vars via _apply_env_overrides -3. WebAdapter init and config parsing (port, host, token) -4. Token auto-generation when not provided -5. check_web_requirements function -6. HTTP server start/stop (connect/disconnect) -7. Auth screen served on GET / -8. Media directory creation and cleanup -9. WebSocket auth handshake (auth_ok / auth_fail) -10. WebSocket message routing (text, voice) -11. Auto-TTS play_tts sends invisible playback -12. Authorization bypass (Web platform always authorized) -13. Toolset registration (hermes-web in toolset maps) -14. LAN IP detection (_get_local_ip / _get_local_ips) -15. Security: path traversal sanitization -16. Security: media endpoint authentication -17. Security: hmac.compare_digest for token comparison -18. Security: DOMPurify XSS prevention -19. Security: default bind to 127.0.0.1 -20. Security: /remote-control token hiding in group chats -21. Network: VPN/multi-interface IP detection edge cases -22. Network: startup message token exposure -""" - -import asyncio -import json -import os -import unittest -from pathlib import Path -from unittest.mock import patch, MagicMock, AsyncMock - -import pytest - -from gateway.config import GatewayConfig, Platform, PlatformConfig, _apply_env_overrides -from gateway.platforms.base import SendResult - - -# =========================================================================== -# 1. Platform Enum -# =========================================================================== - - -class TestPlatformEnum(unittest.TestCase): - """Verify WEB is in the Platform enum.""" - - def test_web_in_platform_enum(self): - self.assertEqual(Platform.WEB.value, "web") - - def test_web_distinct_from_others(self): - platforms = [p.value for p in Platform] - self.assertIn("web", platforms) - self.assertEqual(platforms.count("web"), 1) - - -# =========================================================================== -# 2. Config loading from env vars -# =========================================================================== - - -class TestConfigEnvOverrides(unittest.TestCase): - """Verify web UI config is loaded from environment variables.""" - - @patch.dict(os.environ, { - "WEB_UI_ENABLED": "true", - "WEB_UI_PORT": "9000", - "WEB_UI_HOST": "127.0.0.1", - "WEB_UI_TOKEN": "mytoken", - }, clear=False) - def test_web_config_loaded_from_env(self): - config = GatewayConfig() - _apply_env_overrides(config) - self.assertIn(Platform.WEB, config.platforms) - self.assertTrue(config.platforms[Platform.WEB].enabled) - self.assertEqual(config.platforms[Platform.WEB].extra["port"], 9000) - self.assertEqual(config.platforms[Platform.WEB].extra["host"], "127.0.0.1") - self.assertEqual(config.platforms[Platform.WEB].extra["token"], "mytoken") - - @patch.dict(os.environ, { - "WEB_UI_ENABLED": "true", - "WEB_UI_TOKEN": "", - "WEB_UI_HOST": "", - }, clear=False) - def test_web_defaults(self): - config = GatewayConfig() - _apply_env_overrides(config) - self.assertIn(Platform.WEB, config.platforms) - self.assertEqual(config.platforms[Platform.WEB].extra["port"], 8765) - self.assertEqual(config.platforms[Platform.WEB].extra["host"], "127.0.0.1") - self.assertEqual(config.platforms[Platform.WEB].extra["token"], "") - - @patch.dict(os.environ, {}, clear=True) - def test_web_not_loaded_without_env(self): - config = GatewayConfig() - _apply_env_overrides(config) - self.assertNotIn(Platform.WEB, config.platforms) - - @patch.dict(os.environ, {"WEB_UI_ENABLED": "false"}, clear=False) - def test_web_not_loaded_when_disabled(self): - config = GatewayConfig() - _apply_env_overrides(config) - self.assertNotIn(Platform.WEB, config.platforms) - - -# =========================================================================== -# 3. WebAdapter init -# =========================================================================== - - -class TestWebAdapterInit: - """Test adapter initialization and config parsing.""" - - def _make_adapter(self, **extra): - from gateway.platforms.web import WebAdapter - defaults = {"port": 8765, "host": "0.0.0.0", "token": ""} - defaults.update(extra) - config = PlatformConfig(enabled=True, extra=defaults) - return WebAdapter(config) - - def test_default_port(self): - adapter = self._make_adapter() - assert adapter._port == 8765 - - def test_custom_port(self): - adapter = self._make_adapter(port=9999) - assert adapter._port == 9999 - - def test_custom_host(self): - adapter = self._make_adapter(host="127.0.0.1") - assert adapter._host == "127.0.0.1" - - def test_explicit_token(self): - adapter = self._make_adapter(token="secret123") - assert adapter._token == "secret123" - - def test_auto_generated_token(self): - adapter = self._make_adapter(token="") - assert len(adapter._token) > 0 - assert adapter._token != "" - - def test_name_property(self): - adapter = self._make_adapter() - assert adapter.name == "Web" - - -# =========================================================================== -# 4. check_web_requirements -# =========================================================================== - - -class TestCheckRequirements: - def test_aiohttp_available(self): - from gateway.platforms.web import check_web_requirements - # aiohttp is installed in the test env - assert check_web_requirements() is True - - -# =========================================================================== -# 5. HTTP server connect/disconnect -# =========================================================================== - - -def _get_free_port(): - """Get a free port from the OS.""" - import socket - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind(("127.0.0.1", 0)) - return s.getsockname()[1] - - -class TestServerLifecycle: - """Test that the aiohttp server starts and stops correctly.""" - - def _make_adapter(self): - from gateway.platforms.web import WebAdapter - port = _get_free_port() - config = PlatformConfig(enabled=True, extra={ - "port": port, "host": "127.0.0.1", "token": "test", - }) - return WebAdapter(config) - - @pytest.mark.asyncio - async def test_connect_starts_server(self): - adapter = self._make_adapter() - try: - result = await adapter.connect() - assert result is True - assert adapter._runner is not None - finally: - await adapter.disconnect() - - @pytest.mark.asyncio - async def test_disconnect_stops_server(self): - adapter = self._make_adapter() - await adapter.connect() - await adapter.disconnect() - assert adapter._runner is None or True # cleanup done - - @pytest.mark.asyncio - async def test_serves_html_on_get(self): - import aiohttp - adapter = self._make_adapter() - try: - await adapter.connect() - port = adapter._port - async with aiohttp.ClientSession() as session: - async with session.get(f"http://127.0.0.1:{port}/") as resp: - assert resp.status == 200 - text = await resp.text() - assert "Hermes" in text - assert "= 1 - - -# =========================================================================== -# 13. play_tts base class fallback -# =========================================================================== - - -class TestPlayTtsBaseFallback: - """Test that base class play_tts falls back to send_voice.""" - - @pytest.mark.asyncio - async def test_base_play_tts_calls_send_voice(self): - """Web adapter overrides play_tts; verify it sends play_audio not voice.""" - from gateway.platforms.web import WebAdapter - config = PlatformConfig(enabled=True, extra={ - "port": 8765, "host": "127.0.0.1", "token": "tok", - }) - adapter = WebAdapter(config) - adapter._broadcast = AsyncMock() - adapter._media_dir = Path("/tmp/test_media") - adapter._media_dir.mkdir(exist_ok=True) - - import tempfile - with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f: - f.write(b"fake") - tmp = f.name - try: - result = await adapter.play_tts(chat_id="test", audio_path=tmp) - assert result.success is True - payload = adapter._broadcast.call_args[0][0] - assert payload["type"] == "play_audio" - finally: - os.unlink(tmp) - - -# =========================================================================== -# 14. Media directory management -# =========================================================================== - - -class TestMediaDirectory: - """Test media directory is created on adapter init.""" - - def test_media_dir_created(self, tmp_path): - from gateway.platforms.web import WebAdapter - config = PlatformConfig(enabled=True, extra={ - "port": 8765, "host": "127.0.0.1", "token": "tok", - }) - adapter = WebAdapter(config) - assert adapter._media_dir.exists() or True # may use default path - - -# =========================================================================== -# 15. Security: Path traversal sanitization -# =========================================================================== - - -class TestPathTraversalSanitization: - """Upload filenames with traversal sequences are sanitized.""" - - def test_path_name_strips_traversal(self): - """Path.name strips directory traversal from filenames.""" - assert Path("../../../etc/passwd").name == "passwd" - assert Path("normal_file.txt").name == "normal_file.txt" - assert Path("/absolute/path/file.txt").name == "file.txt" - - @pytest.mark.asyncio - async def test_upload_produces_safe_filename(self): - import aiohttp - from gateway.platforms.web import WebAdapter - - port = _get_free_port() - config = PlatformConfig(enabled=True, extra={ - "port": port, "host": "127.0.0.1", "token": "tok", - }) - adapter = WebAdapter(config) - try: - await adapter.connect() - async with aiohttp.ClientSession() as session: - data = aiohttp.FormData() - data.add_field("file", b"test content", - filename="safe_file.txt", - content_type="application/octet-stream") - async with session.post( - f"http://127.0.0.1:{port}/upload", - data=data, - headers={"Authorization": "Bearer tok"}, - ) as resp: - assert resp.status == 200 - result = await resp.json() - assert result["filename"].startswith("upload_") - assert "safe_file.txt" in result["filename"] - # File must be inside media dir, not escaped - assert result["url"].startswith("/media/") - finally: - await adapter.disconnect() - - def test_sanitize_in_source_code(self): - """Verify source code uses Path().name for filename sanitization.""" - import inspect - from gateway.platforms.web import WebAdapter - source = inspect.getsource(WebAdapter._handle_upload) - assert "Path(" in source and ".name" in source - - -# =========================================================================== -# 16. Security: Media endpoint authentication -# =========================================================================== - - -class TestMediaEndpointAuth: - """Media files require a valid token query parameter.""" - - @pytest.mark.asyncio - async def test_media_without_token_returns_401(self): - import aiohttp - from gateway.platforms.web import WebAdapter - - port = _get_free_port() - config = PlatformConfig(enabled=True, extra={ - "port": port, "host": "127.0.0.1", "token": "secret", - }) - adapter = WebAdapter(config) - try: - await adapter.connect() - async with aiohttp.ClientSession() as session: - async with session.get( - f"http://127.0.0.1:{port}/media/test.txt" - ) as resp: - assert resp.status == 401 - - finally: - await adapter.disconnect() - - @pytest.mark.asyncio - async def test_media_with_wrong_token_returns_401(self): - import aiohttp - from gateway.platforms.web import WebAdapter - - port = _get_free_port() - config = PlatformConfig(enabled=True, extra={ - "port": port, "host": "127.0.0.1", "token": "secret", - }) - adapter = WebAdapter(config) - try: - await adapter.connect() - async with aiohttp.ClientSession() as session: - async with session.get( - f"http://127.0.0.1:{port}/media/test.txt?token=wrong" - ) as resp: - assert resp.status == 401 - finally: - await adapter.disconnect() - - @pytest.mark.asyncio - async def test_media_with_valid_token_serves_file(self): - import aiohttp - from gateway.platforms.web import WebAdapter - - port = _get_free_port() - config = PlatformConfig(enabled=True, extra={ - "port": port, "host": "127.0.0.1", "token": "secret", - }) - adapter = WebAdapter(config) - try: - await adapter.connect() - # Create a test file in the media directory - test_file = adapter._media_dir / "testfile.txt" - test_file.write_text("hello") - - async with aiohttp.ClientSession() as session: - async with session.get( - f"http://127.0.0.1:{port}/media/testfile.txt?token=secret" - ) as resp: - assert resp.status == 200 - body = await resp.text() - assert body == "hello" - finally: - await adapter.disconnect() - - @pytest.mark.asyncio - async def test_media_path_traversal_in_url_blocked(self): - import aiohttp - from gateway.platforms.web import WebAdapter - - port = _get_free_port() - config = PlatformConfig(enabled=True, extra={ - "port": port, "host": "127.0.0.1", "token": "secret", - }) - adapter = WebAdapter(config) - try: - await adapter.connect() - async with aiohttp.ClientSession() as session: - async with session.get( - f"http://127.0.0.1:{port}/media/..%2F..%2Fetc%2Fpasswd?token=secret" - ) as resp: - assert resp.status == 404 - finally: - await adapter.disconnect() - - -# =========================================================================== -# 17. Security: hmac.compare_digest for token comparison -# =========================================================================== - - -class TestHmacTokenComparison: - """Verify source code uses hmac.compare_digest, not == / !=.""" - - def test_no_equality_operator_for_token(self): - import inspect - from gateway.platforms.web import WebAdapter - source = inspect.getsource(WebAdapter) - # There should be no `== self._token` or `!= self._token` in the source - assert "== self._token" not in source, \ - "Token comparison must use hmac.compare_digest, not ==" - assert "!= self._token" not in source, \ - "Token comparison must use hmac.compare_digest, not !=" - - def test_hmac_compare_digest_used(self): - import inspect - from gateway.platforms.web import WebAdapter - source = inspect.getsource(WebAdapter) - assert "hmac.compare_digest" in source - - -# =========================================================================== -# 18. Security: DOMPurify XSS prevention -# =========================================================================== - - -class TestDomPurifyPresent: - """HTML template includes DOMPurify for XSS prevention.""" - - def test_dompurify_script_included(self): - from gateway.platforms.web import _build_chat_html - html = _build_chat_html() - assert "dompurify" in html.lower() - assert "DOMPurify.sanitize" in html - - def test_marked_output_sanitized(self): - from gateway.platforms.web import _build_chat_html - html = _build_chat_html() - assert "DOMPurify.sanitize(marked.parse(" in html - - -# =========================================================================== -# 19. Security: default bind to localhost -# =========================================================================== - - -class TestDefaultBindLocalhost: - """Default host should be 127.0.0.1, not 0.0.0.0.""" - - def test_adapter_default_host(self): - from gateway.platforms.web import WebAdapter - config = PlatformConfig(enabled=True, extra={}) - adapter = WebAdapter(config) - assert adapter._host == "127.0.0.1" - - @patch.dict(os.environ, {"WEB_UI_ENABLED": "true"}, clear=True) - def test_config_default_host(self): - config = GatewayConfig() - _apply_env_overrides(config) - assert config.platforms[Platform.WEB].extra["host"] == "127.0.0.1" - - -# =========================================================================== -# 20. Security: /remote-control token hiding in group chats -# =========================================================================== - - -class TestRemoteControlTokenHiding: - """Token should be hidden when /remote-control is used in group chats.""" - - def _make_runner(self, tmp_path): - from gateway.run import GatewayRunner - runner = object.__new__(GatewayRunner) - runner.adapters = {} - runner._voice_mode = {} - runner._VOICE_MODE_PATH = tmp_path / "voice.json" - runner._session_db = None - runner.session_store = MagicMock() - return runner - - def _make_event(self, chat_type="dm"): - from gateway.platforms.base import MessageEvent, SessionSource - source = SessionSource( - chat_id="test", - user_id="user1", - platform=Platform.WEB, - chat_type=chat_type, - ) - event = MessageEvent(text="/remote-control", source=source) - event.message_id = "msg1" - return event - - @pytest.mark.asyncio - async def test_token_visible_in_dm(self, tmp_path): - from gateway.platforms.web import WebAdapter - runner = self._make_runner(tmp_path) - # Simulate a running web adapter - config = PlatformConfig(enabled=True, extra={ - "port": 8765, "host": "127.0.0.1", "token": "mysecret", - }) - adapter = WebAdapter(config) - runner.adapters[Platform.WEB] = adapter - event = self._make_event(chat_type="dm") - result = await runner._handle_remote_control_command(event) - assert "mysecret" in result - - @pytest.mark.asyncio - async def test_token_hidden_in_group(self, tmp_path): - from gateway.platforms.web import WebAdapter - runner = self._make_runner(tmp_path) - config = PlatformConfig(enabled=True, extra={ - "port": 8765, "host": "127.0.0.1", "token": "mysecret", - }) - adapter = WebAdapter(config) - runner.adapters[Platform.WEB] = adapter - event = self._make_event(chat_type="group") - result = await runner._handle_remote_control_command(event) - assert "mysecret" not in result - assert "hidden" in result.lower() - - -# =========================================================================== -# 21. VPN / multi-interface IP detection edge cases -# =========================================================================== - -class TestVpnAndMultiInterfaceIp: - """IP detection must prefer LAN IPs over VPN and handle edge cases.""" - - def test_lan_preferred_over_vpn(self): - """192.168.x.x or 10.x.x.x should be chosen over 172.16.x.x VPN.""" - from gateway.platforms.web import WebAdapter - with unittest.mock.patch.object( - WebAdapter, "_get_local_ips", - return_value=["172.16.0.2", "192.168.1.106"], - ): - ip = WebAdapter._get_local_ip() - assert ip == "192.168.1.106" - - def test_ten_network_preferred_over_vpn(self): - """10.x.x.x corporate LAN should be preferred over 172.16.x.x VPN.""" - from gateway.platforms.web import WebAdapter - with unittest.mock.patch.object( - WebAdapter, "_get_local_ips", - return_value=["172.16.5.1", "10.0.0.50"], - ): - ip = WebAdapter._get_local_ip() - assert ip == "10.0.0.50" - - def test_only_vpn_ip_still_returned(self): - """If only VPN IP exists, return it rather than nothing.""" - from gateway.platforms.web import WebAdapter - with unittest.mock.patch.object( - WebAdapter, "_get_local_ips", - return_value=["172.16.0.2"], - ): - ip = WebAdapter._get_local_ip() - assert ip == "172.16.0.2" - - def test_no_interfaces_returns_localhost(self): - """If no IPs found at all, fall back to 127.0.0.1.""" - from gateway.platforms.web import WebAdapter - with unittest.mock.patch.object( - WebAdapter, "_get_local_ips", - return_value=[], - ): - ip = WebAdapter._get_local_ip() - assert ip == "127.0.0.1" - - def test_multiple_lan_ips_returns_first_match(self): - """Multiple LAN IPs: first 192.168/10.x match wins.""" - from gateway.platforms.web import WebAdapter - with unittest.mock.patch.object( - WebAdapter, "_get_local_ips", - return_value=["172.16.0.2", "192.168.1.50", "10.0.0.1"], - ): - ip = WebAdapter._get_local_ip() - assert ip == "192.168.1.50" - - def test_get_local_ips_excludes_loopback(self): - """_get_local_ips must not return 127.x.x.x addresses.""" - from gateway.platforms.web import WebAdapter - import inspect - source = inspect.getsource(WebAdapter._get_local_ips) - # Must filter out 127.x addresses - assert "127." in source, \ - "_get_local_ips must filter loopback addresses" - - def test_get_local_ips_netifaces_fallback(self): - """When netifaces is unavailable, ifconfig fallback must work.""" - from gateway.platforms.web import WebAdapter - import inspect - source = inspect.getsource(WebAdapter._get_local_ips) - assert "ifconfig" in source, \ - "_get_local_ips must have ifconfig fallback" - assert "ImportError" in source, \ - "_get_local_ips must catch netifaces ImportError" - - -# =========================================================================== -# 22. Startup message token exposure -# =========================================================================== - -class TestStartupTokenExposure: - """Configured tokens must not be printed in startup output.""" - - def test_auto_generated_flag_when_no_token(self): - """Token auto-generation flag must be set when no token provided.""" - from gateway.platforms.web import WebAdapter - config = PlatformConfig(enabled=True, extra={ - "port": 8765, "host": "127.0.0.1", "token": "", - }) - adapter = WebAdapter(config) - assert adapter._token_auto_generated is True - assert len(adapter._token) == 32 # secrets.token_hex(16) = 32 chars - - def test_configured_flag_when_token_set(self): - """Token auto-generation flag must be False when token is provided.""" - from gateway.platforms.web import WebAdapter - config = PlatformConfig(enabled=True, extra={ - "port": 8765, "host": "127.0.0.1", "token": "mytoken123", - }) - adapter = WebAdapter(config) - assert adapter._token_auto_generated is False - assert adapter._token == "mytoken123" - - def test_startup_log_hides_configured_token(self): - """connect() must not print the token value when set via env.""" - from gateway.platforms.web import WebAdapter - import inspect - source = inspect.getsource(WebAdapter.connect) - # Must check _token_auto_generated before printing - assert "_token_auto_generated" in source, \ - "connect() must check _token_auto_generated before printing token" - - def test_startup_log_shows_auto_token(self): - """connect() must print the token when auto-generated.""" - from gateway.platforms.web import WebAdapter - import inspect - source = inspect.getsource(WebAdapter.connect) - # Must have a branch that prints the actual token - assert "auto-generated" in source, \ - "connect() must indicate when token is auto-generated" diff --git a/tests/tools/test_transcription.py b/tests/tools/test_transcription.py index 433c9466f..fe3b24a8d 100644 --- a/tests/tools/test_transcription.py +++ b/tests/tools/test_transcription.py @@ -125,7 +125,7 @@ class TestTranscribeLocal: mock_model.transcribe.return_value = ([mock_segment], mock_info) with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \ - patch("tools.transcription_tools.WhisperModel", return_value=mock_model), \ + patch("faster_whisper.WhisperModel", return_value=mock_model), \ patch("tools.transcription_tools._local_model", None): from tools.transcription_tools import _transcribe_local result = _transcribe_local(str(audio_file), "base") @@ -164,7 +164,7 @@ class TestTranscribeOpenAI: mock_client.audio.transcriptions.create.return_value = "Hello from OpenAI" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_openai result = _transcribe_openai(str(audio_file), "whisper-1") diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py index 384802b46..2f5b7cfbe 100644 --- a/tests/tools/test_transcription_tools.py +++ b/tests/tools/test_transcription_tools.py @@ -160,7 +160,7 @@ class TestTranscribeGroq: mock_client.audio.transcriptions.create.return_value = "hello world" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_groq result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo") @@ -175,7 +175,7 @@ class TestTranscribeGroq: mock_client.audio.transcriptions.create.return_value = " hello world \n" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_groq result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo") @@ -188,7 +188,7 @@ class TestTranscribeGroq: mock_client.audio.transcriptions.create.return_value = "test" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client) as mock_openai_cls: + patch("openai.OpenAI", return_value=mock_client) as mock_openai_cls: from tools.transcription_tools import _transcribe_groq, GROQ_BASE_URL _transcribe_groq(sample_wav, "whisper-large-v3-turbo") @@ -202,7 +202,7 @@ class TestTranscribeGroq: mock_client.audio.transcriptions.create.side_effect = Exception("API error") with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_groq result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo") @@ -216,7 +216,7 @@ class TestTranscribeGroq: mock_client.audio.transcriptions.create.side_effect = PermissionError("denied") with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_groq result = _transcribe_groq(sample_wav, "whisper-large-v3-turbo") @@ -244,7 +244,7 @@ class TestTranscribeOpenAIExtended: mock_client.audio.transcriptions.create.return_value = "test" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client) as mock_openai_cls: + patch("openai.OpenAI", return_value=mock_client) as mock_openai_cls: from tools.transcription_tools import _transcribe_openai, OPENAI_BASE_URL _transcribe_openai(sample_wav, "whisper-1") @@ -258,7 +258,7 @@ class TestTranscribeOpenAIExtended: mock_client.audio.transcriptions.create.return_value = " hello \n" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_openai result = _transcribe_openai(sample_wav, "whisper-1") @@ -271,7 +271,7 @@ class TestTranscribeOpenAIExtended: mock_client.audio.transcriptions.create.side_effect = PermissionError("denied") with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_openai result = _transcribe_openai(sample_wav, "whisper-1") @@ -300,7 +300,7 @@ class TestTranscribeLocalExtended: mock_whisper_cls = MagicMock(return_value=mock_model) with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \ - patch("tools.transcription_tools.WhisperModel", mock_whisper_cls), \ + patch("faster_whisper.WhisperModel", mock_whisper_cls), \ patch("tools.transcription_tools._local_model", None), \ patch("tools.transcription_tools._local_model_name", None): from tools.transcription_tools import _transcribe_local @@ -326,7 +326,7 @@ class TestTranscribeLocalExtended: mock_whisper_cls = MagicMock(return_value=mock_model) with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \ - patch("tools.transcription_tools.WhisperModel", mock_whisper_cls), \ + patch("faster_whisper.WhisperModel", mock_whisper_cls), \ patch("tools.transcription_tools._local_model", None), \ patch("tools.transcription_tools._local_model_name", None): from tools.transcription_tools import _transcribe_local @@ -342,7 +342,7 @@ class TestTranscribeLocalExtended: mock_whisper_cls = MagicMock(side_effect=RuntimeError("CUDA out of memory")) with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \ - patch("tools.transcription_tools.WhisperModel", mock_whisper_cls), \ + patch("faster_whisper.WhisperModel", mock_whisper_cls), \ patch("tools.transcription_tools._local_model", None): from tools.transcription_tools import _transcribe_local result = _transcribe_local(str(audio), "large-v3") @@ -366,7 +366,7 @@ class TestTranscribeLocalExtended: mock_model.transcribe.return_value = ([seg1, seg2], mock_info) with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \ - patch("tools.transcription_tools.WhisperModel", return_value=mock_model), \ + patch("faster_whisper.WhisperModel", return_value=mock_model), \ patch("tools.transcription_tools._local_model", None): from tools.transcription_tools import _transcribe_local result = _transcribe_local(str(audio), "base") @@ -387,7 +387,7 @@ class TestModelAutoCorrection: mock_client.audio.transcriptions.create.return_value = "hello world" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_groq, DEFAULT_GROQ_STT_MODEL _transcribe_groq(sample_wav, "whisper-1") @@ -401,7 +401,7 @@ class TestModelAutoCorrection: mock_client.audio.transcriptions.create.return_value = "test" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_groq, DEFAULT_GROQ_STT_MODEL _transcribe_groq(sample_wav, "gpt-4o-transcribe") @@ -415,7 +415,7 @@ class TestModelAutoCorrection: mock_client.audio.transcriptions.create.return_value = "hello world" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_openai, DEFAULT_STT_MODEL _transcribe_openai(sample_wav, "whisper-large-v3-turbo") @@ -429,7 +429,7 @@ class TestModelAutoCorrection: mock_client.audio.transcriptions.create.return_value = "test" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_openai, DEFAULT_STT_MODEL _transcribe_openai(sample_wav, "distil-whisper-large-v3-en") @@ -443,7 +443,7 @@ class TestModelAutoCorrection: mock_client.audio.transcriptions.create.return_value = "test" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_groq _transcribe_groq(sample_wav, "whisper-large-v3") @@ -457,7 +457,7 @@ class TestModelAutoCorrection: mock_client.audio.transcriptions.create.return_value = "test" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_openai _transcribe_openai(sample_wav, "gpt-4o-mini-transcribe") @@ -472,7 +472,7 @@ class TestModelAutoCorrection: mock_client.audio.transcriptions.create.return_value = "test" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_groq _transcribe_groq(sample_wav, "my-custom-model") @@ -486,7 +486,7 @@ class TestModelAutoCorrection: mock_client.audio.transcriptions.create.return_value = "test" with patch("tools.transcription_tools._HAS_OPENAI", True), \ - patch("tools.transcription_tools.OpenAI", return_value=mock_client): + patch("openai.OpenAI", return_value=mock_client): from tools.transcription_tools import _transcribe_openai _transcribe_openai(sample_wav, "my-custom-model") diff --git a/tests/tools/test_voice_mode.py b/tests/tools/test_voice_mode.py index 70424fee9..cb86b881f 100644 --- a/tests/tools/test_voice_mode.py +++ b/tests/tools/test_voice_mode.py @@ -345,6 +345,10 @@ class TestPlayAudioFile: np = pytest.importorskip("numpy") mock_sd_obj = MagicMock() + # Simulate stream completing immediately (get_stream().active = False) + mock_stream = MagicMock() + mock_stream.active = False + mock_sd_obj.get_stream.return_value = mock_stream def _fake_import(): return mock_sd_obj, np @@ -357,7 +361,7 @@ class TestPlayAudioFile: assert result is True mock_sd_obj.play.assert_called_once() - mock_sd_obj.wait.assert_called_once() + mock_sd_obj.stop.assert_called_once() def test_returns_false_when_no_player(self, monkeypatch, sample_wav): def _fail_import(): diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index 09ffb6a7a..a20ba4134 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -34,18 +34,9 @@ logger = logging.getLogger(__name__) # Optional imports — graceful degradation # --------------------------------------------------------------------------- -try: - from faster_whisper import WhisperModel - _HAS_FASTER_WHISPER = True -except ImportError: - _HAS_FASTER_WHISPER = False - WhisperModel = None # type: ignore[assignment,misc] - -try: - from openai import OpenAI, APIError, APIConnectionError, APITimeoutError - _HAS_OPENAI = True -except ImportError: - _HAS_OPENAI = False +import importlib.util as _ilu +_HAS_FASTER_WHISPER = _ilu.find_spec("faster_whisper") is not None +_HAS_OPENAI = _ilu.find_spec("openai") is not None # --------------------------------------------------------------------------- # Constants @@ -67,7 +58,7 @@ OPENAI_MODELS = {"whisper-1", "gpt-4o-mini-transcribe", "gpt-4o-transcribe"} GROQ_MODELS = {"whisper-large-v3", "whisper-large-v3-turbo", "distil-whisper-large-v3-en"} # Singleton for the local model — loaded once, reused across calls -_local_model: Optional["WhisperModel"] = None +_local_model: Optional[object] = None _local_model_name: Optional[str] = None # --------------------------------------------------------------------------- @@ -195,6 +186,7 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]: return {"success": False, "transcript": "", "error": "faster-whisper not installed"} try: + from faster_whisper import WhisperModel # Lazy-load the model (downloads on first use, ~150 MB for 'base') if _local_model is None or _local_model_name != model_name: logger.info("Loading faster-whisper model '%s' (first load downloads the model)...", model_name) @@ -235,6 +227,7 @@ def _transcribe_groq(file_path: str, model_name: str) -> Dict[str, Any]: model_name = DEFAULT_GROQ_STT_MODEL try: + from openai import OpenAI, APIError, APIConnectionError, APITimeoutError client = OpenAI(api_key=api_key, base_url=GROQ_BASE_URL, timeout=30, max_retries=0) with open(file_path, "rb") as audio_file: @@ -282,6 +275,7 @@ def _transcribe_openai(file_path: str, model_name: str) -> Dict[str, Any]: model_name = DEFAULT_STT_MODEL try: + from openai import OpenAI, APIError, APIConnectionError, APITimeoutError client = OpenAI(api_key=api_key, base_url=OPENAI_BASE_URL, timeout=30, max_retries=0) with open(file_path, "rb") as audio_file: diff --git a/tools/voice_mode.py b/tools/voice_mode.py index d5ae94f6a..3afe533a5 100644 --- a/tools/voice_mode.py +++ b/tools/voice_mode.py @@ -636,7 +636,13 @@ def play_audio_file(file_path: str) -> bool: sample_rate = wf.getframerate() sd.play(audio_data, samplerate=sample_rate) - sd.wait() + # sd.wait() calls Event.wait() without timeout — hangs forever if + # the audio device stalls. Poll with a ceiling and force-stop. + duration_secs = len(audio_data) / sample_rate + deadline = time.monotonic() + duration_secs + 2.0 + while sd.get_stream() and sd.get_stream().active and time.monotonic() < deadline: + time.sleep(0.01) + sd.stop() return True except (ImportError, OSError): pass # audio libs not available, fall through to system players diff --git a/toolsets.py b/toolsets.py index 1a80d42b5..221ff2ca8 100644 --- a/toolsets.py +++ b/toolsets.py @@ -292,16 +292,10 @@ TOOLSETS = { "includes": [] }, - "hermes-web": { - "description": "Web UI bot toolset - browser-based chat interface (full access)", - "tools": _HERMES_CORE_TOOLS, - "includes": [] - }, - "hermes-gateway": { "description": "Gateway toolset - union of all messaging platform tools", "tools": [], - "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant", "hermes-email", "hermes-web"] + "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant", "hermes-email"] } } diff --git a/website/docs/user-guide/features/voice-mode.md b/website/docs/user-guide/features/voice-mode.md index 65543273b..ce151643a 100644 --- a/website/docs/user-guide/features/voice-mode.md +++ b/website/docs/user-guide/features/voice-mode.md @@ -478,10 +478,6 @@ The bot requires an @mention by default in server channels. Make sure you: - Edge TTS (free, no key) is the default fallback - Check logs for TTS errors -### Web UI issues (firewall, mobile mic) - -See the [Web UI Troubleshooting](../messaging/web.md#troubleshooting) guide for firewall, HTTPS, and mobile microphone issues. - ### Whisper returns garbage text The hallucination filter catches most cases automatically. If you're still getting phantom transcripts: diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md index b018cb9d9..debc841b8 100644 --- a/website/docs/user-guide/messaging/index.md +++ b/website/docs/user-guide/messaging/index.md @@ -15,12 +15,12 @@ Chat with Hermes from Telegram, Discord, Slack, WhatsApp, Signal, Email, Home As │ Hermes Gateway │ ├───────────────────────────────────────────────────────────────────────────────┤ │ │ -│ ┌──────────┐ ┌─────────┐ ┌──────────┐ ┌───────┐ ┌───────┐ ┌───────┐ ┌────┐ ┌─────┐│ -│ │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │Signal │ │ Email │ │ HA │ │ Web ││ -│ │ Adapter │ │ Adapter │ │ Adapter │ │Adapter│ │Adapter│ │Adapter│ │Adpt│ │Adpt ││ -│ └────┬─────┘ └────┬────┘ └────┬─────┘ └──┬────┘ └──┬────┘ └──┬────┘ └─┬──┘ └──┬──┘│ -│ │ │ │ │ │ │ │ │ │ -│ └─────────────┴───────────┴───────────┴─────────┴─────────┴────────┴───────┘ │ +│ ┌──────────┐ ┌─────────┐ ┌──────────┐ ┌───────┐ ┌───────┐ ┌───────┐ ┌────┐ │ +│ │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │Signal │ │ Email │ │ HA │ │ +│ │ Adapter │ │ Adapter │ │ Adapter │ │Adapter│ │Adapter│ │Adapter│ │Adpt│ │ +│ └────┬─────┘ └────┬────┘ └────┬─────┘ └──┬────┘ └──┬────┘ └──┬────┘ └─┬──┘ │ +│ │ │ │ │ │ │ │ │ +│ └─────────────┴───────────┴───────────┴─────────┴─────────┴────────┘ │ │ │ │ │ ┌────────▼────────┐ │ │ │ Session Store │ │ @@ -81,7 +81,6 @@ hermes gateway status # Check service status | `/background ` | Run a prompt in a separate background session | | `/reload-mcp` | Reload MCP servers from config | | `/update` | Update Hermes Agent to the latest version | -| `/remote-control [port] [token]` | Start web UI for remote access | | `/help` | Show available commands | | `/` | Invoke any installed skill | @@ -221,4 +220,3 @@ Each platform has its own toolset: - [Signal Setup](signal.md) - [Email Setup](email.md) - [Home Assistant Integration](homeassistant.md) -- [Web UI Setup](web.md) diff --git a/website/docs/user-guide/messaging/web.md b/website/docs/user-guide/messaging/web.md deleted file mode 100644 index e16c3488f..000000000 --- a/website/docs/user-guide/messaging/web.md +++ /dev/null @@ -1,206 +0,0 @@ ---- -sidebar_position: 8 -title: "Web UI" -description: "Access Hermes from any browser on your network — phone, tablet, or desktop" ---- - -# Web UI Setup - -Access Hermes from any browser on your local network. Open the URL on your phone, tablet, or another computer — no app install, no third-party account needed. - -:::info No External Dependencies -The Web adapter uses `aiohttp`, which is already included in the `[messaging]` extra. No additional packages or external services are required. -::: - -## Overview - -| Component | Value | -|-----------|-------| -| **Library** | `aiohttp` (HTTP + WebSocket) | -| **Connection** | Local network (LAN) | -| **Auth** | Token-based (auto-generated or custom) | -| **Features** | Markdown, code highlighting, voice messages, images, mobile responsive | - ---- - -## Quick Start - -### Option 1: On-Demand via Command - -Start the gateway normally, then type from any connected platform (Telegram, Discord, etc.): - -``` -/remote-control -``` - -The bot replies with the URL and access token. Open the URL on your phone. - -You can also specify a custom port and token: - -``` -/remote-control 9000 mysecrettoken -``` - -### Option 2: Auto-Start with Gateway - -Add to `~/.hermes/.env`: - -```bash -WEB_UI_ENABLED=true -WEB_UI_PORT=8765 # default: 8765 -WEB_UI_TOKEN=mytoken # auto-generated if empty -``` - -Start the gateway: - -```bash -hermes gateway -``` - -The web UI starts automatically alongside your other platforms. - ---- - -## Features - -### Markdown & Code Highlighting - -Bot responses render full GitHub-flavored Markdown with syntax-highlighted code blocks powered by highlight.js. - -### Voice Conversation - -Click the microphone button to record a voice message. The audio is transcribed via Whisper STT (using OpenAI or Groq as fallback) and sent to the agent. The bot automatically replies with audio playback — voice first, then the text response appears. No extra configuration needed. - -STT uses `VOICE_TOOLS_OPENAI_KEY` (OpenAI Whisper) if set, otherwise falls back to `GROQ_API_KEY` (Groq Whisper, free tier). If you only need STT, setting `GROQ_API_KEY` is the simplest option. TTS uses Edge TTS (free, no key) by default, or ElevenLabs/OpenAI if configured in `~/.hermes/config.yaml`. - -### Images & Files - -- Images display inline in the chat -- Documents show as download links -- Generated images from the agent appear automatically - -### Mobile Responsive - -The UI adapts to phone screens — full chat experience with touch-friendly input and buttons. - -### Typing Indicator - -Shows an animated indicator while the agent is processing your message. - -### Auto-Reconnect - -If the connection drops (server restart, network change), the client automatically reconnects with exponential backoff. - ---- - -## Firewall & Network - -### macOS Firewall - -macOS may block incoming connections by default. If devices on your network can't connect: - -1. **System Settings** > **Network** > **Firewall** -2. Either disable the firewall temporarily, or add Python to the allowed apps - -### Localhost Only - -To restrict access to the local machine only: - -```bash -WEB_UI_HOST=127.0.0.1 -``` - -### Remote Access (Outside LAN) - -The Web UI is designed for local network access. For access from outside your network, use a tunnel: - -```bash -# Using ngrok -ngrok http 8765 - -# Using Cloudflare Tunnel -cloudflared tunnel --url http://localhost:8765 - -# Using Tailscale (recommended — encrypted, no port forwarding) -# Install Tailscale on both devices, then access via Tailscale IP -``` - ---- - -## Security - -- **Token authentication** — every WebSocket connection must authenticate with the correct token before sending messages -- **No data leaves your network** — the server runs locally, chat data stays on your machine -- **No HTTPS by default** — traffic is unencrypted on the LAN. Use a reverse proxy or tunnel for encryption -- **File uploads** require the auth token in the `Authorization` header -- **Media cleanup** — uploaded and generated files are automatically deleted after 24 hours - ---- - -## Environment Variables - -| Variable | Default | Description | -|----------|---------|-------------| -| `WEB_UI_ENABLED` | `false` | Enable the web gateway | -| `WEB_UI_PORT` | `8765` | HTTP server port | -| `WEB_UI_HOST` | `127.0.0.1` | Bind address (`0.0.0.0` = LAN, `127.0.0.1` = localhost) | -| `WEB_UI_TOKEN` | (auto) | Access token. Auto-generated if empty. | - ---- - -## Troubleshooting - -### "Server not found" on phone - -- Verify both devices are on the **same WiFi network** -- Check if macOS firewall is blocking (see Firewall section above) -- Try the IP address shown in console output, not `localhost` -- If using VPN, the console shows all available IPs — try each one - -### Port already in use - -Change the port in `.env`: - -```bash -WEB_UI_PORT=9000 -``` - -### Voice recording not working - -- Browser must support `MediaRecorder` API (Chrome, Firefox, Safari 14.5+) -- HTTPS is required for microphone access on non-localhost origins -- On localhost (`127.0.0.1`), HTTP works fine for microphone - -### Microphone not working on mobile - -Mobile browsers require **HTTPS** for microphone access (`navigator.mediaDevices` API). When accessing the Web UI over HTTP on a LAN IP (e.g. `http://192.168.1.x:8765`), the mic button will appear dimmed. - -**Android Chrome** — flag the LAN IP as secure: -1. Open `chrome://flags/#unsafely-treat-insecure-origin-as-secure` -2. Add your Web UI URL (e.g. `http://192.168.1.106:8765`) -3. Set to **Enabled** and relaunch Chrome - -**iOS Safari / Chrome** — no flag bypass available. Use one of these instead: - -1. **Self-signed HTTPS** with mkcert (recommended): - ```bash - brew install mkcert && mkcert -install - mkcert 192.168.1.106 - npx local-ssl-proxy --source 8443 --target 8765 \ - --cert 192.168.1.106.pem --key 192.168.1.106-key.pem - ``` - Then access `https://192.168.1.106:8443`. Trust the mkcert root CA on iOS: **Settings > General > About > Certificate Trust Settings**. - -2. **SSH tunnel from mobile** (if you have Termius or similar): - ```bash - ssh -L 8765:127.0.0.1:8765 user@your-mac-ip - ``` - Then access `http://localhost:8765` — localhost is exempt from the HTTPS requirement. - -:::tip -Text chat works on mobile over HTTP without any workaround — only the microphone feature requires HTTPS. -::: - -### CDN resources not loading - -The UI loads `marked.js` and `highlight.js` from CDN. If you're offline or behind a restrictive proxy, markdown rendering and code highlighting won't work but basic chat still functions.