From 9d1483c7e64765e2f1be511c83e415e2baee0529 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 16 Mar 2026 06:38:20 -0700 Subject: [PATCH] =?UTF-8?q?feat(browser):=20/browser=20connect=20=E2=80=94?= =?UTF-8?q?=20attach=20browser=20tools=20to=20live=20Chrome=20via=20CDP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add /browser slash command for connecting browser tools to the user's live Chrome instance via Chrome DevTools Protocol: /browser connect — connect to Chrome on localhost:9222 /browser connect ws://host:port — custom CDP endpoint /browser disconnect — revert to default (headless/Browserbase) /browser status — show current browser mode + connectivity When connected: - All browser tools (navigate, snapshot, click, etc.) control the user's real Chrome — logged-in sessions, cookies, open tabs - Platform-specific Chrome launch instructions are shown - Port connectivity is tested immediately - A context message is injected so the model knows it's controlling a live browser and should be mindful of user's open tabs Implementation: - BROWSER_CDP_URL env var drives the backend selection in browser_tool.py - New _create_cdp_session() creates sessions using the CDP override - _get_cdp_override() checked before local/Browserbase selection - Existing agent-browser --cdp flag handles the actual CDP connection Inspired by OpenClaw's browser profile system. --- cli.py | 136 ++++++++++++++++++++++++++++++ hermes_cli/commands.py | 1 + tests/hermes_cli/test_commands.py | 2 +- tools/browser_tool.py | 31 ++++++- 4 files changed, 168 insertions(+), 2 deletions(-) diff --git a/cli.py b/cli.py index aa888fd6a..8116f5fd0 100755 --- a/cli.py +++ b/cli.py @@ -1420,6 +1420,8 @@ class HermesCLI: return "Processing skills command..." if cmd_lower == "/reload-mcp": return "Reloading MCP servers..." + if cmd_lower.startswith("/browser"): + return "Configuring browser..." return "Processing command..." 
def _handle_browser_command(self, cmd: str):
    """Handle ``/browser connect|disconnect|status`` for the live-Chrome CDP backend.

    Sub-commands (the first word after ``/browser``, case-insensitive;
    a bare ``/browser`` means ``status``):

    * ``connect [URL]`` -- store ``URL`` (default ``ws://localhost:9222``) in
      the ``BROWSER_CDP_URL`` environment variable, drop existing browser
      sessions, print launch instructions, probe the endpoint and queue a
      note for the model.
    * ``disconnect`` -- remove ``BROWSER_CDP_URL`` again (no-op message when
      it was not set) and queue a note for the model.
    * ``status`` -- print which backend is active and, when a CDP URL is
      set, whether its endpoint accepts TCP connections.

    Anything else prints a usage summary and changes nothing.

    Args:
        cmd: The raw command line as typed by the user, including the
            leading ``/browser``.

    Returns:
        None. All results are printed; state is carried by
        ``BROWSER_CDP_URL`` and, when present, ``self._pending_input``.
    """
    import logging
    import platform
    import socket
    from urllib.parse import urlsplit

    default_cdp = "ws://localhost:9222"
    default_port = 9222

    def _endpoint(url):
        """Return the ``(host, port)`` that *url* points at, with local defaults."""
        # urlsplit only recognises a network location after "scheme://" or "//",
        # so a bare "host:port" needs the "//" prefix to be parsed as one.
        target = url if "://" in url else f"//{url}"
        try:
            split = urlsplit(target)
            host = split.hostname
        except ValueError:
            # e.g. malformed IPv6 brackets
            return "127.0.0.1", default_port
        try:
            port = split.port
        except ValueError:
            # non-numeric or out-of-range port
            port = None
        return host or "127.0.0.1", port or default_port

    def _reachable(host, port):
        """Return True when a TCP connection to ``host:port`` succeeds within 1s."""
        try:
            # The context manager closes the socket on every path.
            with socket.create_connection((host, port), timeout=1):
                return True
        except (OSError, UnicodeError):
            # OSError covers refusal, timeout and DNS failure; UnicodeError
            # is raised for host names that cannot be IDNA-encoded.
            return False

    def _reset_sessions():
        """Best-effort teardown of existing browser sessions."""
        try:
            from tools.browser_tool import cleanup_all_browsers
            cleanup_all_browsers()
        except Exception:
            # Deliberately non-fatal: the command must still complete, but
            # leave a trace instead of discarding the error silently.
            logging.getLogger(__name__).debug(
                "Browser session cleanup failed", exc_info=True
            )

    def _notify_model(note):
        """Queue *note* for the model when this CLI has a pending-input queue."""
        if hasattr(self, "_pending_input"):
            self._pending_input.put(note)

    tokens = cmd.split()
    # Exact match on the sub-command word, so typos such as "/browser connected"
    # fall through to the usage text instead of silently rewiring the backend.
    sub = tokens[1].lower() if len(tokens) > 1 else "status"
    current = os.environ.get("BROWSER_CDP_URL", "").strip()

    if sub == "connect":
        # The URL is taken from the original command so its case is preserved.
        cdp_url = tokens[2] if len(tokens) > 2 else default_cdp
        host, port = _endpoint(cdp_url)

        os.environ["BROWSER_CDP_URL"] = cdp_url
        # Clear existing sessions so the next tool call picks the new backend.
        _reset_sessions()

        print()
        print("🌐 Browser connected to live Chrome via CDP")
        print(f" Endpoint: {cdp_url}")
        print()

        # Platform-specific launch hint, using the port the user asked for.
        sys_name = platform.system()
        if sys_name == "Darwin":
            chrome_cmd = (
                "/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome "
                f"--remote-debugging-port={port}"
            )
        elif sys_name == "Windows":
            chrome_cmd = f"chrome.exe --remote-debugging-port={port}"
        else:
            chrome_cmd = f"google-chrome --remote-debugging-port={port}"

        print(" If Chrome isn't running with remote debugging yet:")
        print(f" $ {chrome_cmd}")
        print()

        # Probe the host named in the URL, not unconditionally 127.0.0.1.
        if _reachable(host, port):
            print(f" ✓ Port {port} is open — Chrome is reachable")
        else:
            print(f" ⚠ Port {port} is not open — launch Chrome with the command above first")
        print()

        _notify_model(
            "[System note: The user has connected the browser tools to their live Chrome browser "
            "session via Chrome DevTools Protocol. You now have access to their real browser — "
            "any pages they have open, their logged-in sessions, bookmarks, etc. "
            "Use the browser tools (browser_navigate, browser_snapshot, browser_click, etc.) "
            "to interact with their live browser. Be mindful that actions affect their real browser. "
            "Ask before closing tabs or navigating away from pages they might be using.]"
        )

    elif sub == "disconnect":
        if current:
            os.environ.pop("BROWSER_CDP_URL", None)
            _reset_sessions()
            print()
            print("🌐 Browser disconnected from live Chrome")
            print(" Browser tools reverted to default mode (local headless or Browserbase)")
            print()

            _notify_model(
                "[System note: The user has disconnected the browser tools from their live Chrome. "
                "Browser tools are back to default mode (headless local browser or Browserbase cloud).]"
            )
        else:
            print()
            print("Browser is not connected to live Chrome (already using default mode)")
            print()

    elif sub == "status":
        print()
        if current:
            print("🌐 Browser: connected to live Chrome via CDP")
            print(f" Endpoint: {current}")
            if _reachable(*_endpoint(current)):
                print(" Status: ✓ reachable")
            else:
                print(" Status: ⚠ not reachable (Chrome may not be running)")
        elif os.environ.get("BROWSERBASE_API_KEY"):
            print("🌐 Browser: Browserbase (cloud)")
        else:
            print("🌐 Browser: local headless Chromium (agent-browser)")
        print()
        print(" /browser connect — connect to your live Chrome")
        print(" /browser disconnect — revert to default")
        print()

    else:
        print()
        print("Usage: /browser connect|disconnect|status")
        print()
        print(" connect Connect browser tools to your live Chrome session")
        print(" disconnect Revert to default browser backend")
        print(" status Show current browser mode")
        print()
def _get_cdp_override() -> str:
    """Return the CDP endpoint override from the environment, or ``""``.

    Reads ``BROWSER_CDP_URL`` (set, for example, by ``/browser connect``) and
    returns it with surrounding whitespace removed. An unset or blank
    variable yields the empty string, which callers treat as "no override".
    """
    raw_value = os.environ.get("BROWSER_CDP_URL")
    if raw_value is None:
        return ""
    return raw_value.strip()
""" + if _get_cdp_override(): + return False # CDP override takes priority return not (os.environ.get("BROWSERBASE_API_KEY") and os.environ.get("BROWSERBASE_PROJECT_ID")) @@ -608,6 +620,20 @@ def _create_local_session(task_id: str) -> Dict[str, str]: } +def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]: + """Create a session that connects to a user-supplied CDP endpoint.""" + import uuid + session_name = f"cdp_{uuid.uuid4().hex[:10]}" + logger.info("Created CDP browser session %s → %s for task %s", + session_name, cdp_url, task_id) + return { + "session_name": session_name, + "bb_session_id": None, + "cdp_url": cdp_url, + "features": {"cdp_override": True}, + } + + def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: """ Get or create session info for the given task. @@ -638,7 +664,10 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: return _active_sessions[task_id] # Create session outside the lock (network call in cloud mode) - if _is_local_mode(): + cdp_override = _get_cdp_override() + if cdp_override: + session_info = _create_cdp_session(task_id, cdp_override) + elif _is_local_mode(): session_info = _create_local_session(task_id) else: session_info = _create_browserbase_session(task_id)