feat(browser): /browser connect — attach browser tools to live Chrome via CDP

Add /browser slash command for connecting browser tools to the user's
live Chrome instance via Chrome DevTools Protocol:

  /browser connect       — connect to Chrome on localhost:9222
  /browser connect ws://host:port  — custom CDP endpoint
  /browser disconnect    — revert to default (headless/Browserbase)
  /browser status        — show current browser mode + connectivity

When connected:
- All browser tools (navigate, snapshot, click, etc.) control the
  user's real Chrome — logged-in sessions, cookies, open tabs
- Platform-specific Chrome launch instructions are shown
- Port connectivity is tested immediately
- A context message is injected so the model knows it's controlling
  a live browser and should be mindful of the user's open tabs

Implementation:
- BROWSER_CDP_URL env var drives the backend selection in browser_tool.py
- New _create_cdp_session() creates sessions using the CDP override
- _get_cdp_override() is checked before local/Browserbase selection
- Existing agent-browser --cdp flag handles the actual CDP connection

Inspired by OpenClaw's browser profile system.
This commit is contained in:
teknium1
2026-03-16 06:38:20 -07:00
parent 9a423c3487
commit 9d1483c7e6
4 changed files with 168 additions and 2 deletions

136
cli.py
View File

@@ -1420,6 +1420,8 @@ class HermesCLI:
return "Processing skills command..."
if cmd_lower == "/reload-mcp":
return "Reloading MCP servers..."
if cmd_lower.startswith("/browser"):
return "Configuring browser..."
return "Processing command..."
def _command_spinner_frame(self) -> str:
@@ -3235,6 +3237,8 @@ class HermesCLI:
elif cmd_lower == "/reload-mcp":
with self._busy_command(self._slow_command_status(cmd_original)):
self._reload_mcp()
elif cmd_lower.startswith("/browser"):
self._handle_browser_command(cmd_original)
elif cmd_lower.startswith("/rollback"):
self._handle_rollback_command(cmd_original)
elif cmd_lower.startswith("/background"):
@@ -3451,6 +3455,138 @@ class HermesCLI:
self._background_tasks[task_id] = thread
thread.start()
def _handle_browser_command(self, cmd: str):
"""Handle /browser connect|disconnect|status — manage live Chrome CDP connection."""
import platform as _plat
import subprocess as _sp
parts = cmd.strip().split(None, 1)
sub = parts[1].lower().strip() if len(parts) > 1 else "status"
_DEFAULT_CDP = "ws://localhost:9222"
current = os.environ.get("BROWSER_CDP_URL", "").strip()
if sub.startswith("connect"):
# Optionally accept a custom CDP URL: /browser connect ws://host:port
connect_parts = cmd.strip().split(None, 2) # ["/browser", "connect", "ws://..."]
cdp_url = connect_parts[2].strip() if len(connect_parts) > 2 else _DEFAULT_CDP
os.environ["BROWSER_CDP_URL"] = cdp_url
# Clear any existing browser sessions so the next tool call uses the new backend
try:
from tools.browser_tool import cleanup_all_browsers
cleanup_all_browsers()
except Exception:
pass
print()
print("🌐 Browser connected to live Chrome via CDP")
print(f" Endpoint: {cdp_url}")
print()
# Platform-specific launch instructions
sys_name = _plat.system()
if sys_name == "Darwin":
chrome_cmd = '/Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222'
elif sys_name == "Windows":
chrome_cmd = 'chrome.exe --remote-debugging-port=9222'
else:
chrome_cmd = "google-chrome --remote-debugging-port=9222"
print(" If Chrome isn't running with remote debugging yet:")
print(f" $ {chrome_cmd}")
print()
# Quick connectivity test
_port = 9222
try:
_port = int(cdp_url.rsplit(":", 1)[-1].split("/")[0])
except (ValueError, IndexError):
pass
try:
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(1)
s.connect(("127.0.0.1", _port))
s.close()
print(f" ✓ Port {_port} is open — Chrome is reachable")
except (OSError, socket.timeout):
print(f" ⚠ Port {_port} is not open — launch Chrome with the command above first")
print()
# Inject context message so the model knows
if hasattr(self, '_pending_input'):
self._pending_input.put(
"[System note: The user has connected the browser tools to their live Chrome browser "
"session via Chrome DevTools Protocol. You now have access to their real browser — "
"any pages they have open, their logged-in sessions, bookmarks, etc. "
"Use the browser tools (browser_navigate, browser_snapshot, browser_click, etc.) "
"to interact with their live browser. Be mindful that actions affect their real browser. "
"Ask before closing tabs or navigating away from pages they might be using.]"
)
elif sub == "disconnect":
if current:
os.environ.pop("BROWSER_CDP_URL", None)
try:
from tools.browser_tool import cleanup_all_browsers
cleanup_all_browsers()
except Exception:
pass
print()
print("🌐 Browser disconnected from live Chrome")
print(" Browser tools reverted to default mode (local headless or Browserbase)")
print()
if hasattr(self, '_pending_input'):
self._pending_input.put(
"[System note: The user has disconnected the browser tools from their live Chrome. "
"Browser tools are back to default mode (headless local browser or Browserbase cloud).]"
)
else:
print()
print("Browser is not connected to live Chrome (already using default mode)")
print()
elif sub == "status":
print()
if current:
print(f"🌐 Browser: connected to live Chrome via CDP")
print(f" Endpoint: {current}")
_port = 9222
try:
_port = int(current.rsplit(":", 1)[-1].split("/")[0])
except (ValueError, IndexError):
pass
try:
import socket
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(1)
s.connect(("127.0.0.1", _port))
s.close()
print(f" Status: ✓ reachable")
except (OSError, Exception):
print(f" Status: ⚠ not reachable (Chrome may not be running)")
elif os.environ.get("BROWSERBASE_API_KEY"):
print("🌐 Browser: Browserbase (cloud)")
else:
print("🌐 Browser: local headless Chromium (agent-browser)")
print()
print(" /browser connect — connect to your live Chrome")
print(" /browser disconnect — revert to default")
print()
else:
print()
print("Usage: /browser connect|disconnect|status")
print()
print(" connect Connect browser tools to your live Chrome session")
print(" disconnect Revert to default browser backend")
print(" status Show current browser mode")
print()
def _handle_skin_command(self, cmd: str):
"""Handle /skin [name] — show or change the display skin."""
try:

View File

@@ -45,6 +45,7 @@ COMMANDS_BY_CATEGORY = {
"/skills": "Search, install, inspect, or manage skills from online registries",
"/cron": "Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove)",
"/reload-mcp": "Reload MCP servers from config.yaml",
"/browser": "Connect browser tools to your live Chrome (usage: /browser connect|disconnect|status)",
},
"Info": {
"/help": "Show this help message",

View File

@@ -12,7 +12,7 @@ EXPECTED_COMMANDS = {
"/personality", "/clear", "/history", "/new", "/reset", "/retry",
"/undo", "/save", "/config", "/cron", "/skills", "/platforms",
"/verbose", "/reasoning", "/compress", "/title", "/usage", "/insights", "/paste",
"/reload-mcp", "/rollback", "/background", "/skin", "/voice", "/quit",
"/reload-mcp", "/rollback", "/background", "/skin", "/voice", "/browser", "/quit",
}

View File

@@ -98,6 +98,16 @@ def _get_extraction_model() -> Optional[str]:
return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
def _get_cdp_override() -> str:
"""Return a user-supplied CDP URL override, or empty string.
When ``BROWSER_CDP_URL`` is set (e.g. via ``/browser connect``), we skip
both Browserbase and the local headless launcher and connect directly to
the supplied Chrome DevTools Protocol endpoint.
"""
return os.environ.get("BROWSER_CDP_URL", "").strip()
def _is_local_mode() -> bool:
"""Return True when no Browserbase credentials are configured.
@@ -105,6 +115,8 @@ def _is_local_mode() -> bool:
``agent-browser --session`` instead of connecting to a remote Browserbase
session via ``--cdp``.
"""
if _get_cdp_override():
return False # CDP override takes priority
return not (os.environ.get("BROWSERBASE_API_KEY") and os.environ.get("BROWSERBASE_PROJECT_ID"))
@@ -608,6 +620,20 @@ def _create_local_session(task_id: str) -> Dict[str, str]:
}
def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]:
"""Create a session that connects to a user-supplied CDP endpoint."""
import uuid
session_name = f"cdp_{uuid.uuid4().hex[:10]}"
logger.info("Created CDP browser session %s%s for task %s",
session_name, cdp_url, task_id)
return {
"session_name": session_name,
"bb_session_id": None,
"cdp_url": cdp_url,
"features": {"cdp_override": True},
}
def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
"""
Get or create session info for the given task.
@@ -638,7 +664,10 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
return _active_sessions[task_id]
# Create session outside the lock (network call in cloud mode)
if _is_local_mode():
cdp_override = _get_cdp_override()
if cdp_override:
session_info = _create_cdp_session(task_id, cdp_override)
elif _is_local_mode():
session_info = _create_local_session(task_id)
else:
session_info = _create_browserbase_session(task_id)