LLM responses from browser snapshot extraction and vision analysis could echo back secrets that appeared on screen or in page content. Input redaction alone is insufficient — the LLM may reproduce secrets it read from screenshots (which cannot be text-redacted).

Outputs are now redacted from:
- _extract_relevant_content (auxiliary LLM response)
- browser_vision (vision LLM response)
- camofox_vision (vision LLM response)
572 lines
20 KiB
Python
572 lines
20 KiB
Python
"""Camofox browser backend — local anti-detection browser via REST API.
|
|
|
|
Camofox-browser is a self-hosted Node.js server wrapping Camoufox (Firefox
|
|
fork with C++ fingerprint spoofing). It exposes a REST API that maps 1:1
|
|
to our browser tool interface: accessibility snapshots with element refs,
|
|
click/type/scroll by ref, screenshots, etc.
|
|
|
|
When ``CAMOFOX_URL`` is set (e.g. ``http://localhost:9377``), the browser
|
|
tools route through this module instead of the ``agent-browser`` CLI.
|
|
|
|
Setup::
|
|
|
|
# Option 1: npm
|
|
git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser
|
|
npm install && npm start # downloads Camoufox (~300MB) on first run
|
|
|
|
# Option 2: Docker
|
|
docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser
|
|
|
|
Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import json
|
|
import logging
|
|
import os
|
|
import threading
|
|
import time
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Any, Dict, Optional
|
|
|
|
import requests
|
|
|
|
from hermes_cli.config import load_config
|
|
from tools.browser_camofox_state import get_camofox_identity
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Default ``timeout`` of the HTTP helpers (_post/_get/_get_raw/_delete) and of
# the tab-creation request in _ensure_tab.
_DEFAULT_TIMEOUT = 30  # seconds per HTTP request
# NOTE(review): not referenced anywhere in the visible part of this module.
_SNAPSHOT_MAX_CHARS = 80_000  # camofox paginates at this limit
# The two values below are written by check_camofox_available() and read by
# get_vnc_url().
_vnc_url: Optional[str] = None  # cached from /health response
_vnc_url_checked = False  # only probe once per process
|
|
|
|
|
|
def get_camofox_url() -> str:
    """Return the configured Camofox server URL, or an empty string.

    Reads ``CAMOFOX_URL`` from the environment. Surrounding whitespace and
    trailing slashes are removed so callers can append ``/path`` directly,
    and so a blank or whitespace-only value counts as "not configured"
    instead of switching the browser tools to an unusable URL.
    """
    return os.getenv("CAMOFOX_URL", "").strip().rstrip("/")
|
|
|
|
|
|
def is_camofox_mode() -> bool:
    """Report whether a Camofox server URL has been configured."""
    configured_url = get_camofox_url()
    return True if configured_url else False
|
|
|
|
|
|
def check_camofox_available() -> bool:
    """Verify the Camofox server is reachable.

    Sends ``GET {CAMOFOX_URL}/health`` with a 5 second timeout. On the first
    200 response the body is also inspected for a ``vncPort`` field; when it
    holds a valid port number the VNC URL is cached in ``_vnc_url``.
    ``_vnc_url_checked`` is then set so later calls skip the body parsing.

    Returns:
        True when the health endpoint answered with status 200. False when
        no URL is configured, the request failed, or another status came
        back. Never raises.
    """
    global _vnc_url, _vnc_url_checked
    url = get_camofox_url()
    if not url:
        return False
    try:
        resp = requests.get(f"{url}/health", timeout=5)
        if resp.status_code == 200 and not _vnc_url_checked:
            try:
                data = resp.json()
            except ValueError:
                # Body is not JSON: the server is up, it just advertises no VNC.
                data = None
            # A non-object body (list, string, ...) has no .get(); treat it as
            # "no VNC info" so a healthy server is not reported as unavailable.
            vnc_port = data.get("vncPort") if isinstance(data, dict) else None
            if (
                isinstance(vnc_port, int)
                and not isinstance(vnc_port, bool)  # True would pass as port 1
                and 1 <= vnc_port <= 65535
            ):
                from urllib.parse import urlparse

                host = urlparse(url).hostname or "localhost"
                if ":" in host:
                    # urlparse strips the brackets from IPv6 literals; they are
                    # required again once a port is appended.
                    host = f"[{host}]"
                _vnc_url = f"http://{host}:{vnc_port}"
            _vnc_url_checked = True
        return resp.status_code == 200
    except Exception:
        return False
|
|
|
|
|
|
def get_vnc_url() -> Optional[str]:
    """Return the cached VNC URL, or None when the server exposes none.

    If the health endpoint has not been inspected yet, a probe is triggered
    first via ``check_camofox_available()``.
    """
    if _vnc_url_checked:
        return _vnc_url
    check_camofox_available()
    return _vnc_url
|
|
|
|
|
|
def _managed_persistence_enabled() -> bool:
    """Return whether Hermes-managed persistence is enabled for Camofox.

    When enabled, sessions use a stable profile-scoped userId so the
    Camofox server can map it to a persistent browser profile directory.
    When disabled (default), each session gets a random userId (ephemeral).

    Controlled by ``browser.camofox.managed_persistence`` in config.yaml.

    Returns:
        The truthiness of that setting. False when the config cannot be
        loaded or when the ``browser`` / ``camofox`` sections are missing,
        empty (``null``) or not mappings.
    """
    try:
        # ``or {}`` covers sections that are present but null, e.g. a bare
        # ``camofox:`` line. Anything else that is not a mapping raises on
        # ``.get`` and is treated as "disabled" below.
        browser_cfg = load_config().get("browser") or {}
        camofox_cfg = browser_cfg.get("camofox") or {}
        return bool(camofox_cfg.get("managed_persistence"))
    except Exception:
        return False
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Session management
# ---------------------------------------------------------------------------
# Maps task_id -> {"user_id": str, "tab_id": str|None,
#                  "session_key": str, "managed": bool}
# Entries are created by _get_session(); "tab_id" is filled in later by
# _ensure_tab().
_sessions: Dict[str, Dict[str, Any]] = {}
# Held by _get_session(), _drop_session() and cleanup_all_camofox_sessions()
# while they read or modify ``_sessions``.
_sessions_lock = threading.Lock()
|
|
|
|
|
|
def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
    """Return the Camofox session record for a task, creating it on first use.

    A falsy ``task_id`` maps to the shared ``"default"`` record. With managed
    persistence enabled, ``user_id`` and ``session_key`` come from
    ``get_camofox_identity`` so the same identity is reused across restarts.
    Otherwise a random ``hermes_<hex>`` user id is generated and the session
    key is built from the first 16 characters of the task id.

    The returned dict is the registry entry itself, not a copy.
    """
    key = task_id or "default"
    with _sessions_lock:
        try:
            return _sessions[key]
        except KeyError:
            pass

        managed = _managed_persistence_enabled()
        if managed:
            identity = get_camofox_identity(key)
            user_id = identity["user_id"]
            session_key = identity["session_key"]
        else:
            user_id = f"hermes_{uuid.uuid4().hex[:10]}"
            session_key = f"task_{key[:16]}"

        record = {
            "user_id": user_id,
            "tab_id": None,  # filled in once a tab has been opened
            "session_key": session_key,
            "managed": managed,
        }
        _sessions[key] = record
        return record
|
|
|
|
|
|
def _ensure_tab(task_id: Optional[str], url: str = "about:blank") -> Dict[str, Any]:
    """Return the task's session, opening a tab for it when it has none.

    When the session has no ``tab_id`` yet, ``POST /tabs`` is sent with the
    session's user id, session key and ``url``, and the ``tabId`` from the
    reply is stored on the session.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    session = _get_session(task_id)
    if not session["tab_id"]:
        payload = {
            "userId": session["user_id"],
            "sessionKey": session["session_key"],
            "url": url,
        }
        created = requests.post(
            f"{get_camofox_url()}/tabs",
            json=payload,
            timeout=_DEFAULT_TIMEOUT,
        )
        created.raise_for_status()
        session["tab_id"] = created.json().get("tabId")
    return session
|
|
|
|
|
|
def _drop_session(task_id: Optional[str]) -> Optional[Dict[str, Any]]:
    """Remove the task's session from the registry and hand it back.

    A falsy ``task_id`` refers to the ``"default"`` session. Returns None
    when no session was registered under that id.
    """
    key = task_id if task_id else "default"
    with _sessions_lock:
        removed = _sessions.pop(key, None)
    return removed
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# HTTP helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _post(path: str, body: dict, timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """Send ``body`` as JSON in a POST to the Camofox server; return the decoded reply.

    ``path`` is appended verbatim to the configured base URL.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    reply = requests.post(f"{get_camofox_url()}{path}", json=body, timeout=timeout)
    reply.raise_for_status()
    return reply.json()
|
|
|
|
|
|
def _get(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """Issue a GET against the Camofox server and return the decoded JSON reply.

    ``path`` is appended verbatim to the configured base URL; ``params`` is
    passed through as the query string.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    reply = requests.get(f"{get_camofox_url()}{path}", params=params, timeout=timeout)
    reply.raise_for_status()
    return reply.json()
|
|
|
|
|
|
def _get_raw(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> requests.Response:
    """Issue a GET against the Camofox server and return the response object.

    Unlike ``_get`` the body is not decoded, so callers can read binary
    payloads from ``.content``.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    reply = requests.get(f"{get_camofox_url()}{path}", params=params, timeout=timeout)
    reply.raise_for_status()
    return reply
|
|
|
|
|
|
def _delete(path: str, body: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """Send a DELETE to the Camofox server and return the decoded JSON reply.

    ``path`` is appended verbatim to the configured base URL; ``body`` (if
    any) is sent as the JSON request body.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    reply = requests.delete(f"{get_camofox_url()}{path}", json=body, timeout=timeout)
    reply.raise_for_status()
    return reply.json()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tool implementations
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
    """Open ``url`` in the task's Camofox tab.

    The first navigation of a session creates the tab directly at ``url``;
    later calls navigate the existing tab (60 second timeout).

    Returns:
        A JSON string. On success it holds ``success``, ``url`` and ``title``,
        plus ``vnc_url`` / ``vnc_hint`` when the server advertises VNC. On
        failure it holds ``success: false`` and an ``error`` message.
    """
    try:
        session = _get_session(task_id)
        if session["tab_id"]:
            # A tab already exists: navigate it.
            data = _post(
                f"/tabs/{session['tab_id']}/navigate",
                {"userId": session["user_id"], "url": url},
                timeout=60,
            )
        else:
            # No tab yet: create one that starts on the target URL.
            session = _ensure_tab(task_id, url)
            data = {"ok": True, "url": url}

        result = {
            "success": True,
            "url": data.get("url", url),
            "title": data.get("title", ""),
        }
        vnc = get_vnc_url()
        if vnc:
            result.update(
                vnc_url=vnc,
                vnc_hint=(
                    "Browser is visible via VNC. "
                    "Share this link with the user so they can watch the browser live."
                ),
            )
        return json.dumps(result)
    except requests.HTTPError as exc:
        return json.dumps({"success": False, "error": f"Navigation failed: {exc}"})
    except requests.ConnectionError:
        message = (
            f"Cannot connect to Camofox at {get_camofox_url()}. "
            "Is the server running? Start with: npm start (in camofox-browser dir) "
            "or: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser"
        )
        return json.dumps({"success": False, "error": message})
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
|
|
|
|
|
|
def camofox_snapshot(full: bool = False, task_id: Optional[str] = None,
                     user_task: Optional[str] = None) -> str:
    """Fetch the accessibility-tree snapshot of the task's tab.

    Snapshots longer than ``SNAPSHOT_SUMMARIZE_THRESHOLD`` are reduced with
    the helpers from ``tools.browser_tool``: ``_extract_relevant_content``
    when ``user_task`` is given, otherwise ``_truncate_snapshot``. ``full`` is
    accepted but not consulted by this implementation.

    Returns:
        A JSON string with ``success``, ``snapshot`` and ``element_count``, or
        ``success: false`` plus ``error`` when there is no tab or a step fails.
    """
    try:
        session = _get_session(task_id)
        tab_id = session["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        data = _get(f"/tabs/{tab_id}/snapshot", params={"userId": session["user_id"]})
        snapshot = data.get("snapshot", "")
        refs_count = data.get("refsCount", 0)

        # Apply same summarization logic as the main browser tool
        from tools.browser_tool import (
            SNAPSHOT_SUMMARIZE_THRESHOLD,
            _extract_relevant_content,
            _truncate_snapshot,
        )

        if len(snapshot) > SNAPSHOT_SUMMARIZE_THRESHOLD:
            snapshot = (
                _extract_relevant_content(snapshot, user_task)
                if user_task
                else _truncate_snapshot(snapshot)
            )

        payload = {"success": True, "snapshot": snapshot, "element_count": refs_count}
        return json.dumps(payload)
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
|
|
|
|
|
|
def camofox_click(ref: str, task_id: Optional[str] = None) -> str:
    """Click the element identified by ``ref`` in the task's tab.

    Leading ``@`` characters (the tool-side ref convention) are removed
    before the ref is sent to the server.

    Returns:
        A JSON string with ``success``, ``clicked`` and the ``url`` reported
        by the server, or ``success: false`` plus ``error``.
    """
    try:
        session = _get_session(task_id)
        tab_id = session["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        target = ref.lstrip("@")
        reply = _post(
            f"/tabs/{tab_id}/click",
            {"userId": session["user_id"], "ref": target},
        )
        payload = {"success": True, "clicked": target, "url": reply.get("url", "")}
        return json.dumps(payload)
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
|
|
|
|
|
|
def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
    """Type ``text`` into the element identified by ``ref``.

    Leading ``@`` characters are removed from the ref before it is sent.

    Returns:
        A JSON string with ``success``, the ``typed`` text and the target
        ``element``, or ``success: false`` plus ``error``.
    """
    try:
        session = _get_session(task_id)
        tab_id = session["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        target = ref.lstrip("@")
        request_body = {"userId": session["user_id"], "ref": target, "text": text}
        _post(f"/tabs/{tab_id}/type", request_body)
        payload = {"success": True, "typed": text, "element": target}
        return json.dumps(payload)
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
|
|
|
|
|
|
def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str:
    """Scroll the task's tab in ``direction``.

    ``direction`` is forwarded to the server unchanged.

    Returns:
        A JSON string with ``success`` and ``scrolled``, or ``success: false``
        plus ``error``.
    """
    try:
        session = _get_session(task_id)
        tab_id = session["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        request_body = {"userId": session["user_id"], "direction": direction}
        _post(f"/tabs/{tab_id}/scroll", request_body)
        return json.dumps({"success": True, "scrolled": direction})
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
|
|
|
|
|
|
def camofox_back(task_id: Optional[str] = None) -> str:
    """Go one step back in the history of the task's tab.

    Returns:
        A JSON string with ``success`` and the ``url`` reported by the server,
        or ``success: false`` plus ``error``.
    """
    try:
        session = _get_session(task_id)
        tab_id = session["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        reply = _post(f"/tabs/{tab_id}/back", {"userId": session["user_id"]})
        landed_on = reply.get("url", "")
        return json.dumps({"success": True, "url": landed_on})
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
|
|
|
|
|
|
def camofox_press(key: str, task_id: Optional[str] = None) -> str:
    """Send a key press to the task's tab.

    ``key`` is forwarded to the server unchanged.

    Returns:
        A JSON string with ``success`` and ``pressed``, or ``success: false``
        plus ``error``.
    """
    try:
        session = _get_session(task_id)
        tab_id = session["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        request_body = {"userId": session["user_id"], "key": key}
        _post(f"/tabs/{tab_id}/press", request_body)
        return json.dumps({"success": True, "pressed": key})
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
|
|
|
|
|
|
def camofox_close(task_id: Optional[str] = None) -> str:
    """Forget the task's session and ask the server to delete it.

    Closing always reports success: when nothing was registered there is
    nothing to do, and when the server-side delete fails the error text is
    returned under ``warning`` instead.
    """
    try:
        session = _drop_session(task_id)
        if session:
            _delete(f"/sessions/{session['user_id']}")
        return json.dumps({"success": True, "closed": True})
    except Exception as exc:
        return json.dumps({"success": True, "closed": True, "warning": str(exc)})
|
|
|
|
|
|
def camofox_get_images(task_id: Optional[str] = None) -> str:
    """List the images found in the task's current page.

    Camofox has no dedicated images endpoint, so the accessibility-tree
    snapshot is scanned instead: each ``img "alt"`` line yields an entry, and
    a ``/url:`` value on the immediately following line becomes its ``src``.
    Entries with neither alt text nor URL are skipped.

    Returns:
        A JSON string with ``success``, ``images`` (list of ``src``/``alt``
        dicts) and ``count``, or ``success: false`` plus ``error``.
    """
    try:
        session = _get_session(task_id)
        tab_id = session["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        import re

        data = _get(f"/tabs/{tab_id}/snapshot", params={"userId": session["user_id"]})
        lines = data.get("snapshot", "").split("\n")

        # Snapshot format: `img "alt text"` or `img "alt text" [eN]`, optionally
        # prefixed with "- "; the URL sits on a `/url:` line right below.
        images = []
        for idx, raw in enumerate(lines):
            entry = raw.strip()
            if not entry.startswith(("- img ", "img ")):
                continue

            alt_hit = re.search(r'img\s+"([^"]*)"', entry)
            alt = alt_hit.group(1) if alt_hit else ""

            following = lines[idx + 1].strip() if idx + 1 < len(lines) else ""
            url_hit = re.search(r'/url:\s*(\S+)', following)
            src = url_hit.group(1) if url_hit else ""

            if alt or src:
                images.append({"src": src, "alt": alt})

        payload = {"success": True, "images": images, "count": len(images)}
        return json.dumps(payload)
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
|
|
|
|
|
|
def camofox_vision(question: str, annotate: bool = False,
                   task_id: Optional[str] = None) -> str:
    """Take a screenshot and analyze it with vision AI via Camofox.

    The screenshot is saved under ``<hermes home>/browser_screenshots`` and
    sent, base64-encoded, to the vision LLM together with ``question``. With
    ``annotate`` set, up to 3000 characters of the (redacted) accessibility
    tree are appended to the prompt so the model can refer to element refs.

    Secrets are redacted on both sides of the LLM call: the accessibility
    tree text before it is sent, and the model's answer before it is
    returned, since the model may read secrets from the image itself.

    Args:
        question: What the vision model should answer about the page.
        annotate: Whether to append the accessibility tree to the prompt.
        task_id: Task whose browser session is used.

    Returns:
        A JSON string with ``success``, ``analysis`` and ``screenshot_path``,
        or ``success: false`` plus ``error``.
    """
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        # Get screenshot as binary PNG
        resp = _get_raw(
            f"/tabs/{session['tab_id']}/screenshot",
            params={"userId": session["user_id"]},
        )

        # Save screenshot to cache
        from hermes_constants import get_hermes_home
        screenshots_dir = get_hermes_home() / "browser_screenshots"
        screenshots_dir.mkdir(parents=True, exist_ok=True)
        screenshot_path = str(screenshots_dir / f"browser_screenshot_{uuid.uuid4().hex[:8]}.png")

        with open(screenshot_path, "wb") as f:
            f.write(resp.content)

        # Encode for vision LLM
        img_b64 = base64.b64encode(resp.content).decode("utf-8")

        # Imported once; used for both the prompt context and the LLM answer.
        from agent.redact import redact_sensitive_text

        # Also get annotated snapshot if requested
        annotation_context = ""
        if annotate:
            try:
                snap_data = _get(
                    f"/tabs/{session['tab_id']}/snapshot",
                    params={"userId": session["user_id"]},
                )
                # Redact BEFORE truncating: cutting first could slice a secret
                # at the 3000-char boundary and leave a fragment the redaction
                # patterns no longer recognise. The screenshot itself cannot
                # be redacted, but the text snippet must not leak values.
                tree = redact_sensitive_text(snap_data.get("snapshot", ""))
                annotation_context = (
                    "\n\nAccessibility tree (element refs for interaction):\n"
                    f"{tree[:3000]}"
                )
            except Exception as exc:
                # Best effort: the analysis still works without the tree.
                logger.debug("Camofox annotation snapshot unavailable: %s", exc)

        # Send to vision LLM
        from agent.auxiliary_client import call_llm

        vision_prompt = (
            f"Analyze this browser screenshot and answer: {question}"
            f"{annotation_context}"
        )

        try:
            from hermes_cli.config import load_config
            _cfg = load_config()
            _vision_timeout = int(_cfg.get("auxiliary", {}).get("vision", {}).get("timeout", 120))
        except Exception:
            # Unreadable or malformed config: fall back to the default.
            _vision_timeout = 120

        response = call_llm(
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": vision_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{img_b64}",
                        },
                    },
                ],
            }],
            task="vision",
            timeout=_vision_timeout,
        )
        analysis = (response.choices[0].message.content or "").strip() if response.choices else ""

        # Redact secrets the vision LLM may have read from the screenshot.
        analysis = redact_sensitive_text(analysis)

        return json.dumps({
            "success": True,
            "analysis": analysis,
            "screenshot_path": screenshot_path,
        })
    except Exception as e:
        return json.dumps({"success": False, "error": str(e)})
|
|
|
|
|
|
def camofox_console(clear: bool = False, task_id: Optional[str] = None) -> str:
    """Report that console capture is unsupported on the Camofox backend.

    No request is made and both arguments are ignored. The result always has
    the shape of an empty console report plus a ``note`` pointing to the
    snapshot and vision tools.
    """
    note = (
        "Console log capture is not available with the Camofox backend. "
        "Use browser_snapshot or browser_vision to inspect page state."
    )
    report = {
        "success": True,
        "console_messages": [],
        "js_errors": [],
        "total_messages": 0,
        "total_errors": 0,
    }
    report["note"] = note
    return json.dumps(report)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Cleanup
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def cleanup_all_camofox_sessions() -> None:
    """Close all active camofox sessions.

    The registry is emptied and its contents captured in a single critical
    section, so a session registered while the deletes are in flight is not
    wiped from the registry without ever being closed on the server. The
    HTTP deletes then run outside the lock and are best effort: a failure
    for one session is logged and does not stop the others.
    """
    with _sessions_lock:
        pending = list(_sessions.values())
        _sessions.clear()
    for session in pending:
        try:
            _delete(f"/sessions/{session['user_id']}")
        except Exception as exc:
            logger.debug("Failed to close Camofox session %s: %s", session.get("user_id"), exc)
|