"""Camofox browser backend — local anti-detection browser via REST API. Camofox-browser is a self-hosted Node.js server wrapping Camoufox (Firefox fork with C++ fingerprint spoofing). It exposes a REST API that maps 1:1 to our browser tool interface: accessibility snapshots with element refs, click/type/scroll by ref, screenshots, etc. When ``CAMOFOX_URL`` is set (e.g. ``http://localhost:9377``), the browser tools route through this module instead of the ``agent-browser`` CLI. Setup:: # Option 1: npm git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser npm install && npm start # downloads Camoufox (~300MB) on first run # Option 2: Docker docker run -p 9377:9377 jo-inc/camofox-browser Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``. """ from __future__ import annotations import base64 import json import logging import os import threading import time import uuid from pathlib import Path from typing import Any, Dict, Optional import requests logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Configuration # --------------------------------------------------------------------------- _DEFAULT_TIMEOUT = 30 # seconds per HTTP request _SNAPSHOT_MAX_CHARS = 80_000 # camofox paginates at this limit def get_camofox_url() -> str: """Return the configured Camofox server URL, or empty string.""" return os.getenv("CAMOFOX_URL", "").rstrip("/") def is_camofox_mode() -> bool: """True when Camofox backend is configured.""" return bool(get_camofox_url()) def check_camofox_available() -> bool: """Verify the Camofox server is reachable.""" url = get_camofox_url() if not url: return False try: resp = requests.get(f"{url}/health", timeout=5) return resp.status_code == 200 except Exception: return False # --------------------------------------------------------------------------- # Session management # --------------------------------------------------------------------------- # Maps task_id -> {"user_id": str, "tab_id": str|None} _sessions: Dict[str, Dict[str, Any]] = {} _sessions_lock = threading.Lock() def _get_session(task_id: Optional[str]) -> Dict[str, Any]: """Get or create a camofox session for the given task.""" task_id = task_id or "default" with _sessions_lock: if task_id in _sessions: return _sessions[task_id] session = { "user_id": f"hermes_{uuid.uuid4().hex[:10]}", "tab_id": None, "session_key": f"task_{task_id[:16]}", } _sessions[task_id] = session return session def _ensure_tab(task_id: Optional[str], url: str = "about:blank") -> Dict[str, Any]: """Ensure a tab exists for the session, creating one if needed.""" session = _get_session(task_id) if session["tab_id"]: return session base = get_camofox_url() resp = requests.post( f"{base}/tabs", json={ "userId": session["user_id"], "sessionKey": session["session_key"], "url": url, }, timeout=_DEFAULT_TIMEOUT, ) resp.raise_for_status() data = resp.json() session["tab_id"] = data.get("tabId") return session def _drop_session(task_id: Optional[str]) -> Optional[Dict[str, Any]]: """Remove and return session info.""" task_id = task_id or "default" with _sessions_lock: return _sessions.pop(task_id, None) # --------------------------------------------------------------------------- # HTTP helpers # --------------------------------------------------------------------------- def _post(path: str, body: dict, timeout: int = _DEFAULT_TIMEOUT) -> dict: """POST JSON to camofox and return parsed response.""" url = f"{get_camofox_url()}{path}" resp = requests.post(url, json=body, timeout=timeout) resp.raise_for_status() return resp.json() def _get(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict: """GET from camofox and return parsed response.""" url = f"{get_camofox_url()}{path}" resp = requests.get(url, params=params, timeout=timeout) resp.raise_for_status() return resp.json() def _get_raw(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> requests.Response: """GET from camofox and return raw response (for binary data).""" url = f"{get_camofox_url()}{path}" resp = requests.get(url, params=params, timeout=timeout) resp.raise_for_status() return resp def _delete(path: str, body: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict: """DELETE to camofox and return parsed response.""" url = f"{get_camofox_url()}{path}" resp = requests.delete(url, json=body, timeout=timeout) resp.raise_for_status() return resp.json() # --------------------------------------------------------------------------- # Tool implementations # --------------------------------------------------------------------------- def camofox_navigate(url: str, task_id: Optional[str] = None) -> str: """Navigate to a URL via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: # Create tab with the target URL directly session = _ensure_tab(task_id, url) data = {"ok": True, "url": url} else: # Navigate existing tab data = _post( f"/tabs/{session['tab_id']}/navigate", {"userId": session["user_id"], "url": url}, timeout=60, ) return json.dumps({ "success": True, "url": data.get("url", url), "title": data.get("title", ""), }) except requests.HTTPError as e: return json.dumps({"success": False, "error": f"Navigation failed: {e}"}) except requests.ConnectionError: return json.dumps({ "success": False, "error": f"Cannot connect to Camofox at {get_camofox_url()}. " "Is the server running? Start with: npm start (in camofox-browser dir) " "or: docker run -p 9377:9377 jo-inc/camofox-browser", }) except Exception as e: return json.dumps({"success": False, "error": str(e)}) def camofox_snapshot(full: bool = False, task_id: Optional[str] = None, user_task: Optional[str] = None) -> str: """Get accessibility tree snapshot from Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) data = _get( f"/tabs/{session['tab_id']}/snapshot", params={"userId": session["user_id"]}, ) snapshot = data.get("snapshot", "") refs_count = data.get("refsCount", 0) # Apply same summarization logic as the main browser tool from tools.browser_tool import ( SNAPSHOT_SUMMARIZE_THRESHOLD, _extract_relevant_content, _truncate_snapshot, ) if len(snapshot) > SNAPSHOT_SUMMARIZE_THRESHOLD: if user_task: snapshot = _extract_relevant_content(snapshot, user_task) else: snapshot = _truncate_snapshot(snapshot) return json.dumps({ "success": True, "snapshot": snapshot, "element_count": refs_count, }) except Exception as e: return json.dumps({"success": False, "error": str(e)}) def camofox_click(ref: str, task_id: Optional[str] = None) -> str: """Click an element by ref via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) # Strip @ prefix if present (our tool convention) clean_ref = ref.lstrip("@") data = _post( f"/tabs/{session['tab_id']}/click", {"userId": session["user_id"], "ref": clean_ref}, ) return json.dumps({ "success": True, "clicked": clean_ref, "url": data.get("url", ""), }) except Exception as e: return json.dumps({"success": False, "error": str(e)}) def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str: """Type text into an element by ref via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) clean_ref = ref.lstrip("@") _post( f"/tabs/{session['tab_id']}/type", {"userId": session["user_id"], "ref": clean_ref, "text": text}, ) return json.dumps({ "success": True, "typed": text, "element": clean_ref, }) except Exception as e: return json.dumps({"success": False, "error": str(e)}) def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str: """Scroll the page via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) _post( f"/tabs/{session['tab_id']}/scroll", {"userId": session["user_id"], "direction": direction}, ) return json.dumps({"success": True, "scrolled": direction}) except Exception as e: return json.dumps({"success": False, "error": str(e)}) def camofox_back(task_id: Optional[str] = None) -> str: """Navigate back via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) data = _post( f"/tabs/{session['tab_id']}/back", {"userId": session["user_id"]}, ) return json.dumps({"success": True, "url": data.get("url", "")}) except Exception as e: return json.dumps({"success": False, "error": str(e)}) def camofox_press(key: str, task_id: Optional[str] = None) -> str: """Press a keyboard key via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) _post( f"/tabs/{session['tab_id']}/press", {"userId": session["user_id"], "key": key}, ) return json.dumps({"success": True, "pressed": key}) except Exception as e: return json.dumps({"success": False, "error": str(e)}) def camofox_close(task_id: Optional[str] = None) -> str: """Close the browser session via Camofox.""" try: session = _drop_session(task_id) if not session: return json.dumps({"success": True, "closed": True}) _delete( f"/sessions/{session['user_id']}", ) return json.dumps({"success": True, "closed": True}) except Exception as e: return json.dumps({"success": True, "closed": True, "warning": str(e)}) def camofox_get_images(task_id: Optional[str] = None) -> str: """Get images on the current page via Camofox. Extracts image information from the accessibility tree snapshot, since Camofox does not expose a dedicated /images endpoint. """ try: session = _get_session(task_id) if not session["tab_id"]: return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) import re data = _get( f"/tabs/{session['tab_id']}/snapshot", params={"userId": session["user_id"]}, ) snapshot = data.get("snapshot", "") # Parse img elements from the accessibility tree. # Format: img "alt text" or img "alt text" [eN] # URLs appear on /url: lines following img entries images = [] lines = snapshot.split("\n") for i, line in enumerate(lines): stripped = line.strip() if stripped.startswith("- img ") or stripped.startswith("img "): alt_match = re.search(r'img\s+"([^"]*)"', stripped) alt = alt_match.group(1) if alt_match else "" # Look for URL on the next line src = "" if i + 1 < len(lines): url_match = re.search(r'/url:\s*(\S+)', lines[i + 1].strip()) if url_match: src = url_match.group(1) if alt or src: images.append({"src": src, "alt": alt}) return json.dumps({ "success": True, "images": images, "count": len(images), }) except Exception as e: return json.dumps({"success": False, "error": str(e)}) def camofox_vision(question: str, annotate: bool = False, task_id: Optional[str] = None) -> str: """Take a screenshot and analyze it with vision AI via Camofox.""" try: session = _get_session(task_id) if not session["tab_id"]: return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) # Get screenshot as binary PNG resp = _get_raw( f"/tabs/{session['tab_id']}/screenshot", params={"userId": session["user_id"]}, ) # Save screenshot to cache from hermes_constants import get_hermes_home screenshots_dir = get_hermes_home() / "browser_screenshots" screenshots_dir.mkdir(parents=True, exist_ok=True) screenshot_path = str(screenshots_dir / f"browser_screenshot_{uuid.uuid4().hex[:8]}.png") with open(screenshot_path, "wb") as f: f.write(resp.content) # Encode for vision LLM img_b64 = base64.b64encode(resp.content).decode("utf-8") # Also get annotated snapshot if requested annotation_context = "" if annotate: try: snap_data = _get( f"/tabs/{session['tab_id']}/snapshot", params={"userId": session["user_id"]}, ) annotation_context = f"\n\nAccessibility tree (element refs for interaction):\n{snap_data.get('snapshot', '')[:3000]}" except Exception: pass # Send to vision LLM from agent.auxiliary_client import call_llm vision_prompt = ( f"Analyze this browser screenshot and answer: {question}" f"{annotation_context}" ) try: from hermes_cli.config import load_config _cfg = load_config() _vision_timeout = int(_cfg.get("auxiliary", {}).get("vision", {}).get("timeout", 120)) except Exception: _vision_timeout = 120 analysis = call_llm( messages=[{ "role": "user", "content": [ {"type": "text", "text": vision_prompt}, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{img_b64}", }, }, ], }], task="vision", timeout=_vision_timeout, ) return json.dumps({ "success": True, "analysis": analysis, "screenshot_path": screenshot_path, }) except Exception as e: return json.dumps({"success": False, "error": str(e)}) def camofox_console(clear: bool = False, task_id: Optional[str] = None) -> str: """Get console output — limited support in Camofox. Camofox does not expose browser console logs via its REST API. Returns an empty result with a note. """ return json.dumps({ "success": True, "console_messages": [], "js_errors": [], "total_messages": 0, "total_errors": 0, "note": "Console log capture is not available with the Camofox backend. " "Use browser_snapshot or browser_vision to inspect page state.", }) # --------------------------------------------------------------------------- # Cleanup # --------------------------------------------------------------------------- def cleanup_all_camofox_sessions() -> None: """Close all active camofox sessions.""" with _sessions_lock: sessions = list(_sessions.items()) for task_id, session in sessions: try: _delete(f"/sessions/{session['user_id']}") except Exception: pass with _sessions_lock: _sessions.clear()