feat(browser): add Camofox local anti-detection browser backend (#4008)

Camofox-browser is a self-hosted Node.js server wrapping Camoufox
(Firefox fork with C++ fingerprint spoofing). When CAMOFOX_URL is set,
all 11 browser tools route through the Camofox REST API instead of
the agent-browser CLI.

Maps 1:1 to the existing browser tool interface:
- Navigate, snapshot, click, type, scroll, back, press, close
- Get images, vision (screenshot + LLM analysis)
- Console (returns empty with note — camofox limitation)

Setup: npm start in camofox-browser dir, or docker run -p 9377:9377
Then: CAMOFOX_URL=http://localhost:9377 in ~/.hermes/.env

Advantages over Browserbase (cloud):
- Free (no per-session API costs)
- Local (zero network latency for browser ops)
- Anti-detection at C++ level (bypasses Cloudflare/Google bot detection)
- Works offline, Docker-ready

Files:
- tools/browser_camofox.py: Full REST backend (~400 lines)
- tools/browser_tool.py: Routing at each tool function
- hermes_cli/config.py: CAMOFOX_URL env var entry
- tests/tools/test_browser_camofox.py: 20 tests
This commit is contained in:
Teknium
2026-03-30 13:18:42 -07:00
committed by GitHub
parent 7dac75f2ae
commit 950f69475f
7 changed files with 889 additions and 3 deletions

View File

@@ -706,6 +706,14 @@ OPTIONAL_ENV_VARS = {
"password": True,
"category": "tool",
},
"CAMOFOX_URL": {
"description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
"prompt": "Camofox server URL",
"url": "https://github.com/jo-inc/camofox-browser",
"tools": ["browser_navigate", "browser_click"],
"password": False,
"category": "tool",
},
"FAL_KEY": {
"description": "FAL API key for image generation",
"prompt": "FAL API key",

View File

@@ -601,13 +601,15 @@ def _print_setup_summary(config: dict, hermes_home):
Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser"
).exists()
)
if get_env_value("BROWSERBASE_API_KEY"):
if get_env_value("CAMOFOX_URL"):
tool_status.append(("Browser Automation (Camofox)", True, None))
elif get_env_value("BROWSERBASE_API_KEY"):
tool_status.append(("Browser Automation (Browserbase)", True, None))
elif _ab_found:
tool_status.append(("Browser Automation (local)", True, None))
else:
tool_status.append(
("Browser Automation", False, "npm install -g agent-browser")
("Browser Automation", False, "npm install -g agent-browser or set CAMOFOX_URL")
)
# FAL (image generation)

View File

@@ -273,6 +273,16 @@ TOOL_CATEGORIES = {
"browser_provider": "browser-use",
"post_setup": "browserbase",
},
{
"name": "Camofox",
"tag": "Local anti-detection browser (Firefox/Camoufox)",
"env_vars": [
{"key": "CAMOFOX_URL", "prompt": "Camofox server URL", "default": "http://localhost:9377",
"url": "https://github.com/jo-inc/camofox-browser"},
],
"browser_provider": "camofox",
"post_setup": "camofox",
},
],
},
"homeassistant": {
@@ -337,6 +347,28 @@ def _run_post_setup(post_setup_key: str):
elif not node_modules.exists():
_print_warning(" Node.js not found - browser tools require: npm install (in hermes-agent directory)")
elif post_setup_key == "camofox":
camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camoufox-browser"
if not camofox_dir.exists() and shutil.which("npm"):
_print_info(" Installing Camofox browser server...")
import subprocess
result = subprocess.run(
["npm", "install", "--silent"],
capture_output=True, text=True, cwd=str(PROJECT_ROOT)
)
if result.returncode == 0:
_print_success(" Camofox installed")
else:
_print_warning(" npm install failed - run manually: npm install")
if camofox_dir.exists():
_print_info(" Start the Camofox server:")
_print_info(" npx @askjo/camoufox-browser")
_print_info(" First run downloads the Camoufox engine (~300MB)")
_print_info(" Or use Docker: docker run -p 9377:9377 jo-inc/camofox-browser")
elif not shutil.which("npm"):
_print_warning(" Node.js not found. Install Camofox via Docker:")
_print_info(" docker run -p 9377:9377 jo-inc/camofox-browser")
elif post_setup_key == "rl_training":
try:
__import__("tinker_atropos")

View File

@@ -16,7 +16,8 @@
},
"homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
"dependencies": {
"agent-browser": "^0.13.0"
"agent-browser": "^0.13.0",
"@askjo/camoufox-browser": "^1.0.0"
},
"engines": {
"node": ">=18.0.0"

View File

@@ -0,0 +1,290 @@
"""Tests for the Camofox browser backend."""
import json
import os
from unittest.mock import MagicMock, patch
import pytest
from tools.browser_camofox import (
camofox_back,
camofox_click,
camofox_close,
camofox_console,
camofox_get_images,
camofox_navigate,
camofox_press,
camofox_scroll,
camofox_snapshot,
camofox_type,
camofox_vision,
check_camofox_available,
cleanup_all_camofox_sessions,
is_camofox_mode,
)
# ---------------------------------------------------------------------------
# Configuration detection
# ---------------------------------------------------------------------------
class TestCamofoxMode:
def test_disabled_by_default(self, monkeypatch):
monkeypatch.delenv("CAMOFOX_URL", raising=False)
assert is_camofox_mode() is False
def test_enabled_when_url_set(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
assert is_camofox_mode() is True
def test_health_check_unreachable(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:19999")
assert check_camofox_available() is False
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _mock_response(status=200, json_data=None):
resp = MagicMock()
resp.status_code = status
resp.json.return_value = json_data or {}
resp.content = b"\x89PNG\r\n\x1a\nfake"
resp.raise_for_status = MagicMock()
return resp
# ---------------------------------------------------------------------------
# Navigate
# ---------------------------------------------------------------------------
class TestCamofoxNavigate:
@patch("tools.browser_camofox.requests.post")
def test_creates_tab_on_first_navigate(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab1", "url": "https://example.com"})
result = json.loads(camofox_navigate("https://example.com", task_id="t1"))
assert result["success"] is True
assert result["url"] == "https://example.com"
@patch("tools.browser_camofox.requests.post")
def test_navigates_existing_tab(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
# First call creates tab
mock_post.return_value = _mock_response(json_data={"tabId": "tab2", "url": "https://a.com"})
camofox_navigate("https://a.com", task_id="t2")
# Second call navigates
mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://b.com"})
result = json.loads(camofox_navigate("https://b.com", task_id="t2"))
assert result["success"] is True
assert result["url"] == "https://b.com"
def test_connection_error_returns_helpful_message(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:19999")
result = json.loads(camofox_navigate("https://example.com", task_id="t_err"))
assert result["success"] is False
assert "Cannot connect" in result["error"]
# ---------------------------------------------------------------------------
# Snapshot
# ---------------------------------------------------------------------------
class TestCamofoxSnapshot:
def test_no_session_returns_error(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
result = json.loads(camofox_snapshot(task_id="no_such_task"))
assert result["success"] is False
assert "browser_navigate" in result["error"]
@patch("tools.browser_camofox.requests.post")
@patch("tools.browser_camofox.requests.get")
def test_returns_snapshot(self, mock_get, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
# Create session
mock_post.return_value = _mock_response(json_data={"tabId": "tab3", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t3")
# Return snapshot
mock_get.return_value = _mock_response(json_data={
"snapshot": "- heading \"Test\" [e1]\n- button \"Submit\" [e2]",
"refsCount": 2,
})
result = json.loads(camofox_snapshot(task_id="t3"))
assert result["success"] is True
assert "[e1]" in result["snapshot"]
assert result["element_count"] == 2
# ---------------------------------------------------------------------------
# Click / Type / Scroll / Back / Press
# ---------------------------------------------------------------------------
class TestCamofoxInteractions:
@patch("tools.browser_camofox.requests.post")
def test_click(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab4", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t4")
mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://x.com"})
result = json.loads(camofox_click("@e5", task_id="t4"))
assert result["success"] is True
assert result["clicked"] == "e5"
@patch("tools.browser_camofox.requests.post")
def test_type(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab5", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t5")
mock_post.return_value = _mock_response(json_data={"ok": True})
result = json.loads(camofox_type("@e3", "hello world", task_id="t5"))
assert result["success"] is True
assert result["typed"] == "hello world"
@patch("tools.browser_camofox.requests.post")
def test_scroll(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab6", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t6")
mock_post.return_value = _mock_response(json_data={"ok": True})
result = json.loads(camofox_scroll("down", task_id="t6"))
assert result["success"] is True
assert result["scrolled"] == "down"
@patch("tools.browser_camofox.requests.post")
def test_back(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab7", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t7")
mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://prev.com"})
result = json.loads(camofox_back(task_id="t7"))
assert result["success"] is True
@patch("tools.browser_camofox.requests.post")
def test_press(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab8", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t8")
mock_post.return_value = _mock_response(json_data={"ok": True})
result = json.loads(camofox_press("Enter", task_id="t8"))
assert result["success"] is True
assert result["pressed"] == "Enter"
# ---------------------------------------------------------------------------
# Close
# ---------------------------------------------------------------------------
class TestCamofoxClose:
@patch("tools.browser_camofox.requests.delete")
@patch("tools.browser_camofox.requests.post")
def test_close_session(self, mock_post, mock_delete, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab9", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t9")
mock_delete.return_value = _mock_response(json_data={"ok": True})
result = json.loads(camofox_close(task_id="t9"))
assert result["success"] is True
assert result["closed"] is True
def test_close_nonexistent_session(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
result = json.loads(camofox_close(task_id="nonexistent"))
assert result["success"] is True
# ---------------------------------------------------------------------------
# Console (limited support)
# ---------------------------------------------------------------------------
class TestCamofoxConsole:
def test_console_returns_empty_with_note(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
result = json.loads(camofox_console(task_id="t_console"))
assert result["success"] is True
assert result["total_messages"] == 0
assert "not available" in result["note"]
# ---------------------------------------------------------------------------
# Images
# ---------------------------------------------------------------------------
class TestCamofoxGetImages:
@patch("tools.browser_camofox.requests.post")
@patch("tools.browser_camofox.requests.get")
def test_get_images(self, mock_get, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab10", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t10")
mock_get.return_value = _mock_response(json_data={
"images": [{"src": "https://x.com/img.png", "alt": "Logo"}],
})
result = json.loads(camofox_get_images(task_id="t10"))
assert result["success"] is True
assert result["count"] == 1
assert result["images"][0]["src"] == "https://x.com/img.png"
# ---------------------------------------------------------------------------
# Routing integration — verify browser_tool routes to camofox
# ---------------------------------------------------------------------------
class TestBrowserToolRouting:
"""Verify that browser_tool.py delegates to camofox when CAMOFOX_URL is set."""
@patch("tools.browser_camofox.requests.post")
def test_browser_navigate_routes_to_camofox(self, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab_rt", "url": "https://example.com"})
from tools.browser_tool import browser_navigate
# Bypass SSRF check for test URL
with patch("tools.browser_tool._is_safe_url", return_value=True):
result = json.loads(browser_navigate("https://example.com", task_id="t_route"))
assert result["success"] is True
def test_check_requirements_passes_with_camofox(self, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
from tools.browser_tool import check_browser_requirements
assert check_browser_requirements() is True
# ---------------------------------------------------------------------------
# Cleanup helper
# ---------------------------------------------------------------------------
class TestCamofoxCleanup:
@patch("tools.browser_camofox.requests.post")
@patch("tools.browser_camofox.requests.delete")
def test_cleanup_all(self, mock_delete, mock_post, monkeypatch):
monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
mock_post.return_value = _mock_response(json_data={"tabId": "tab_c", "url": "https://x.com"})
camofox_navigate("https://x.com", task_id="t_cleanup")
mock_delete.return_value = _mock_response(json_data={"ok": True})
cleanup_all_camofox_sessions()
# Session should be gone
result = json.loads(camofox_snapshot(task_id="t_cleanup"))
assert result["success"] is False

496
tools/browser_camofox.py Normal file
View File

@@ -0,0 +1,496 @@
"""Camofox browser backend — local anti-detection browser via REST API.
Camofox-browser is a self-hosted Node.js server wrapping Camoufox (Firefox
fork with C++ fingerprint spoofing). It exposes a REST API that maps 1:1
to our browser tool interface: accessibility snapshots with element refs,
click/type/scroll by ref, screenshots, etc.
When ``CAMOFOX_URL`` is set (e.g. ``http://localhost:9377``), the browser
tools route through this module instead of the ``agent-browser`` CLI.
Setup::
# Option 1: npm
git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser
npm install && npm start # downloads Camoufox (~300MB) on first run
# Option 2: Docker
docker run -p 9377:9377 jo-inc/camofox-browser
Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``.
"""
from __future__ import annotations
import base64
import json
import logging
import os
import threading
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Optional
import requests
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
_DEFAULT_TIMEOUT = 30 # seconds per HTTP request
_SNAPSHOT_MAX_CHARS = 80_000 # camofox paginates at this limit
def get_camofox_url() -> str:
"""Return the configured Camofox server URL, or empty string."""
return os.getenv("CAMOFOX_URL", "").rstrip("/")
def is_camofox_mode() -> bool:
"""True when Camofox backend is configured."""
return bool(get_camofox_url())
def check_camofox_available() -> bool:
"""Verify the Camofox server is reachable."""
url = get_camofox_url()
if not url:
return False
try:
resp = requests.get(f"{url}/health", timeout=5)
return resp.status_code == 200
except Exception:
return False
# ---------------------------------------------------------------------------
# Session management
# ---------------------------------------------------------------------------
# Maps task_id -> {"user_id": str, "tab_id": str|None}
_sessions: Dict[str, Dict[str, Any]] = {}
_sessions_lock = threading.Lock()
def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
"""Get or create a camofox session for the given task."""
task_id = task_id or "default"
with _sessions_lock:
if task_id in _sessions:
return _sessions[task_id]
session = {
"user_id": f"hermes_{uuid.uuid4().hex[:10]}",
"tab_id": None,
"session_key": f"task_{task_id[:16]}",
}
_sessions[task_id] = session
return session
def _ensure_tab(task_id: Optional[str], url: str = "about:blank") -> Dict[str, Any]:
"""Ensure a tab exists for the session, creating one if needed."""
session = _get_session(task_id)
if session["tab_id"]:
return session
base = get_camofox_url()
resp = requests.post(
f"{base}/tabs",
json={
"userId": session["user_id"],
"sessionKey": session["session_key"],
"url": url,
},
timeout=_DEFAULT_TIMEOUT,
)
resp.raise_for_status()
data = resp.json()
session["tab_id"] = data.get("tabId")
return session
def _drop_session(task_id: Optional[str]) -> Optional[Dict[str, Any]]:
"""Remove and return session info."""
task_id = task_id or "default"
with _sessions_lock:
return _sessions.pop(task_id, None)
# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------
def _post(path: str, body: dict, timeout: int = _DEFAULT_TIMEOUT) -> dict:
"""POST JSON to camofox and return parsed response."""
url = f"{get_camofox_url()}{path}"
resp = requests.post(url, json=body, timeout=timeout)
resp.raise_for_status()
return resp.json()
def _get(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict:
"""GET from camofox and return parsed response."""
url = f"{get_camofox_url()}{path}"
resp = requests.get(url, params=params, timeout=timeout)
resp.raise_for_status()
return resp.json()
def _get_raw(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> requests.Response:
"""GET from camofox and return raw response (for binary data)."""
url = f"{get_camofox_url()}{path}"
resp = requests.get(url, params=params, timeout=timeout)
resp.raise_for_status()
return resp
def _delete(path: str, body: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict:
"""DELETE to camofox and return parsed response."""
url = f"{get_camofox_url()}{path}"
resp = requests.delete(url, json=body, timeout=timeout)
resp.raise_for_status()
return resp.json()
# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------
def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
"""Navigate to a URL via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
# Create tab with the target URL directly
session = _ensure_tab(task_id, url)
data = {"ok": True, "url": url}
else:
# Navigate existing tab
data = _post(
f"/tabs/{session['tab_id']}/navigate",
{"userId": session["user_id"], "url": url},
timeout=60,
)
return json.dumps({
"success": True,
"url": data.get("url", url),
"title": data.get("title", ""),
})
except requests.HTTPError as e:
return json.dumps({"success": False, "error": f"Navigation failed: {e}"})
except requests.ConnectionError:
return json.dumps({
"success": False,
"error": f"Cannot connect to Camofox at {get_camofox_url()}. "
"Is the server running? Start with: npm start (in camofox-browser dir) "
"or: docker run -p 9377:9377 jo-inc/camofox-browser",
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_snapshot(full: bool = False, task_id: Optional[str] = None,
user_task: Optional[str] = None) -> str:
"""Get accessibility tree snapshot from Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
data = _get(
f"/tabs/{session['tab_id']}/snapshot",
params={"userId": session["user_id"]},
)
snapshot = data.get("snapshot", "")
refs_count = data.get("refsCount", 0)
# Apply same summarization logic as the main browser tool
from tools.browser_tool import (
SNAPSHOT_SUMMARIZE_THRESHOLD,
_extract_relevant_content,
_truncate_snapshot,
)
if len(snapshot) > SNAPSHOT_SUMMARIZE_THRESHOLD:
if user_task:
snapshot = _extract_relevant_content(snapshot, user_task)
else:
snapshot = _truncate_snapshot(snapshot)
return json.dumps({
"success": True,
"snapshot": snapshot,
"element_count": refs_count,
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_click(ref: str, task_id: Optional[str] = None) -> str:
"""Click an element by ref via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
# Strip @ prefix if present (our tool convention)
clean_ref = ref.lstrip("@")
data = _post(
f"/tabs/{session['tab_id']}/click",
{"userId": session["user_id"], "ref": clean_ref},
)
return json.dumps({
"success": True,
"clicked": clean_ref,
"url": data.get("url", ""),
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
"""Type text into an element by ref via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
clean_ref = ref.lstrip("@")
_post(
f"/tabs/{session['tab_id']}/type",
{"userId": session["user_id"], "ref": clean_ref, "text": text},
)
return json.dumps({
"success": True,
"typed": text,
"element": clean_ref,
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str:
"""Scroll the page via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
_post(
f"/tabs/{session['tab_id']}/scroll",
{"userId": session["user_id"], "direction": direction},
)
return json.dumps({"success": True, "scrolled": direction})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_back(task_id: Optional[str] = None) -> str:
"""Navigate back via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
data = _post(
f"/tabs/{session['tab_id']}/back",
{"userId": session["user_id"]},
)
return json.dumps({"success": True, "url": data.get("url", "")})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_press(key: str, task_id: Optional[str] = None) -> str:
"""Press a keyboard key via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
_post(
f"/tabs/{session['tab_id']}/press",
{"userId": session["user_id"], "key": key},
)
return json.dumps({"success": True, "pressed": key})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_close(task_id: Optional[str] = None) -> str:
"""Close the browser session via Camofox."""
try:
session = _drop_session(task_id)
if not session:
return json.dumps({"success": True, "closed": True})
_delete(
f"/sessions/{session['user_id']}",
)
return json.dumps({"success": True, "closed": True})
except Exception as e:
return json.dumps({"success": True, "closed": True, "warning": str(e)})
def camofox_get_images(task_id: Optional[str] = None) -> str:
"""Get images on the current page via Camofox.
Extracts image information from the accessibility tree snapshot,
since Camofox does not expose a dedicated /images endpoint.
"""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
import re
data = _get(
f"/tabs/{session['tab_id']}/snapshot",
params={"userId": session["user_id"]},
)
snapshot = data.get("snapshot", "")
# Parse img elements from the accessibility tree.
# Format: img "alt text" or img "alt text" [eN]
# URLs appear on /url: lines following img entries
images = []
lines = snapshot.split("\n")
for i, line in enumerate(lines):
stripped = line.strip()
if stripped.startswith("- img ") or stripped.startswith("img "):
alt_match = re.search(r'img\s+"([^"]*)"', stripped)
alt = alt_match.group(1) if alt_match else ""
# Look for URL on the next line
src = ""
if i + 1 < len(lines):
url_match = re.search(r'/url:\s*(\S+)', lines[i + 1].strip())
if url_match:
src = url_match.group(1)
if alt or src:
images.append({"src": src, "alt": alt})
return json.dumps({
"success": True,
"images": images,
"count": len(images),
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_vision(question: str, annotate: bool = False,
task_id: Optional[str] = None) -> str:
"""Take a screenshot and analyze it with vision AI via Camofox."""
try:
session = _get_session(task_id)
if not session["tab_id"]:
return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})
# Get screenshot as binary PNG
resp = _get_raw(
f"/tabs/{session['tab_id']}/screenshot",
params={"userId": session["user_id"]},
)
# Save screenshot to cache
from hermes_constants import get_hermes_home
screenshots_dir = get_hermes_home() / "browser_screenshots"
screenshots_dir.mkdir(parents=True, exist_ok=True)
screenshot_path = str(screenshots_dir / f"browser_screenshot_{uuid.uuid4().hex[:8]}.png")
with open(screenshot_path, "wb") as f:
f.write(resp.content)
# Encode for vision LLM
img_b64 = base64.b64encode(resp.content).decode("utf-8")
# Also get annotated snapshot if requested
annotation_context = ""
if annotate:
try:
snap_data = _get(
f"/tabs/{session['tab_id']}/snapshot",
params={"userId": session["user_id"]},
)
annotation_context = f"\n\nAccessibility tree (element refs for interaction):\n{snap_data.get('snapshot', '')[:3000]}"
except Exception:
pass
# Send to vision LLM
from agent.auxiliary_client import call_llm
vision_prompt = (
f"Analyze this browser screenshot and answer: {question}"
f"{annotation_context}"
)
try:
from hermes_cli.config import load_config
_cfg = load_config()
_vision_timeout = int(_cfg.get("auxiliary", {}).get("vision", {}).get("timeout", 120))
except Exception:
_vision_timeout = 120
analysis = call_llm(
messages=[{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{img_b64}",
},
},
],
}],
task="vision",
timeout=_vision_timeout,
)
return json.dumps({
"success": True,
"analysis": analysis,
"screenshot_path": screenshot_path,
})
except Exception as e:
return json.dumps({"success": False, "error": str(e)})
def camofox_console(clear: bool = False, task_id: Optional[str] = None) -> str:
"""Get console output — limited support in Camofox.
Camofox does not expose browser console logs via its REST API.
Returns an empty result with a note.
"""
return json.dumps({
"success": True,
"console_messages": [],
"js_errors": [],
"total_messages": 0,
"total_errors": 0,
"note": "Console log capture is not available with the Camofox backend. "
"Use browser_snapshot or browser_vision to inspect page state.",
})
# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------
def cleanup_all_camofox_sessions() -> None:
"""Close all active camofox sessions."""
with _sessions_lock:
sessions = list(_sessions.items())
for task_id, session in sessions:
try:
_delete(f"/sessions/{session['user_id']}")
except Exception:
pass
with _sessions_lock:
_sessions.clear()

View File

@@ -79,6 +79,14 @@ from tools.browser_providers.base import CloudBrowserProvider
from tools.browser_providers.browserbase import BrowserbaseProvider
from tools.browser_providers.browser_use import BrowserUseProvider
# Camofox local anti-detection browser backend (optional).
# When CAMOFOX_URL is set, all browser operations route through the
# camofox REST API instead of the agent-browser CLI.
try:
from tools.browser_camofox import is_camofox_mode as _is_camofox_mode
except ImportError:
_is_camofox_mode = lambda: False # noqa: E731
logger = logging.getLogger(__name__)
# Standard PATH entries for environments with minimal PATH (e.g. systemd services).
@@ -1046,6 +1054,11 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
"blocked_by_policy": {"host": blocked["host"], "rule": blocked["rule"], "source": blocked["source"]},
})
# Camofox backend — delegate after safety checks pass
if _is_camofox_mode():
from tools.browser_camofox import camofox_navigate
return camofox_navigate(url, task_id)
effective_task_id = task_id or "default"
# Get session info to check if this is a new session
@@ -1135,6 +1148,10 @@ def browser_snapshot(
Returns:
JSON string with page snapshot
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_snapshot
return camofox_snapshot(full, task_id, user_task)
effective_task_id = task_id or "default"
# Build command args based on full flag
@@ -1180,6 +1197,10 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with click result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_click
return camofox_click(ref, task_id)
effective_task_id = task_id or "default"
# Ensure ref starts with @
@@ -1212,6 +1233,10 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with type result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_type
return camofox_type(ref, text, task_id)
effective_task_id = task_id or "default"
# Ensure ref starts with @
@@ -1245,6 +1270,10 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with scroll result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_scroll
return camofox_scroll(direction, task_id)
effective_task_id = task_id or "default"
# Validate direction
@@ -1278,6 +1307,10 @@ def browser_back(task_id: Optional[str] = None) -> str:
Returns:
JSON string with navigation result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_back
return camofox_back(task_id)
effective_task_id = task_id or "default"
result = _run_browser_command(effective_task_id, "back", [])
@@ -1305,6 +1338,10 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with key press result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_press
return camofox_press(key, task_id)
effective_task_id = task_id or "default"
result = _run_browser_command(effective_task_id, "press", [key])
@@ -1330,6 +1367,10 @@ def browser_close(task_id: Optional[str] = None) -> str:
Returns:
JSON string with close result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_close
return camofox_close(task_id)
effective_task_id = task_id or "default"
with _cleanup_lock:
had_session = effective_task_id in _active_sessions
@@ -1358,6 +1399,10 @@ def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str:
Returns:
JSON string with console messages and JS errors
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_console
return camofox_console(clear, task_id)
effective_task_id = task_id or "default"
console_args = ["--clear"] if clear else []
@@ -1452,6 +1497,10 @@ def browser_get_images(task_id: Optional[str] = None) -> str:
Returns:
JSON string with list of images (src and alt)
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_get_images
return camofox_get_images(task_id)
effective_task_id = task_id or "default"
# Use eval to run JavaScript that extracts images
@@ -1516,6 +1565,10 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
Returns:
JSON string with vision analysis results and screenshot_path
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_vision
return camofox_vision(question, annotate, task_id)
import base64
import uuid as uuid_mod
from pathlib import Path
@@ -1804,6 +1857,10 @@ def check_browser_requirements() -> bool:
Returns:
True if all requirements are met, False otherwise
"""
# Camofox backend — only needs the server URL, no agent-browser CLI
if _is_camofox_mode():
return True
# The agent-browser CLI is always required
try:
_find_agent_browser()