Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.
2026-01-29 06:10:24 +00:00
parent 54ca0997ee
commit 248acf715e
12 changed files with 2626 additions and 134 deletions
--- a/.env.example
+++ b/.env.example
@@ -2,14 +2,15 @@
 # Copy this file to .env and fill in your API keys

 # =============================================================================
-# LLM PROVIDER (OpenRouter - Primary)
+# LLM PROVIDER (OpenRouter)
 # =============================================================================
 # OpenRouter provides access to many models through one API
-# Get at: https://openrouter.ai/keys
+# All LLM calls go through OpenRouter - no direct provider keys needed
+# Get your key at: https://openrouter.ai/keys
 OPENROUTER_API_KEY=

 # Default model to use (OpenRouter format: provider/model)
-# Examples: anthropic/claude-sonnet-4, openai/gpt-4o, google/gemini-2.0-flash
+# Examples: anthropic/claude-sonnet-4, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus
 LLM_MODEL=anthropic/claude-sonnet-4

 # =============================================================================
@@ -31,14 +32,17 @@ FAL_KEY=
 # =============================================================================
 # TERMINAL TOOL CONFIGURATION (mini-swe-agent backend)
 # =============================================================================
-# Backend type: "local", "docker", or "modal"
+# Backend type: "local", "singularity", "docker", or "modal"
 # - local: Runs directly on your machine (fastest, no isolation)
-# - docker: Runs in Docker containers (isolated, requires Docker installed)
+# - singularity: Runs in Apptainer/Singularity containers (HPC clusters, no root needed)
+# - docker: Runs in Docker containers (isolated, requires Docker and membership in the docker group)
 # - modal: Runs in Modal cloud sandboxes (scalable, requires Modal account)
-TERMINAL_ENV=docker
+TERMINAL_ENV=singularity

-# Docker image to use (for docker and modal backends)
-TERMINAL_DOCKER_IMAGE=python:3.11-slim
+# Container images (for singularity/docker/modal backends)
+TERMINAL_DOCKER_IMAGE=python:3.11
+TERMINAL_SINGULARITY_IMAGE=docker://python:3.11
+TERMINAL_MODAL_IMAGE=python:3.11

 # Working directory inside the container
 TERMINAL_CWD=/tmp
@@ -57,6 +61,73 @@ TERMINAL_LIFETIME_SECONDS=300
 # This will authenticate via browser and store credentials locally.
 # No API key needed in .env - Modal handles auth automatically.

+# =============================================================================
+# BROWSER TOOL CONFIGURATION (agent-browser + Browserbase)
+# =============================================================================
+# Browser automation requires the Browserbase cloud service for remote browser execution.
+# This allows the agent to navigate websites, fill forms, and extract information.
+#
+# STEALTH MODES:
+# - Basic Stealth: ALWAYS active (random fingerprints, auto CAPTCHA solving)
+# - Advanced Stealth: Requires BROWSERBASE_ADVANCED_STEALTH=true (Scale Plan only)
+
+# Browserbase API Key - Cloud browser execution
+# Get at: https://browserbase.com/
+BROWSERBASE_API_KEY=
+
+# Browserbase Project ID - From your Browserbase dashboard
+BROWSERBASE_PROJECT_ID=
+
+# Enable residential proxies for better CAPTCHA solving (default: true)
+# Routes traffic through residential IPs, significantly improves success rate
+BROWSERBASE_PROXIES=true
+
+# Enable advanced stealth mode (default: false, requires Scale Plan)
+# Uses custom Chromium build to avoid bot detection altogether
+BROWSERBASE_ADVANCED_STEALTH=false
+
+# Browser session timeout in seconds (optional, default: 300)
+# Sessions are cleaned up after this duration of inactivity
+BROWSER_SESSION_TIMEOUT=300
+
+# =============================================================================
+# Browser automation requires Browserbase cloud service for remote browser execution.
+# This allows the agent to navigate websites, fill forms, and extract information.
+
+# Browserbase API Key - Cloud browser execution
+# Get at: https://browserbase.com/
+BROWSERBASE_API_KEY=
+
+# Browserbase Project ID - From your Browserbase dashboard
+BROWSERBASE_PROJECT_ID=
+
+# Enable proxies for better CAPTCHA solving and anti-bot avoidance (default: true)
+# Proxies route traffic through residential IPs for more reliable access
+BROWSERBASE_PROXIES=true
+
+# Enable advanced stealth mode (default: false, requires Scale Plan)
+# Uses custom Chromium build to avoid bot detection altogether
+BROWSERBASE_ADVANCED_STEALTH=false
+
+# Browser session timeout in seconds (optional, default: 300)
+# Sessions are cleaned up after this duration of inactivity
+BROWSER_SESSION_TIMEOUT=300
+
+# =============================================================================
+# Browser automation requires Browserbase cloud service for remote browser execution.
+# This allows the agent to navigate websites, fill forms, and extract information.
+
+# Browserbase API Key - Cloud browser execution
+# Get at: https://browserbase.com/
+BROWSERBASE_API_KEY=
+
+# Browserbase Project ID - From your Browserbase dashboard
+BROWSERBASE_PROJECT_ID=
+
+# Browser session timeout in seconds (optional, default: 300)
+# Sessions are cleaned up after this duration of inactivity
+BROWSER_SESSION_TIMEOUT=300
+
 # =============================================================================
 # LEGACY/OPTIONAL API KEYS
 # =============================================================================
@@ -69,10 +140,6 @@ MORPH_API_KEY=
 HECATE_VM_LIFETIME_SECONDS=300
 HECATE_DEFAULT_SNAPSHOT_ID=snapshot_p5294qxt

-# Direct provider keys (optional - OpenRouter is preferred)
-ANTHROPIC_API_KEY=
-OPENAI_API_KEY=
-
 # =============================================================================
 # DEBUG OPTIONS
 # =============================================================================
@@ -80,3 +147,12 @@ WEB_TOOLS_DEBUG=false
 VISION_TOOLS_DEBUG=false
 MOA_TOOLS_DEBUG=false
 IMAGE_TOOLS_DEBUG=false
+
+# Scratch directory for Singularity sandboxes (optional)
+# If not set, uses /scratch (if available) or /tmp
+# Set this to a directory with lots of space for large pip installs
+# TERMINAL_SCRATCH_DIR=/scratch/myuser
+
+# Disk usage warning threshold in GB (default: 500)
+# Warning is printed when total sandbox disk usage exceeds this
+TERMINAL_DISK_WARNING_GB=500