diff --git a/.env.example b/.env.example index 4774800d1..9f701bf3a 100644 --- a/.env.example +++ b/.env.example @@ -30,43 +30,59 @@ NOUS_API_KEY= FAL_KEY= # ============================================================================= -# TERMINAL TOOL CONFIGURATION (mini-swe-agent backend) +# TERMINAL TOOL CONFIGURATION # ============================================================================= # Backend type: "local", "singularity", "docker", or "modal" -# - local: Runs directly on your machine (fastest, no isolation) -# - singularity: Runs in Apptainer/Singularity containers (HPC clusters, no root needed) -# - docker: Runs in Docker containers (isolated, requires Docker + docker group) -# - modal: Runs in Modal cloud sandboxes (scalable, requires Modal account) -TERMINAL_ENV=local +# Uncomment ONE configuration block below based on your preferred backend. -# Container images (for singularity/docker/modal backends) -TERMINAL_DOCKER_IMAGE=python:3.11-slim -TERMINAL_SINGULARITY_IMAGE=docker://python:3.11-slim -TERMINAL_MODAL_IMAGE=python:3.11-slim - -# Working directory inside the container -TERMINAL_CWD=/tmp - -# Default command timeout in seconds +# ----------------------------------------------------------------------------- +# OPTION 1: Singularity/Apptainer (RECOMMENDED for HPC clusters) +# - No root required, common on shared systems +# - Auto-builds and caches SIF images from docker:// URLs +# - Uses /scratch if available, otherwise /tmp +# ----------------------------------------------------------------------------- +TERMINAL_ENV=singularity +TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20 +TERMINAL_CWD=/workspace TERMINAL_TIMEOUT=60 +# Optional: Override scratch directory (auto-detects /scratch or /tmp) +# TERMINAL_SCRATCH_DIR=/scratch/myuser/hermes -# Cleanup inactive environments after this many seconds +# ----------------------------------------------------------------------------- +# OPTION 2: Local execution 
(FASTEST, but no isolation) +# - Runs directly on your machine +# - No containers, no setup required +# - WARNING: Commands run with your user permissions +# ----------------------------------------------------------------------------- +# TERMINAL_ENV=local +# TERMINAL_CWD=/tmp +# TERMINAL_TIMEOUT=60 + +# ----------------------------------------------------------------------------- +# OPTION 3: Docker (good isolation, requires Docker) +# - Requires Docker installed and user in 'docker' group +# - Each task gets an isolated container +# ----------------------------------------------------------------------------- +# TERMINAL_ENV=docker +# TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20 +# TERMINAL_CWD=/workspace +# TERMINAL_TIMEOUT=60 + +# ----------------------------------------------------------------------------- +# OPTION 4: Modal (cloud execution, scalable) +# - Requires Modal account: pip install modal && modal setup +# - Runs in Modal's cloud sandboxes +# - Good for scaling to many parallel workers +# ----------------------------------------------------------------------------- +# TERMINAL_ENV=modal +# TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20 +# TERMINAL_CWD=/workspace +# TERMINAL_TIMEOUT=60 + +# Common settings for all backends TERMINAL_LIFETIME_SECONDS=300 - -# Scratch directory for Singularity sandboxes (optional) -# If not set, uses /scratch (if available) or /tmp -# TERMINAL_SCRATCH_DIR=/scratch/myuser - -# Disk usage warning threshold in GB (default: 500) TERMINAL_DISK_WARNING_GB=500 -# ============================================================================= -# MODAL CLOUD BACKEND (Optional - for TERMINAL_ENV=modal) -# ============================================================================= -# Modal uses CLI authentication, not environment variables. -# Run: pip install modal && modal setup -# This will authenticate via browser and store credentials locally. 
- # ============================================================================= # BROWSER TOOL CONFIGURATION (agent-browser + Browserbase) # ============================================================================= @@ -99,11 +115,11 @@ BROWSER_SESSION_TIMEOUT=300 # Morph API Key - For legacy Hecate terminal backend # Get at: https://morph.so/ -MORPH_API_KEY= +# MORPH_API_KEY= # Hecate VM Settings (only if using terminal-hecate tool) -HECATE_VM_LIFETIME_SECONDS=300 -HECATE_DEFAULT_SNAPSHOT_ID=snapshot_p5294qxt +# HECATE_VM_LIFETIME_SECONDS=300 +# HECATE_DEFAULT_SNAPSHOT_ID=snapshot_p5294qxt # ============================================================================= # DEBUG OPTIONS diff --git a/configs/run_mixed_tasks.sh b/configs/run_mixed_tasks.sh index e4db4de1d..39ad8cf5f 100755 --- a/configs/run_mixed_tasks.sh +++ b/configs/run_mixed_tasks.sh @@ -13,19 +13,31 @@ LOG_FILE="logs/mixed_tasks_$(date +%Y%m%d_%H%M%S).log" echo "📝 Logging output to: $LOG_FILE" echo "🔀 Running mixed browser+terminal tasks with mixed_tasks distribution" -# Set terminal environment (Modal sandboxes recommended for safety) -export TERMINAL_ENV=modal -export TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20 +# Set terminal environment +# SIF images are automatically built/cached by terminal_tool.py +export TERMINAL_ENV=singularity +export TERMINAL_SINGULARITY_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20" export TERMINAL_TIMEOUT=300 +# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp) +if [ -d "/scratch" ] && [ -w "/scratch" ]; then + CACHE_BASE="/scratch/$USER/.apptainer" +else + CACHE_BASE="/tmp/$USER/.apptainer" +fi +export APPTAINER_CACHEDIR="$CACHE_BASE" +export APPTAINER_TMPDIR="$CACHE_BASE/tmp" +mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR" + +echo "📁 Apptainer cache: $APPTAINER_CACHEDIR" + python batch_runner.py \ --dataset_file="mixed-browser-terminal-tasks.jsonl" \ --batch_size=20 \ --run_name="mixed_tasks" \ 
--distribution="mixed_tasks" \ - --model="z-ai/glm-4.7" \ + --model="moonshotai/kimi-k2.5" \ --base_url="https://openrouter.ai/api/v1" \ - --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \ --num_workers=25 \ --max_turns=60 \ --ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully. Whenever you can do something in a terminal instead of a web browser, you should choose to do so, as it's much cheaper." 
\ diff --git a/configs/run_terminal_tasks.sh b/configs/run_terminal_tasks.sh index 6a0fd459d..7ac8a6694 100755 --- a/configs/run_terminal_tasks.sh +++ b/configs/run_terminal_tasks.sh @@ -2,7 +2,7 @@ # Terminal-focused data generation run # Uses nous-terminal-tasks.jsonl (597 tasks) -# Distribution: terminal 97%, web 15%, browser 10%, vision 8%, image_gen 3% +# Distribution: terminal 97%, web 15%, browser 0%, vision 8%, image_gen 3% # Create logs directory if it doesn't exist mkdir -p logs @@ -13,8 +13,10 @@ LOG_FILE="logs/terminal_tasks_$(date +%Y%m%d_%H%M%S).log" echo "📝 Logging output to: $LOG_FILE" echo "💻 Running terminal-focused tasks with terminal_tasks distribution" -# Set terminal environment (using Singularity for containerized execution) +# Set terminal environment +# SIF images are automatically built/cached by terminal_tool.py export TERMINAL_ENV=singularity +export TERMINAL_SINGULARITY_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20" export TERMINAL_TIMEOUT=300 # Set up Apptainer cache directories (use /scratch if available, otherwise /tmp) @@ -27,28 +29,8 @@ export APPTAINER_CACHEDIR="$CACHE_BASE" export APPTAINER_TMPDIR="$CACHE_BASE/tmp" mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR" -# Pre-build SIF image if it doesn't exist (avoids 40 workers all downloading simultaneously) -SIF_IMAGE="$CACHE_BASE/python-nodejs-3.11-20.sif" -DOCKER_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20" - -if [ ! -f "$SIF_IMAGE" ]; then - echo "🔨 Building Singularity image (one-time setup)..." - echo " Source: $DOCKER_IMAGE" - echo " Target: $SIF_IMAGE" - apptainer build "$SIF_IMAGE" "$DOCKER_IMAGE" - if [ $? -ne 0 ]; then - echo "❌ Failed to build SIF image. 
Falling back to docker:// URL" - export TERMINAL_SINGULARITY_IMAGE="$DOCKER_IMAGE" - else - echo "✅ SIF image built successfully" - export TERMINAL_SINGULARITY_IMAGE="$SIF_IMAGE" - fi -else - echo "✅ Using pre-built SIF image: $SIF_IMAGE" - export TERMINAL_SINGULARITY_IMAGE="$SIF_IMAGE" -fi - echo "📁 Apptainer cache: $APPTAINER_CACHEDIR" +echo "🐳 Image: $TERMINAL_SINGULARITY_IMAGE (auto-converted to SIF on first use)" python batch_runner.py \ --dataset_file="nous-terminal-tasks.jsonl" \ diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 672987717..389b1c96c 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -72,6 +72,104 @@ def _get_scratch_dir() -> Path: return Path(tempfile.gettempdir()) +def _get_apptainer_cache_dir() -> Path: + """Get the Apptainer cache directory for SIF images.""" + # Check for APPTAINER_CACHEDIR env var + cache_dir = os.getenv("APPTAINER_CACHEDIR") + if cache_dir: + cache_path = Path(cache_dir) + cache_path.mkdir(parents=True, exist_ok=True) + return cache_path + + # Use scratch dir parent for cache (one level up from sandboxes) + scratch = _get_scratch_dir() + cache_path = scratch.parent / ".apptainer" + cache_path.mkdir(parents=True, exist_ok=True) + return cache_path + + +# Lock for SIF building to prevent race conditions +_sif_build_lock = threading.Lock() + + +def _get_or_build_sif(image: str, executable: str = "apptainer") -> str: + """ + Get or build a SIF image from a docker:// URL. + + If the image is already a .sif file, returns it as-is. + If the image is a docker:// URL, checks for cached SIF and builds if needed. + + Args: + image: Image path (docker://... 
URL or .sif path) + executable: apptainer or singularity + + Returns: + Path to SIF file, or original image if not a docker:// URL + """ + # If already a .sif file, use it directly + if image.endswith('.sif') and Path(image).exists(): + return image + + # If not a docker:// URL, return as-is (could be a local sandbox or other format) + if not image.startswith('docker://'): + return image + + # Generate SIF filename from docker image name + # docker://nikolaik/python-nodejs:python3.11-nodejs20 -> nikolaik-python-nodejs-python3.11-nodejs20.sif + image_name = image.replace('docker://', '').replace('/', '-').replace(':', '-') + cache_dir = _get_apptainer_cache_dir() + sif_path = cache_dir / f"{image_name}.sif" + + # Check if SIF already exists + if sif_path.exists(): + return str(sif_path) + + # Build SIF with lock to prevent multiple workers building simultaneously + with _sif_build_lock: + # Double-check after acquiring lock (another thread may have built it) + if sif_path.exists(): + return str(sif_path) + + print(f"[Terminal] Building SIF image (one-time setup)...") + print(f"[Terminal] Source: {image}") + print(f"[Terminal] Target: {sif_path}") + + # Ensure tmp directory exists for build + tmp_dir = cache_dir / "tmp" + tmp_dir.mkdir(parents=True, exist_ok=True) + + # Set APPTAINER_TMPDIR for the build + env = os.environ.copy() + env["APPTAINER_TMPDIR"] = str(tmp_dir) + env["APPTAINER_CACHEDIR"] = str(cache_dir) + + try: + result = subprocess.run( + [executable, "build", str(sif_path), image], + capture_output=True, + text=True, + timeout=600, # 10 min timeout for pulling and building + env=env + ) + if result.returncode != 0: + print(f"[Terminal] ⚠️ SIF build failed, falling back to docker:// URL") + print(f"[Terminal] Error: {result.stderr[:500]}") + return image + + print(f"[Terminal] ✅ SIF image built successfully") + return str(sif_path) + + except subprocess.TimeoutExpired: + print(f"[Terminal] ⚠️ SIF build timed out, falling back to docker:// URL") + # Clean up
partial file + if sif_path.exists(): + sif_path.unlink() + return image + except Exception as e: + print(f"[Terminal] ⚠️ SIF build error: {e}, falling back to docker:// URL") + return image + + # Disk usage warning threshold (in GB) DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "500")) @@ -108,19 +206,22 @@ class _SingularityEnvironment: """ Custom Singularity/Apptainer environment with better space management. + - Automatically builds/caches SIF images from docker:// URLs - Builds sandbox in /scratch (if available) or configurable location - Binds a large working directory into the container - Keeps container isolated from host filesystem """ def __init__(self, image: str, cwd: str = "/workspace", timeout: int = 60): - self.image = image self.cwd = cwd self.timeout = timeout # Use apptainer if available, otherwise singularity self.executable = "apptainer" if shutil.which("apptainer") else "singularity" + # Get or build SIF from docker:// URL (fast if already cached) + self.image = _get_or_build_sif(image, self.executable) + # Get scratch directory for sandbox self.scratch_dir = _get_scratch_dir() @@ -136,7 +237,7 @@ class _SingularityEnvironment: self._build_sandbox() def _build_sandbox(self): - """Build a writable sandbox from the container image.""" + """Build a writable sandbox from the container image (SIF or other).""" try: result = subprocess.run( [self.executable, "build", "--sandbox", str(self.sandbox_dir), self.image],