Implement sudo support across terminal environments

- Added support for sudo commands in local, Docker, Singularity, and SSH environments by introducing the `SUDO_PASSWORD` environment variable.
- Updated terminal tool configurations in `.env.example` and `cli-config.yaml.example` to document the new sudo functionality.
- Enhanced the command execution process to handle sudo commands gracefully, preventing hangs on interactive prompts and providing clear error messages when no password is configured.
- Updated `README.md` to include instructions for using sudo support and SSH backend configuration.
- Revised `TODO.md` to reflect the completion of the sudo feature and outline future enhancements.
This commit is contained in:
teknium1
2026-02-01 10:02:34 -08:00
parent affc4e9a8f
commit 971ed2bbdf
6 changed files with 276 additions and 142 deletions

View File

@@ -2,132 +2,17 @@
# Copy this file to .env and fill in your API keys
# =============================================================================
# LLM PROVIDER (OpenRouter)
# SUDO SUPPORT (works with ALL terminal backends)
# =============================================================================
# OpenRouter provides access to many models through one API
# All LLM calls go through OpenRouter - no direct provider keys needed
# Get your key at: https://openrouter.ai/keys
OPENROUTER_API_KEY=
# Default model to use (OpenRouter format: provider/model)
# Examples: anthropic/claude-sonnet-4, openai/gpt-4o, google/gemini-2.0-flash, zhipuai/glm-4-plus
LLM_MODEL=anthropic/claude-sonnet-4
# =============================================================================
# TOOL API KEYS
# =============================================================================
# Firecrawl API Key - Web search, extract, and crawl
# Get at: https://firecrawl.dev/
FIRECRAWL_API_KEY=
# Nous Research API Key - Vision analysis and multi-model reasoning
# Get at: https://inference-api.nousresearch.com/
NOUS_API_KEY=
# FAL.ai API Key - Image generation
# Get at: https://fal.ai/
FAL_KEY=
# =============================================================================
# TERMINAL TOOL CONFIGURATION
# =============================================================================
# Backend type: "local", "singularity", "docker", or "modal"
# Uncomment ONE configuration block below based on your preferred backend.
# -----------------------------------------------------------------------------
# OPTION 1: Singularity/Apptainer (RECOMMENDED for HPC clusters)
# - No root required, common on shared systems
# - Auto-builds and caches SIF images from docker:// URLs
# - Uses /scratch if available, otherwise /tmp
# -----------------------------------------------------------------------------
TERMINAL_ENV=singularity
TERMINAL_SINGULARITY_IMAGE=docker://nikolaik/python-nodejs:python3.11-nodejs20
TERMINAL_CWD=/workspace
TERMINAL_TIMEOUT=60
# Optional: Override scratch directory (auto-detects /scratch or /tmp)
# TERMINAL_SCRATCH_DIR=/scratch/myuser/hermes
# -----------------------------------------------------------------------------
# OPTION 2: Local execution (FASTEST, but no isolation)
# - Runs directly on your machine
# - No containers, no setup required
# - WARNING: Commands run with your user permissions
# -----------------------------------------------------------------------------
# TERMINAL_ENV=local
# TERMINAL_CWD=/tmp
# TERMINAL_TIMEOUT=60
# -----------------------------------------------------------------------------
# OPTION 3: Docker (good isolation, requires Docker)
# - Requires Docker installed and user in 'docker' group
# - Each task gets an isolated container
# -----------------------------------------------------------------------------
# TERMINAL_ENV=docker
# TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
# TERMINAL_CWD=/workspace
# TERMINAL_TIMEOUT=60
# -----------------------------------------------------------------------------
# OPTION 4: Modal (cloud execution, scalable)
# - Requires Modal account: pip install modal && modal setup
# - Runs in Modal's cloud sandboxes
# - Good for scaling to many parallel workers
# -----------------------------------------------------------------------------
# TERMINAL_ENV=modal
# TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20
# TERMINAL_CWD=/workspace
# TERMINAL_TIMEOUT=60
# Common settings for all backends
TERMINAL_LIFETIME_SECONDS=300
TERMINAL_DISK_WARNING_GB=500
# =============================================================================
# BROWSER TOOL CONFIGURATION (agent-browser + Browserbase)
# =============================================================================
# Browser automation requires Browserbase cloud service for remote browser execution.
# This allows the agent to navigate websites, fill forms, and extract information.
# If set, enables sudo commands by piping password via `sudo -S`.
# Works with: local, docker, singularity, modal, and ssh backends.
#
# SECURITY WARNING: Password stored in plaintext. Only use on trusted machines.
#
# ALTERNATIVES:
# - For SSH backend: Configure passwordless sudo on the remote server
# - For containers: Run as root inside the container (no sudo needed)
# - For local: Configure /etc/sudoers for specific commands
#
# STEALTH MODES:
# - Basic Stealth: ALWAYS active (random fingerprints, auto CAPTCHA solving)
# - Advanced Stealth: Requires BROWSERBASE_ADVANCED_STEALTH=true (Scale Plan only)
# SUDO_PASSWORD=your_password_here
# Browserbase API Key - Cloud browser execution
# Get at: https://browserbase.com/
BROWSERBASE_API_KEY=
# Browserbase Project ID - From your Browserbase dashboard
BROWSERBASE_PROJECT_ID=
# Enable residential proxies for better CAPTCHA solving (default: true)
BROWSERBASE_PROXIES=true
# Enable advanced stealth mode (default: false, requires Scale Plan)
BROWSERBASE_ADVANCED_STEALTH=false
# Browser session timeout in seconds - Browserbase session duration (default: 300)
BROWSER_SESSION_TIMEOUT=300
# Browser inactivity timeout in seconds - auto-cleanup inactive sessions (default: 120)
BROWSER_INACTIVITY_TIMEOUT=120
# =============================================================================
# LEGACY/OPTIONAL
# =============================================================================
# Morph API Key - For legacy Hecate terminal backend
# Get at: https://morph.so/
# MORPH_API_KEY=
# Hecate VM Settings (only if using terminal-hecate tool)
# HECATE_VM_LIFETIME_SECONDS=300
# HECATE_DEFAULT_SNAPSHOT_ID=snapshot_p5294qxt
# =============================================================================
# DEBUG OPTIONS
# =============================================================================
WEB_TOOLS_DEBUG=false
VISION_TOOLS_DEBUG=false
MOA_TOOLS_DEBUG=false
IMAGE_TOOLS_DEBUG=false

View File

@@ -530,13 +530,20 @@ All environment variables can be configured in the `.env` file (copy from `.env.
- `FAL_KEY`: Image generation tools
**Terminal Tool Configuration (mini-swe-agent backend):**
- `TERMINAL_ENV`: Backend type - `local`, `docker`, `singularity`, or `modal` (default: `local`)
- `TERMINAL_ENV`: Backend type - `local`, `docker`, `singularity`, `modal`, or `ssh` (default: `local`)
- `TERMINAL_DOCKER_IMAGE`: Docker image for docker backend (default: `python:3.11-slim`)
- `TERMINAL_SINGULARITY_IMAGE`: Singularity/Apptainer image (can be `docker://...` URL or local `.sif` path)
- `TERMINAL_TIMEOUT`: Command timeout in seconds (default: `60`)
- `TERMINAL_LIFETIME_SECONDS`: Cleanup inactive environments after this time (default: `300`)
- `TERMINAL_CWD`: Working directory inside containers (default: `/tmp`)
- `TERMINAL_SCRATCH_DIR`: Custom scratch directory for sandbox storage (optional, auto-detects `/scratch`)
- `SUDO_PASSWORD`: Enable sudo commands by piping password via `sudo -S` (works with all backends)
**SSH Backend Configuration (for remote execution):**
- `TERMINAL_SSH_HOST`: Remote server hostname or IP
- `TERMINAL_SSH_USER`: SSH username
- `TERMINAL_SSH_PORT`: SSH port (default: `22`)
- `TERMINAL_SSH_KEY`: Path to SSH private key (optional, uses ssh-agent if not set)
**Browser Tool Configuration (agent-browser + Browserbase):**
- `BROWSERBASE_API_KEY`: Browserbase API key for cloud browser execution

27
TODO.md
View File

@@ -8,13 +8,26 @@
These items need to be addressed ASAP:
### 1. SUDO Breaking Terminal Tool 🔐
- [ ] **Problem:** SUDO commands break the terminal tool execution
- [ ] **Fix:** Handle password prompts / TTY requirements gracefully
- [ ] **Options:**
- Configure passwordless sudo for specific commands
- Detect sudo and warn user / request alternative approach
- Use `sudo -S` with stdin handling if password can be provided securely
### 1. SUDO Breaking Terminal Tool 🔐 ✅ COMPLETE
- [x] **Problem:** SUDO commands break the terminal tool execution (hangs indefinitely)
- [x] **Fix:** Created custom environment wrappers in `tools/terminal_tool.py`
- `stdin=subprocess.DEVNULL` prevents hanging on interactive prompts
- Sudo fails gracefully with clear error if no password configured
- Same UX as Claude Code - agent sees error, tells user to run it themselves
- [x] **All 5 environments now have consistent behavior:**
- `_LocalEnvironment` - local execution
- `_DockerEnvironment` - Docker containers
- `_SingularityEnvironment` - Singularity/Apptainer containers
- `_ModalEnvironment` - Modal cloud sandboxes
- `_SSHEnvironment` - remote SSH execution
- [x] **Optional sudo support via `SUDO_PASSWORD` env var:**
- Shared `_transform_sudo_command()` helper used by all environments
- If set, auto-transforms `sudo cmd` → pipes password via `sudo -S`
- Documented in `.env.example` with security warnings
- Works for chained commands: `cmd1 && sudo cmd2`
- [ ] **Optional future enhancements:**
- Interactive password prompt in CLI mode only
- Document passwordless sudo setup in /etc/sudoers for power users
### 2. Fix `browser_get_images` Tool 🖼️
- [ ] **Problem:** `browser_get_images` tool is broken/not working correctly

View File

@@ -28,6 +28,7 @@ terminal:
cwd: "." # Use "." for current directory, or specify absolute path
timeout: 180
lifetime_seconds: 300
# sudo_password: "" # Enable sudo commands (pipes via sudo -S) - SECURITY WARNING: plaintext!
# -----------------------------------------------------------------------------
# OPTION 2: SSH remote execution
@@ -80,6 +81,30 @@ terminal:
# lifetime_seconds: 300
# modal_image: "python:3.11"
# -----------------------------------------------------------------------------
# SUDO SUPPORT (works with ALL backends above)
# -----------------------------------------------------------------------------
# Add sudo_password to any terminal config above to enable sudo commands.
# The password is piped via `sudo -S`. Works with local, ssh, docker, etc.
#
# SECURITY WARNING: Password stored in plaintext!
#
# ALTERNATIVES:
# - SSH backend: Configure passwordless sudo on the remote server
# - Containers: Run as root inside the container (no sudo needed)
# - Local: Configure /etc/sudoers for specific commands
#
# Example (add to your terminal section):
# sudo_password: "your-password-here"
# =============================================================================
# Browser Tool Configuration
# =============================================================================
browser:
# Inactivity timeout in seconds - browser sessions are automatically closed
# after this period of no activity between agent loops (default: 120 = 2 minutes)
inactivity_timeout: 120
# =============================================================================
# Agent Behavior
# =============================================================================

2
cli.py
View File

@@ -134,6 +134,8 @@ def load_cli_config() -> Dict[str, Any]:
"ssh_user": "TERMINAL_SSH_USER",
"ssh_port": "TERMINAL_SSH_PORT",
"ssh_key": "TERMINAL_SSH_KEY",
# Sudo support (works with all backends)
"sudo_password": "SUDO_PASSWORD",
}
# CLI config overrides .env for terminal settings

View File

@@ -204,6 +204,94 @@ def _check_disk_usage_warning():
return False
def _transform_sudo_command(command: str) -> str:
"""
Transform sudo commands to use -S flag if SUDO_PASSWORD is available.
This is a shared helper used by all execution environments to provide
consistent sudo handling across local, SSH, and container environments.
If SUDO_PASSWORD is set, transforms:
'sudo apt install curl' -> password piped via sudo -S
If SUDO_PASSWORD is not set, command runs as-is (will fail gracefully
with "sudo: a password is required" error due to stdin=DEVNULL).
"""
sudo_password = os.getenv("SUDO_PASSWORD", "")
if not sudo_password:
return command # No password, let it fail gracefully
# Check if command contains sudo (simple detection)
# Handle: "sudo cmd", "sudo -flag cmd", "cmd && sudo cmd2", etc.
import re
def replace_sudo(match):
# Replace 'sudo' with password-piped version
# The -S flag makes sudo read password from stdin
# The -p '' suppresses the password prompt
return f"echo '{sudo_password}' | sudo -S -p ''"
# Match 'sudo' at word boundaries (not 'visudo' or 'sudoers')
# This handles: sudo, sudo -flag, etc.
return re.sub(r'\bsudo\b', replace_sudo, command)
class _LocalEnvironment:
"""
Local execution environment with sudo support and non-blocking stdin.
Features:
- Uses stdin=DEVNULL to prevent hanging on interactive prompts (sudo, etc.)
- Optional SUDO_PASSWORD support: if set, transforms `sudo` commands to use `sudo -S`
- Graceful failure: sudo commands fail fast with clear error if no password configured
Environment variables:
- SUDO_PASSWORD: If set, enables sudo commands by piping password via `sudo -S`
"""
def __init__(self, cwd: str = "", timeout: int = 60, env: dict = None):
self.cwd = cwd or os.getcwd()
self.timeout = timeout
self.env = env or {}
def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict:
"""Execute a command locally with sudo support."""
work_dir = cwd or self.cwd or os.getcwd()
effective_timeout = timeout or self.timeout
# Transform sudo commands if SUDO_PASSWORD is available
exec_command = _transform_sudo_command(command)
try:
result = subprocess.run(
exec_command,
shell=True,
text=True,
cwd=work_dir,
env=os.environ | self.env,
timeout=effective_timeout,
encoding="utf-8",
errors="replace",
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=subprocess.DEVNULL, # Prevent hanging on interactive prompts
)
return {"output": result.stdout, "returncode": result.returncode}
except subprocess.TimeoutExpired:
return {"output": f"Command timed out after {effective_timeout}s", "returncode": 124}
except Exception as e:
return {"output": f"Execution error: {str(e)}", "returncode": 1}
def cleanup(self):
"""No cleanup needed for local environment."""
pass
def stop(self):
"""Alias for cleanup."""
pass
class _SingularityEnvironment:
"""
Custom Singularity/Apptainer environment with better space management.
@@ -279,8 +367,11 @@ class _SingularityEnvironment:
# Use writable sandbox
cmd.extend(["--writable", str(self.sandbox_dir)])
# Transform sudo commands if SUDO_PASSWORD is available
exec_command = _transform_sudo_command(command)
# Execute the command
cmd.extend(["bash", "-c", command])
cmd.extend(["bash", "-c", exec_command])
try:
result = subprocess.run(
@@ -291,6 +382,7 @@ class _SingularityEnvironment:
errors="replace",
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=subprocess.DEVNULL, # Prevent hanging on interactive prompts
)
return {"output": result.stdout, "returncode": result.returncode}
except subprocess.TimeoutExpired:
@@ -395,9 +487,12 @@ class _SSHEnvironment:
work_dir = cwd or self.cwd
effective_timeout = timeout or self.timeout
# Transform sudo commands if SUDO_PASSWORD is available
exec_command = _transform_sudo_command(command)
# Wrap command to run in the correct directory
# Use bash -c to handle complex commands properly
wrapped_command = f'cd {work_dir} && {command}'
wrapped_command = f'cd {work_dir} && {exec_command}'
cmd = self._build_ssh_command()
cmd.extend(["bash", "-c", wrapped_command])
@@ -411,6 +506,7 @@ class _SSHEnvironment:
errors="replace",
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=subprocess.DEVNULL, # Prevent hanging on interactive prompts
)
return {"output": result.stdout, "returncode": result.returncode}
except subprocess.TimeoutExpired:
@@ -447,6 +543,112 @@ class _SSHEnvironment:
pass
class _DockerEnvironment:
"""
Docker execution environment wrapper with sudo support and non-blocking stdin.
Wraps mini-swe-agent's DockerEnvironment but adds:
- stdin=DEVNULL to prevent hanging on interactive prompts
- SUDO_PASSWORD support via _transform_sudo_command
"""
def __init__(self, image: str, cwd: str = "/", timeout: int = 60):
from minisweagent.environments.docker import DockerEnvironment
self._inner = DockerEnvironment(image=image, cwd=cwd, timeout=timeout)
self.cwd = cwd
self.timeout = timeout
def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict:
"""Execute a command in the Docker container with sudo support."""
# Transform sudo commands if SUDO_PASSWORD is available
exec_command = _transform_sudo_command(command)
work_dir = cwd or self.cwd
effective_timeout = timeout or self.timeout
# Get container_id from inner environment
assert self._inner.container_id, "Container not started"
cmd = [self._inner.config.executable, "exec", "-w", work_dir]
for key in self._inner.config.forward_env:
if (value := os.getenv(key)) is not None:
cmd.extend(["-e", f"{key}={value}"])
for key, value in self._inner.config.env.items():
cmd.extend(["-e", f"{key}={value}"])
cmd.extend([self._inner.container_id, "bash", "-lc", exec_command])
try:
result = subprocess.run(
cmd,
text=True,
timeout=effective_timeout,
encoding="utf-8",
errors="replace",
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
stdin=subprocess.DEVNULL, # Prevent hanging on interactive prompts
)
return {"output": result.stdout, "returncode": result.returncode}
except subprocess.TimeoutExpired:
return {"output": f"Command timed out after {effective_timeout}s", "returncode": 124}
def cleanup(self):
"""Cleanup the Docker container."""
self._inner.cleanup()
def stop(self):
"""Alias for cleanup."""
self.cleanup()
def __del__(self):
"""Cleanup on destruction."""
try:
self.cleanup()
except:
pass
class _ModalEnvironment:
"""
Modal cloud execution environment wrapper with sudo support.
Wraps mini-swe-agent's SwerexModalEnvironment but adds:
- SUDO_PASSWORD support via _transform_sudo_command
Note: stdin handling is not needed for Modal since it uses remote async execution.
"""
def __init__(self, image: str, cwd: str = "/", timeout: int = 60):
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
self._inner = SwerexModalEnvironment(image=image, cwd=cwd, timeout=timeout)
self.cwd = cwd
self.timeout = timeout
def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict:
"""Execute a command in Modal with sudo support."""
# Transform sudo commands if SUDO_PASSWORD is available
exec_command = _transform_sudo_command(command)
# Delegate to inner environment with transformed command
return self._inner.execute(exec_command, cwd=cwd, timeout=timeout)
def cleanup(self):
"""Cleanup the Modal deployment."""
if hasattr(self._inner, 'stop'):
self._inner.stop()
def stop(self):
"""Stop the Modal deployment."""
self.cleanup()
def __del__(self):
"""Cleanup on destruction."""
try:
self.cleanup()
except:
pass
# Tool description for LLM
TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux environment.
@@ -518,20 +720,20 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, ssh_c
Environment instance with execute() method
"""
if env_type == "local":
from minisweagent.environments.local import LocalEnvironment
return LocalEnvironment(cwd=cwd, timeout=timeout)
# Use our custom LocalEnvironment with sudo support and non-blocking stdin
return _LocalEnvironment(cwd=cwd, timeout=timeout)
elif env_type == "docker":
from minisweagent.environments.docker import DockerEnvironment
return DockerEnvironment(image=image, cwd=cwd, timeout=timeout)
# Use custom Docker wrapper with sudo support and non-blocking stdin
return _DockerEnvironment(image=image, cwd=cwd, timeout=timeout)
elif env_type == "singularity":
# Use custom Singularity environment with better space management
return _SingularityEnvironment(image=image, cwd=cwd, timeout=timeout)
elif env_type == "modal":
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
return SwerexModalEnvironment(image=image, cwd=cwd, timeout=timeout)
# Use custom Modal wrapper with sudo support
return _ModalEnvironment(image=image, cwd=cwd, timeout=timeout)
elif env_type == "ssh":
if not ssh_config or not ssh_config.get("host") or not ssh_config.get("user"):