#!/usr/bin/env python3
"""
Terminal Tool Module (mini-swe-agent backend)

A terminal tool that executes commands using mini-swe-agent's execution environments.
Supports local execution, Docker containers, and Modal cloud sandboxes.

Environment Selection (via TERMINAL_ENV environment variable):
- "local":  Execute directly on the host machine (default, fastest)
- "docker": Execute in Docker containers (isolated, requires Docker)
- "modal":  Execute in Modal cloud sandboxes (scalable, requires Modal account)

Features:
- Multiple execution backends (local, docker, modal)
- Background task support
- VM/container lifecycle management
- Automatic cleanup after inactivity

Usage:
    from terminal_tool import terminal_tool

    # Execute a simple command
    result = terminal_tool("ls -la")

    # Execute in background
    result = terminal_tool("python server.py", background=True)
"""
import json
2026-02-21 03:11:11 -08:00
import logging
2025-07-25 15:15:36 +00:00
import os
2026-02-10 16:34:27 -08:00
import signal
2026-01-23 12:26:53 +00:00
import sys
2025-11-04 03:32:43 -05:00
import time
2026-01-23 12:26:53 +00:00
import threading
2025-11-04 03:32:43 -05:00
import atexit
2026-01-29 06:10:24 +00:00
import shutil
import subprocess
import tempfile
import uuid
2026-01-23 12:26:53 +00:00
from pathlib import Path
2025-07-26 04:31:17 +00:00
from typing import Optional , Dict , Any
2025-07-25 15:15:36 +00:00
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Global interrupt event: set by the agent when a user interrupt arrives.
# The terminal tool polls this during command execution so it can kill
# long-running subprocesses immediately instead of blocking until timeout.
# ---------------------------------------------------------------------------
_interrupt_event = threading.Event()


def set_interrupt_event(active: bool) -> None:
    """Raise or clear the global user-interrupt flag (called by the agent)."""
    if active:
        _interrupt_event.set()
    else:
        _interrupt_event.clear()


def is_interrupted() -> bool:
    """Return True when a user interrupt is currently pending."""
    return _interrupt_event.is_set()
# Make the vendored mini-swe-agent importable when it is not pip-installed.
mini_swe_path = Path(__file__).parent.parent / "mini-swe-agent" / "src"
if mini_swe_path.exists():
    sys.path.insert(0, str(mini_swe_path))
# =============================================================================
# Custom Singularity Environment with more space
# =============================================================================
def _get_scratch_dir() -> Path:
    """Return the best directory for Singularity sandboxes.

    Priority order:
      1. $TERMINAL_SCRATCH_DIR - explicit user override
      2. /scratch              - common on HPC clusters (especially GPU nodes)
      3. system temp dir       - last-resort fallback

    The chosen directory is created on demand.
    """
    # Configurable scratch directory always wins.
    custom_scratch = os.getenv("TERMINAL_SCRATCH_DIR")
    if custom_scratch:
        scratch_path = Path(custom_scratch)
        scratch_path.mkdir(parents=True, exist_ok=True)
        return scratch_path

    # /scratch: common on HPC clusters, must be writable to be usable.
    scratch = Path("/scratch")
    if scratch.exists() and os.access(scratch, os.W_OK):
        # Keep sandboxes in a user-specific subdirectory.
        user_scratch = scratch / os.getenv("USER", "hermes") / "hermes-agent"
        user_scratch.mkdir(parents=True, exist_ok=True)
        logger.info("Using /scratch for sandboxes: %s", user_scratch)
        return user_scratch

    # Fall back to /tmp (only relevant for Singularity/HPC sandboxes).
    logger.debug("/scratch not available, using /tmp for sandboxes")
    return Path(tempfile.gettempdir())
def _get_apptainer_cache_dir() -> Path:
    """Return the Apptainer cache directory for SIF images.

    Honors $APPTAINER_CACHEDIR when set; otherwise uses a ``.apptainer``
    subdirectory under the scratch directory. Created on demand.
    """
    # Explicit APPTAINER_CACHEDIR override takes precedence.
    cache_dir = os.getenv("APPTAINER_CACHEDIR")
    if cache_dir:
        cache_path = Path(cache_dir)
        cache_path.mkdir(parents=True, exist_ok=True)
        return cache_path

    # Otherwise keep the cache next to the sandboxes in scratch.
    scratch = _get_scratch_dir()
    cache_path = scratch / ".apptainer"
    cache_path.mkdir(parents=True, exist_ok=True)
    return cache_path
# Lock for SIF building to prevent race conditions
_sif_build_lock = threading.Lock()


def _get_or_build_sif(image: str, executable: str = "apptainer") -> str:
    """Get or build a SIF image from a docker:// URL.

    If *image* is already an existing ``.sif`` file it is returned as-is.
    If it is a ``docker://`` URL, a cached SIF is looked up and built on
    first use. Any other value (local sandbox dir, etc.) is passed through.

    Args:
        image: Image path (docker://... URL or .sif path)
        executable: "apptainer" or "singularity"

    Returns:
        Path to the SIF file, or the original *image* string when no
        conversion applies or the build fails (graceful fallback).
    """
    # Existing .sif file: use directly.
    if image.endswith('.sif') and Path(image).exists():
        return image
    # Not a docker:// URL: return as-is (could be a local sandbox or other format).
    if not image.startswith('docker://'):
        return image

    # Derive a cache filename from the docker image name, e.g.
    # docker://nikolaik/python-nodejs:python3.11-nodejs20
    #   -> nikolaik-python-nodejs-python3.11-nodejs20.sif
    image_name = image.replace('docker://', '').replace('/', '-').replace(':', '-')
    cache_dir = _get_apptainer_cache_dir()
    sif_path = cache_dir / f"{image_name}.sif"

    if sif_path.exists():
        return str(sif_path)

    # Serialize builds so multiple workers don't build the same SIF at once.
    with _sif_build_lock:
        # Double-check after acquiring the lock (another thread may have built it).
        if sif_path.exists():
            return str(sif_path)

        logger.info("Building SIF image (one-time setup)...")
        logger.info("Source: %s", image)
        logger.info("Target: %s", sif_path)

        # Ensure a tmp directory exists for the build.
        tmp_dir = cache_dir / "tmp"
        tmp_dir.mkdir(parents=True, exist_ok=True)

        # Point Apptainer's tmp/cache dirs at our scratch-backed locations.
        env = os.environ.copy()
        env["APPTAINER_TMPDIR"] = str(tmp_dir)
        env["APPTAINER_CACHEDIR"] = str(cache_dir)

        try:
            result = subprocess.run(
                [executable, "build", str(sif_path), image],
                capture_output=True,
                text=True,
                timeout=600,  # 10 min timeout for pulling and building
                env=env,
            )
            if result.returncode != 0:
                logger.warning("SIF build failed, falling back to docker:// URL")
                logger.warning("Error: %s", result.stderr[:500])
                return image
            logger.info("SIF image built successfully")
            return str(sif_path)
        except subprocess.TimeoutExpired:
            logger.warning("SIF build timed out, falling back to docker:// URL")
            # Remove the partial file so a later attempt starts clean.
            if sif_path.exists():
                sif_path.unlink()
            return image
        except Exception as e:
            logger.warning("SIF build error: %s, falling back to docker:// URL", e)
            return image
# Disk usage warning threshold (in GB)
DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "500"))


def _check_disk_usage_warning() -> bool:
    """Warn when the hermes sandbox directories exceed the disk threshold.

    Returns True when the warning fired, False otherwise. Best-effort:
    any error (permissions, races) yields False rather than propagating,
    so the check can never break command execution.
    """
    scratch_dir = _get_scratch_dir()
    try:
        # Sum file sizes under every top-level hermes-* directory in scratch.
        total_bytes = 0
        for hermes_dir in scratch_dir.glob("hermes-*"):
            for f in hermes_dir.rglob('*'):
                if f.is_file():
                    try:
                        total_bytes += f.stat().st_size
                    except OSError:
                        # File vanished or is unreadable; skip it.
                        pass
        total_gb = total_bytes / (1024 ** 3)
        if total_gb > DISK_USAGE_WARNING_THRESHOLD_GB:
            logger.warning(
                "Disk usage (%.1f GB) exceeds threshold (%.0f GB). "
                "Consider running cleanup_all_environments().",
                total_gb, DISK_USAGE_WARNING_THRESHOLD_GB,
            )
            return True
        return False
    except Exception:
        # Best-effort check; never propagate errors to the caller.
        return False
# Session-cached sudo password (persists until CLI exits)
_cached_sudo_password: str = ""

# Optional UI callbacks for interactive prompts. When set, these are called
# instead of the default /dev/tty or input() readers. The CLI registers these
# so prompts route through prompt_toolkit's event loop.
#   _sudo_password_callback() -> str (return password or "" to skip)
#   _approval_callback(command, description) -> str ("once"/"session"/"always"/"deny")
_sudo_password_callback = None
_approval_callback = None


def set_sudo_password_callback(cb):
    """Register the CLI's handler for sudo password prompts."""
    global _sudo_password_callback
    _sudo_password_callback = cb


def set_approval_callback(cb):
    """Register the CLI's handler for dangerous-command approval prompts."""
    global _approval_callback
    _approval_callback = cb
# =============================================================================
# Dangerous Command Approval System
# =============================================================================
# Thread-safe per-session approval state lives in tools/approval.py
# (submit_pending / pop_pending / approve_session / is_approved).
from tools import approval as _approval
# Dangerous command patterns (regex, description).
# Order matters: the first matching pattern determines the cached pattern_key.
DANGEROUS_PATTERNS = [
    (r'\brm\s+(-[^\s]*\s+)*/', "delete in root path"),
    (r'\brm\s+(-[^\s]*)?r', "recursive delete"),
    (r'\brm\s+--recursive\b', "recursive delete (long flag)"),
    (r'\bchmod\s+(-[^\s]*\s+)*777\b', "world-writable permissions"),
    (r'\bchmod\s+--recursive\b.*777', "recursive world-writable (long flag)"),
    (r'\bchown\s+(-[^\s]*)?R\s+root', "recursive chown to root"),
    (r'\bchown\s+--recursive\b.*root', "recursive chown to root (long flag)"),
    (r'\bmkfs\b', "format filesystem"),
    (r'\bdd\s+.*if=', "disk copy"),
    (r'>\s*/dev/sd', "write to block device"),
    (r'\bDROP\s+(TABLE|DATABASE)\b', "SQL DROP"),
    (r'\bDELETE\s+FROM\b(?!.*\bWHERE\b)', "SQL DELETE without WHERE"),
    (r'\bTRUNCATE\s+(TABLE)?\s*\w', "SQL TRUNCATE"),
    (r'>\s*/etc/', "overwrite system config"),
    (r'\bsystemctl\s+(stop|disable|mask)\b', "stop/disable system service"),
    (r'\bkill\s+-9\s+-1\b', "kill all processes"),
    (r'\bpkill\s+-9\b', "force kill processes"),
    # Parens/braces must be escaped so this matches the literal ":(){ :|:&};:"
    # fork bomb (unescaped "()" is an empty regex group and never matches it).
    (r':\(\)\s*\{\s*:\s*\|\s*:&\s*\}\s*;:', "fork bomb"),
    # Indirect execution via command launchers
    (r'\b(bash|sh|zsh)\s+-c\s+', "shell command via -c flag"),
    (r'\b(python[23]?|perl|ruby|node)\s+-[ec]\s+', "script execution via -e/-c flag"),
    # Pipe-to-shell (remote code execution)
    (r'\b(curl|wget)\b.*\|\s*(ba)?sh\b', "pipe remote content to shell"),
    # Destructive find/xargs patterns
    (r'\bxargs\s+.*\brm\b', "xargs with rm"),
    (r'\bfind\b.*-exec\s+rm\b', "find -exec rm"),
    (r'\bfind\b.*-delete\b', "find -delete"),
]
def _load_permanent_allowlist() -> set:
    """Load permanently allowed command patterns from config.

    Also syncs them into the approval module so is_approved() works for
    patterns that were added via 'always' in a previous session.

    Returns an empty set when the config is unavailable (e.g. outside the
    CLI) - the allowlist is strictly opt-in.
    """
    try:
        from hermes_cli.config import load_config
        config = load_config()
        patterns = set(config.get("command_allowlist", []) or [])
        if patterns:
            _approval.load_permanent(patterns)
        return patterns
    except Exception:
        # Config missing/unreadable: behave as if nothing is allowlisted.
        return set()
def _save_permanent_allowlist(patterns: set):
    """Persist permanently allowed command patterns to the user config."""
    try:
        from hermes_cli.config import load_config, save_config
        cfg = load_config()
        cfg["command_allowlist"] = list(patterns)
        save_config(cfg)
    except Exception as e:
        # Non-fatal: the approval simply won't survive this process.
        logger.warning("Could not save allowlist: %s", e)
def _detect_dangerous_command(command: str) -> tuple:
    """Match *command* against DANGEROUS_PATTERNS.

    Returns:
        (is_dangerous, pattern_key, description) or (False, None, None)
    """
    import re
    lowered = command.lower()
    for pattern, description in DANGEROUS_PATTERNS:
        if not re.search(pattern, lowered, re.IGNORECASE):
            continue
        # Simplified pattern key for caching (first word, or a short prefix).
        pattern_key = pattern.split(r'\b')[1] if r'\b' in pattern else pattern[:20]
        return (True, pattern_key, description)
    return (False, None, None)
def _is_command_approved(pattern_key: str) -> bool:
    """Return True when *pattern_key* is approved for the current session.

    Approval state (session and permanent) is tracked per-session in the
    thread-safe tools/approval module; the session is identified by
    $HERMES_SESSION_KEY (falling back to "default").
    """
    session_key = os.getenv("HERMES_SESSION_KEY", "default")
    return _approval.is_approved(session_key, pattern_key)
def _prompt_dangerous_approval(command: str, description: str, timeout_seconds: int = 60) -> str:
    """Prompt the user to approve a dangerous command (CLI only).

    If an _approval_callback is registered (by the CLI), delegates to it so the
    prompt integrates with prompt_toolkit's UI. Otherwise falls back to the
    raw input() approach (works outside the TUI, e.g. tests).

    Returns: 'once', 'session', 'always', or 'deny'
    """
    import sys
    import threading

    # Use the registered callback when available (prompt_toolkit-compatible).
    if _approval_callback is not None:
        try:
            return _approval_callback(command, description)
        except Exception:
            return "deny"

    # Pause the spinner (if one is running) while we own the terminal.
    os.environ["HERMES_SPINNER_PAUSE"] = "1"
    try:
        print()
        print(f"⚠️ DANGEROUS COMMAND: {description}")
        print(f"{command[:80]}{'...' if len(command) > 80 else ''}")
        print()
        print(f"[o]nce | [s]ession | [a]lways | [d]eny")
        print()
        sys.stdout.flush()

        result = {"choice": ""}

        def get_input():
            # Runs in a daemon thread so the prompt can time out.
            try:
                result["choice"] = input("Choice [o/s/a/D]: ").strip().lower()
            except (EOFError, OSError):
                result["choice"] = ""

        thread = threading.Thread(target=get_input, daemon=True)
        thread.start()
        thread.join(timeout=timeout_seconds)
        if thread.is_alive():
            # No answer in time: fail safe by denying.
            print("\n⏱ Timeout - denying command")
            return "deny"

        choice = result["choice"]
        if choice in ('o', 'once'):
            print("✓ Allowed once")
            return "once"
        elif choice in ('s', 'session'):
            print("✓ Allowed for this session")
            return "session"
        elif choice in ('a', 'always'):
            print("✓ Added to permanent allowlist")
            return "always"
        else:
            # Empty input / anything else defaults to deny (capital D default).
            print("✗ Denied")
            return "deny"
    except (EOFError, KeyboardInterrupt):
        print("\n✗ Cancelled")
        return "deny"
    finally:
        # Always resume the spinner and leave the terminal tidy.
        if "HERMES_SPINNER_PAUSE" in os.environ:
            del os.environ["HERMES_SPINNER_PAUSE"]
        print()
        sys.stdout.flush()
def _check_dangerous_command(command: str, env_type: str) -> dict:
    """Check if command is dangerous and handle approval.

    Only applies to local/ssh backends in interactive contexts.

    Args:
        command: The command to check
        env_type: The terminal backend type

    Returns:
        {"approved": True/False, "message": str or None}; in gateway
        contexts a denied result additionally carries "status",
        "pattern_key", "command" and "description" so the gateway can
        drive its own approval flow.
    """
    # Skip check for isolated environments (containers are disposable).
    if env_type in ("docker", "singularity", "modal"):
        return {"approved": True, "message": None}

    # Detect dangerous command.
    is_dangerous, pattern_key, description = _detect_dangerous_command(command)
    if not is_dangerous:
        return {"approved": True, "message": None}

    # Already approved (session or permanent)?
    if _is_command_approved(pattern_key):
        return {"approved": True, "message": None}

    # Check context - only prompt in interactive modes.
    is_cli = os.getenv("HERMES_INTERACTIVE")
    is_gateway = os.getenv("HERMES_GATEWAY_SESSION")
    if not is_cli and not is_gateway:
        # Programmatic use - allow (user opted into local backend).
        return {"approved": True, "message": None}

    if is_gateway or os.getenv("HERMES_EXEC_ASK"):
        # Messaging context - return approval_required so the gateway can
        # prompt the user interactively instead of just blocking.
        session_key = os.getenv("HERMES_SESSION_KEY", "default")
        _approval.submit_pending(session_key, {
            "command": command,
            "pattern_key": pattern_key,
            "description": description,
        })
        return {
            "approved": False,
            "pattern_key": pattern_key,
            "status": "approval_required",
            "command": command,
            "description": description,
            "message": f"⚠️ This command is potentially dangerous ({description}). Asking the user for approval..."
        }

    # CLI context - prompt the user synchronously.
    choice = _prompt_dangerous_approval(command, description)
    if choice == "deny":
        return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."}

    session_key = os.getenv("HERMES_SESSION_KEY", "default")
    if choice == "session":
        _approval.approve_session(session_key, pattern_key)
    elif choice == "always":
        # Approve now and persist across sessions.
        _approval.approve_session(session_key, pattern_key)
        _approval.approve_permanent(pattern_key)
        _save_permanent_allowlist(_load_permanent_allowlist() | {pattern_key})

    return {"approved": True, "message": None}
def _handle_sudo_failure ( output : str , env_type : str ) - > str :
"""
Check for sudo failure and add helpful message for messaging contexts .
Returns enhanced output if sudo failed in messaging context , else original .
"""
is_gateway = os . getenv ( " HERMES_GATEWAY_SESSION " )
if not is_gateway :
return output
# Check for sudo failure indicators
sudo_failures = [
" sudo: a password is required " ,
" sudo: no tty present " ,
" sudo: a terminal is required " ,
]
for failure in sudo_failures :
if failure in output :
return output + " \n \n 💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to ~/.hermes/.env on the agent machine. "
return output
2026-02-01 15:36:26 -08:00
def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
    """
    Prompt the user for a sudo password, with a timeout.

    Returns the password if entered, or an empty string if:
    - the user presses Enter without input (skip),
    - the timeout expires (45s default), or
    - any error occurs.

    Only works in interactive mode (HERMES_INTERACTIVE=1) — the caller
    (_transform_sudo_command) checks that flag before invoking this.

    If a _sudo_password_callback is registered (by the CLI), delegates to it
    so the prompt integrates with prompt_toolkit's UI. Otherwise reads
    directly from /dev/tty with echo disabled.

    Args:
        timeout_seconds: How long to wait for input before auto-skipping.

    Returns:
        The entered password, or "" when skipped/timed out/errored.
    """
    import sys
    import time as time_module

    # Use the registered callback when available (prompt_toolkit-compatible).
    # Any failure in the callback is treated as "no password entered".
    if _sudo_password_callback is not None:
        try:
            return _sudo_password_callback() or ""
        except Exception:
            return ""

    # Shared state between this thread and the reader thread below.
    result = {"password": None, "done": False}

    def read_password_thread():
        """Read password from /dev/tty with echo disabled."""
        tty_fd = None
        old_attrs = None
        try:
            import termios
            tty_fd = os.open("/dev/tty", os.O_RDONLY)
            old_attrs = termios.tcgetattr(tty_fd)
            new_attrs = termios.tcgetattr(tty_fd)
            # Index 3 is lflag; clear ECHO so typed characters stay hidden.
            new_attrs[3] = new_attrs[3] & ~termios.ECHO
            termios.tcsetattr(tty_fd, termios.TCSAFLUSH, new_attrs)
            # Read byte-by-byte until newline/CR or EOF.
            chars = []
            while True:
                b = os.read(tty_fd, 1)
                if not b or b in (b"\n", b"\r"):
                    break
                chars.append(b)
            result["password"] = b"".join(chars).decode("utf-8", errors="replace")
        except (EOFError, KeyboardInterrupt, OSError):
            result["password"] = ""
        except Exception:
            result["password"] = ""
        finally:
            # Always restore the terminal's original attributes and close the fd.
            if tty_fd is not None and old_attrs is not None:
                try:
                    import termios as _termios
                    _termios.tcsetattr(tty_fd, _termios.TCSAFLUSH, old_attrs)
                except Exception:
                    pass
            if tty_fd is not None:
                try:
                    os.close(tty_fd)
                except Exception:
                    pass
            result["done"] = True
    try:
        # NOTE(review): presumably signals a CLI spinner to stop redrawing
        # while we own the terminal — confirm against the spinner code.
        os.environ["HERMES_SPINNER_PAUSE"] = "1"
        # Give the spinner a moment to notice the pause flag.
        time_module.sleep(0.2)

        print()
        print("┌" + "─" * 58 + "┐")
        print("│ 🔐 SUDO PASSWORD REQUIRED" + " " * 30 + "│")
        print("├" + "─" * 58 + "┤")
        print("│ Enter password below (input is hidden), or: │")
        print("│ • Press Enter to skip (command fails gracefully) │")
        print(f"│ • Wait {timeout_seconds}s to auto-skip" + " " * 27 + "│")
        print("└" + "─" * 58 + "┘")
        print()
        print("Password (hidden): ", end="", flush=True)

        # Read on a daemon thread so join(timeout=...) implements the overall
        # prompt timeout; on timeout the thread is abandoned (daemon, so it
        # dies with the process).
        password_thread = threading.Thread(target=read_password_thread, daemon=True)
        password_thread.start()
        password_thread.join(timeout=timeout_seconds)
        if result["done"]:
            password = result["password"] or ""
            print()  # newline after hidden input
            if password:
                print("✓ Password received (cached for this session)")
            else:
                print("⏭ Skipped - continuing without sudo")
            print()
            sys.stdout.flush()
            return password
        else:
            # Reader thread is still blocked in os.read(); the user's eventual
            # Enter keypress is consumed by it — hence the dismissal hint.
            print("\n⏱ Timeout - continuing without sudo")
            print("(Press Enter to dismiss)")
            print()
            sys.stdout.flush()
            return ""
    except (EOFError, KeyboardInterrupt):
        print()
        print("⏭ Cancelled - continuing without sudo")
        print()
        sys.stdout.flush()
        return ""
    except Exception as e:
        print(f"\n[sudo prompt error: {e}] - continuing without sudo\n")
        sys.stdout.flush()
        return ""
    finally:
        # Always re-enable the spinner, even on error/timeout paths.
        if "HERMES_SPINNER_PAUSE" in os.environ:
            del os.environ["HERMES_SPINNER_PAUSE"]
2026-02-01 10:02:34 -08:00
def _transform_sudo_command ( command : str ) - > str :
"""
Transform sudo commands to use - S flag if SUDO_PASSWORD is available .
This is a shared helper used by all execution environments to provide
consistent sudo handling across local , SSH , and container environments .
2026-02-01 15:36:26 -08:00
If SUDO_PASSWORD is set ( via env , config , or interactive prompt ) :
2026-02-01 10:02:34 -08:00
' sudo apt install curl ' - > password piped via sudo - S
2026-02-01 15:36:26 -08:00
If SUDO_PASSWORD is not set and in interactive mode ( HERMES_INTERACTIVE = 1 ) :
Prompts user for password with 45 s timeout , caches for session .
If SUDO_PASSWORD is not set and NOT interactive :
Command runs as - is ( fails gracefully with " sudo: a password is required " ) .
2026-02-01 10:02:34 -08:00
"""
2026-02-01 15:36:26 -08:00
global _cached_sudo_password
import re
# Check if command even contains sudo
if not re . search ( r ' \ bsudo \ b ' , command ) :
return command # No sudo in command, return as-is
# Try to get password from: env var -> session cache -> interactive prompt
sudo_password = os . getenv ( " SUDO_PASSWORD " , " " ) or _cached_sudo_password
2026-02-01 10:02:34 -08:00
if not sudo_password :
2026-02-01 15:36:26 -08:00
# No password configured - check if we're in interactive mode
if os . getenv ( " HERMES_INTERACTIVE " ) :
# Prompt user for password
sudo_password = _prompt_for_sudo_password ( timeout_seconds = 45 )
if sudo_password :
_cached_sudo_password = sudo_password # Cache for session
2026-02-01 10:02:34 -08:00
2026-02-01 15:36:26 -08:00
if not sudo_password :
return command # No password, let it fail gracefully
2026-02-01 10:02:34 -08:00
def replace_sudo ( match ) :
# Replace 'sudo' with password-piped version
# The -S flag makes sudo read password from stdin
# The -p '' suppresses the password prompt
return f " echo ' { sudo_password } ' | sudo -S -p ' ' "
# Match 'sudo' at word boundaries (not 'visudo' or 'sudoers')
# This handles: sudo, sudo -flag, etc.
return re . sub ( r ' \ bsudo \ b ' , replace_sudo , command )
class _LocalEnvironment:
    """
    Local execution environment with sudo support and non-blocking stdin.

    Features:
    - Uses stdin=DEVNULL to prevent hanging on interactive prompts (sudo, etc.)
    - Optional SUDO_PASSWORD support: if set, transforms `sudo` commands to use `sudo -S`
    - Graceful failure: sudo commands fail fast with clear error if no password configured

    Environment variables:
    - SUDO_PASSWORD: If set, enables sudo commands by piping password via `sudo -S`
    """

    def __init__(self, cwd: str = "", timeout: int = 60, env: dict | None = None):
        # Defaults used by execute() when no per-call override is given;
        # `env` entries are overlaid on os.environ for each command.
        self.cwd = cwd or os.getcwd()
        self.timeout = timeout
        self.env = env or {}

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """
        Execute a command locally with sudo support.

        Uses Popen + polling so the global interrupt event can kill the
        process early when the user sends a new message, instead of
        blocking for the full timeout.

        A background reader thread drains stdout continuously to prevent
        pipe buffer deadlocks. Without this, commands producing >64KB of
        output would block (Linux pipe buffer = 64KB) while the poll loop
        waits for the process to finish — a classic deadlock.

        Args:
            command: Shell command, executed with shell=True.
            cwd: Per-call working directory override.
            timeout: Per-call timeout override (seconds).
            stdin_data: If provided, piped to the process's stdin. This
                bypasses shell ARG_MAX limits for large content.

        Returns:
            Dict with "output" (combined stdout+stderr, UTF-8 with
            replacement) and "returncode" (130 = interrupted, 124 = timeout,
            1 = launch error).
        """
        work_dir = cwd or self.cwd or os.getcwd()
        effective_timeout = timeout or self.timeout
        # Transform sudo commands if SUDO_PASSWORD is available
        exec_command = _transform_sudo_command(command)
        try:
            proc = subprocess.Popen(
                exec_command,
                shell=True,
                text=True,
                cwd=work_dir,
                env=os.environ | self.env,
                encoding="utf-8",
                errors="replace",
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # merge stderr into stdout
                stdin=subprocess.PIPE if stdin_data is not None else subprocess.DEVNULL,
                # Start in a new process group so we can kill the whole tree
                preexec_fn=os.setsid,
            )

            # Pipe stdin_data in a background thread to avoid deadlock
            # (large writes can block if the pipe buffer fills before the
            # process drains it).
            if stdin_data is not None:
                def _write_stdin():
                    try:
                        proc.stdin.write(stdin_data)
                        proc.stdin.close()
                    except (BrokenPipeError, OSError):
                        pass
                stdin_writer = threading.Thread(target=_write_stdin, daemon=True)
                stdin_writer.start()

            # Drain stdout in a background thread to prevent pipe buffer
            # deadlocks. The OS pipe buffer is 64KB on Linux; if the child
            # writes more than that before anyone reads, it blocks forever.
            _output_chunks: list[str] = []
            def _drain_stdout():
                try:
                    for line in proc.stdout:
                        _output_chunks.append(line)
                except ValueError:
                    pass  # stdout closed during interrupt/timeout
                finally:
                    try:
                        proc.stdout.close()
                    except Exception:
                        pass
            reader = threading.Thread(target=_drain_stdout, daemon=True)
            reader.start()

            deadline = time.monotonic() + effective_timeout
            # Poll every 200ms so we notice interrupts quickly
            while proc.poll() is None:
                if _interrupt_event.is_set():
                    # User sent a new message — kill the process tree and return
                    # what we have so far
                    try:
                        os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
                    except (ProcessLookupError, PermissionError):
                        proc.kill()  # fall back to killing just the direct child
                    reader.join(timeout=2)
                    output = "".join(_output_chunks)
                    return {
                        "output": output + "\n[Command interrupted — user sent a new message]",
                        "returncode": 130  # Standard interrupted exit code
                    }
                if time.monotonic() > deadline:
                    # Timeout — kill process tree
                    try:
                        os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
                    except (ProcessLookupError, PermissionError):
                        proc.kill()
                    reader.join(timeout=2)
                    # NOTE(review): partial output is discarded here, unlike the
                    # interrupt path above — confirm callers prefer the bare
                    # timeout message.
                    return {"output": f"Command timed out after {effective_timeout}s", "returncode": 124}
                # Short sleep to avoid busy-waiting
                time.sleep(0.2)

            # Process finished — wait for reader to drain remaining output
            reader.join(timeout=5)
            return {"output": "".join(_output_chunks), "returncode": proc.returncode}

        except Exception as e:
            return {"output": f"Execution error: {str(e)}", "returncode": 1}

    def cleanup(self):
        """No cleanup needed for local environment."""
        pass

    def stop(self):
        """Alias for cleanup."""
        pass
2026-01-29 06:10:24 +00:00
class _SingularityEnvironment:
    """
    Persistent Singularity/Apptainer container environment.

    A single long-running `apptainer instance` backs every execute() call, so
    files, package installs, and environment changes persist across commands
    within a task — the model experiences this as a real Linux VM.

    Features:
    - Persistent filesystem: files created in one command are visible in the next
    - Package installs persist: pip/apt installs survive across tool calls
    - Full isolation: --containall gives PID, IPC, and environment isolation
    - Writable tmpfs overlay: full root filesystem is writable (RAM-backed)
    - Automatic SIF caching: docker:// images converted to SIF once, reused forever
    """

    def __init__(self, image: str, cwd: str = "/root", timeout: int = 60):
        self.cwd = cwd
        self.timeout = timeout
        # Prefer the modern apptainer binary; fall back to legacy singularity.
        self.executable = "apptainer" if shutil.which("apptainer") else "singularity"
        # Resolve docker:// URLs to a cached SIF (fast no-op when cached).
        self.image = _get_or_build_sif(image, self.executable)
        # Instance names must be alphanumeric + underscores.
        self.instance_id = f"hermes_{uuid.uuid4().hex[:12]}"
        self._instance_started = False
        self._start_instance()

    def _start_instance(self):
        """Launch the persistent apptainer instance backing all execute() calls."""
        launch = [
            self.executable, "instance", "start",
            "--writable-tmpfs",  # RAM-backed writable overlay on read-only SIF
            "--containall",      # Full isolation: PID, IPC, environment, filesystem
            str(self.image),
            self.instance_id,
        ]
        try:
            started = subprocess.run(
                launch,
                capture_output=True,
                text=True,
                timeout=120,  # 2 min for instance startup
            )
        except subprocess.TimeoutExpired:
            raise RuntimeError("Instance start timed out")
        if started.returncode != 0:
            raise RuntimeError(f"Failed to start instance: {started.stderr}")
        self._instance_started = True
        logger.info("Singularity instance %s started (persistent container)", self.instance_id)

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """Run *command* inside the persistent instance.

        All commands share one container, so files, installs, and environment
        changes persist between calls.
        """
        if not self._instance_started:
            return {"output": "Instance not started", "returncode": -1}

        effective_timeout = timeout or self.timeout
        argv = [
            self.executable, "exec",
            "--pwd", cwd or self.cwd,               # working directory inside container
            f"instance://{self.instance_id}",       # attach to the running instance
            "bash", "-c", _transform_sudo_command(command),
        ]

        run_opts: dict = {
            "text": True,
            "timeout": effective_timeout,
            "encoding": "utf-8",
            "errors": "replace",
            "stdout": subprocess.PIPE,
            "stderr": subprocess.STDOUT,
        }
        if stdin_data is None:
            run_opts["stdin"] = subprocess.DEVNULL
        else:
            run_opts["input"] = stdin_data

        try:
            completed = subprocess.run(argv, **run_opts)
        except subprocess.TimeoutExpired:
            return {"output": f"Command timed out after {effective_timeout}s", "returncode": 124}
        return {"output": completed.stdout, "returncode": completed.returncode}

    def cleanup(self):
        """Stop the persistent instance (safe to call more than once)."""
        if not self._instance_started:
            return
        try:
            subprocess.run(
                [self.executable, "instance", "stop", self.instance_id],
                capture_output=True,
                text=True,
                timeout=30,
            )
            logger.info("Singularity instance %s stopped", self.instance_id)
        except Exception as e:
            logger.warning("Failed to stop Singularity instance %s: %s", self.instance_id, e)
        self._instance_started = False

    def stop(self):
        """Alias for cleanup."""
        self.cleanup()

    def __del__(self):
        # Best-effort cleanup; a finalizer must never raise.
        try:
            self.cleanup()
        except Exception:
            pass
2026-01-29 06:10:24 +00:00
2026-01-31 06:30:48 +00:00
class _SSHEnvironment :
"""
SSH - based remote execution environment .
Runs commands on a remote machine over SSH , keeping the agent code
completely isolated from the execution environment . Uses SSH ControlMaster
for connection persistence ( faster subsequent commands ) .
Security benefits :
- Agent cannot modify its own code
- Remote machine acts as a sandbox
- Clear separation between agent and execution environment
"""
def __init__ ( self , host : str , user : str , cwd : str = " /tmp " , timeout : int = 60 ,
port : int = 22 , key_path : str = " " ) :
self . host = host
self . user = user
self . cwd = cwd
self . timeout = timeout
self . port = port
self . key_path = key_path
# Create control socket directory for connection persistence
self . control_dir = Path ( tempfile . gettempdir ( ) ) / " hermes-ssh "
self . control_dir . mkdir ( parents = True , exist_ok = True )
self . control_socket = self . control_dir / f " { user } @ { host } : { port } .sock "
# Test connection and establish ControlMaster
self . _establish_connection ( )
def _build_ssh_command ( self , extra_args : list = None ) - > list :
""" Build base SSH command with connection options. """
cmd = [ " ssh " ]
# Connection multiplexing for performance
cmd . extend ( [ " -o " , f " ControlPath= { self . control_socket } " ] )
cmd . extend ( [ " -o " , " ControlMaster=auto " ] )
cmd . extend ( [ " -o " , " ControlPersist=300 " ] ) # Keep connection alive for 5 min
# Standard options
cmd . extend ( [ " -o " , " BatchMode=yes " ] ) # No password prompts
cmd . extend ( [ " -o " , " StrictHostKeyChecking=accept-new " ] ) # Accept new hosts
cmd . extend ( [ " -o " , " ConnectTimeout=10 " ] )
# Port
if self . port != 22 :
cmd . extend ( [ " -p " , str ( self . port ) ] )
# Private key
if self . key_path :
cmd . extend ( [ " -i " , self . key_path ] )
# Extra args (like -t for TTY)
if extra_args :
cmd . extend ( extra_args )
# Target
cmd . append ( f " { self . user } @ { self . host } " )
return cmd
def _establish_connection ( self ) :
""" Test SSH connection and establish ControlMaster. """
cmd = self . _build_ssh_command ( )
cmd . append ( " echo ' SSH connection established ' " )
try :
result = subprocess . run (
cmd ,
capture_output = True ,
text = True ,
timeout = 15
)
if result . returncode != 0 :
error_msg = result . stderr . strip ( ) or result . stdout . strip ( )
raise RuntimeError ( f " SSH connection failed: { error_msg } " )
except subprocess . TimeoutExpired :
raise RuntimeError ( f " SSH connection to { self . user } @ { self . host } timed out " )
2026-02-19 14:50:51 -08:00
def execute ( self , command : str , cwd : str = " " , * , timeout : int | None = None ,
stdin_data : str | None = None ) - > dict :
2026-01-31 06:30:48 +00:00
""" Execute a command on the remote host via SSH. """
work_dir = cwd or self . cwd
effective_timeout = timeout or self . timeout
2026-02-01 10:02:34 -08:00
# Transform sudo commands if SUDO_PASSWORD is available
exec_command = _transform_sudo_command ( command )
2026-01-31 06:30:48 +00:00
# Wrap command to run in the correct directory
2026-02-01 10:02:34 -08:00
wrapped_command = f ' cd { work_dir } && { exec_command } '
2026-01-31 06:30:48 +00:00
cmd = self . _build_ssh_command ( )
cmd . extend ( [ " bash " , " -c " , wrapped_command ] )
2026-02-19 14:50:51 -08:00
run_kwargs = {
" text " : True ,
" timeout " : effective_timeout ,
" encoding " : " utf-8 " ,
" errors " : " replace " ,
" stdout " : subprocess . PIPE ,
" stderr " : subprocess . STDOUT ,
}
if stdin_data is not None :
run_kwargs [ " input " ] = stdin_data
else :
run_kwargs [ " stdin " ] = subprocess . DEVNULL
2026-01-31 06:30:48 +00:00
try :
2026-02-19 14:50:51 -08:00
result = subprocess . run ( cmd , * * run_kwargs )
2026-01-31 06:30:48 +00:00
return { " output " : result . stdout , " returncode " : result . returncode }
except subprocess . TimeoutExpired :
return { " output " : f " Command timed out after { effective_timeout } s " , " returncode " : 124 }
except Exception as e :
return { " output " : f " SSH execution error: { str ( e ) } " , " returncode " : 1 }
def cleanup ( self ) :
""" Close the SSH ControlMaster connection. """
if self . control_socket . exists ( ) :
try :
# Send exit command to ControlMaster
cmd = [ " ssh " , " -o " , f " ControlPath= { self . control_socket } " , " -O " , " exit " ,
f " { self . user } @ { self . host } " ]
subprocess . run ( cmd , capture_output = True , timeout = 5 )
2026-02-20 23:23:32 -08:00
except ( OSError , subprocess . SubprocessError ) :
2026-01-31 06:30:48 +00:00
pass
# Remove socket file
try :
self . control_socket . unlink ( )
2026-02-20 23:23:32 -08:00
except OSError :
2026-01-31 06:30:48 +00:00
pass
def stop ( self ) :
""" Alias for cleanup. """
self . cleanup ( )
def __del__ ( self ) :
""" Cleanup on destruction. """
try :
self . cleanup ( )
2026-02-20 23:23:32 -08:00
except Exception :
2026-01-31 06:30:48 +00:00
pass
2026-02-01 10:02:34 -08:00
class _DockerEnvironment:
    """
    Docker execution environment wrapper with sudo support and non-blocking stdin.

    Wraps mini-swe-agent's DockerEnvironment but adds:
    - stdin=DEVNULL to prevent hanging on interactive prompts
    - SUDO_PASSWORD support via _transform_sudo_command
    """

    def __init__(self, image: str, cwd: str = "/", timeout: int = 60):
        from minisweagent.environments.docker import DockerEnvironment
        self._inner = DockerEnvironment(image=image, cwd=cwd, timeout=timeout)
        self.cwd = cwd
        self.timeout = timeout

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """Execute a command in the Docker container with sudo support."""
        # Rewrite `sudo` invocations to pipe SUDO_PASSWORD when configured.
        payload = _transform_sudo_command(command)
        target_dir = cwd or self.cwd
        effective_timeout = timeout or self.timeout
        # The wrapped environment owns the container lifecycle.
        assert self._inner.container_id, "Container not started"

        argv = [self._inner.config.executable, "exec"]
        if stdin_data is not None:
            argv.append("-i")  # Enable stdin piping into the container
        argv.extend(["-w", target_dir])
        # Forward selected host env vars, then the statically configured ones.
        for name in self._inner.config.forward_env:
            value = os.getenv(name)
            if value is not None:
                argv.extend(["-e", f"{name}={value}"])
        for name, value in self._inner.config.env.items():
            argv.extend(["-e", f"{name}={value}"])
        argv.extend([self._inner.container_id, "bash", "-lc", payload])

        run_opts: dict = {
            "text": True,
            "timeout": effective_timeout,
            "encoding": "utf-8",
            "errors": "replace",
            "stdout": subprocess.PIPE,
            "stderr": subprocess.STDOUT,
        }
        if stdin_data is None:
            run_opts["stdin"] = subprocess.DEVNULL
        else:
            run_opts["input"] = stdin_data

        try:
            completed = subprocess.run(argv, **run_opts)
        except subprocess.TimeoutExpired:
            return {"output": f"Command timed out after {effective_timeout}s", "returncode": 124}
        return {"output": completed.stdout, "returncode": completed.returncode}

    def cleanup(self):
        """Cleanup the Docker container."""
        self._inner.cleanup()

    def stop(self):
        """Alias for cleanup."""
        self.cleanup()

    def __del__(self):
        # Best-effort cleanup; a finalizer must never raise.
        try:
            self.cleanup()
        except Exception:
            pass
class _ModalEnvironment:
    """
    Modal cloud execution environment wrapper with sudo support.

    Wraps mini-swe-agent's SwerexModalEnvironment but adds:
    - SUDO_PASSWORD support via _transform_sudo_command
    - Automatic async-safety patches (applied once, before first use)

    The patches replace SwerexModalEnvironment's asyncio.run() calls with a
    background-thread approach, making it safe to use inside any event loop
    (e.g., Atropos). They are applied here at the point of use rather than via
    import-time side effects, so ALL callers get the fix automatically.
    """

    # Patches are process-wide; apply them only once.
    _patches_applied = False

    def __init__(self, image: str, cwd: str = "/root", timeout: int = 60):
        # Single authoritative place where the async-safety patches are
        # applied -- no other module needs to call apply_patches() for Modal.
        if not _ModalEnvironment._patches_applied:
            try:
                from environments.patches import apply_patches
                apply_patches()
            except ImportError:
                pass  # patches module not available (standalone use)
            _ModalEnvironment._patches_applied = True

        from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment

        # Generous startup timeout: sandbox creation can take 30-60s for cold
        # images, and the SWE-ReX runtime needs another 10-30s to boot inside it.
        self._inner = SwerexModalEnvironment(
            image=image, cwd=cwd, timeout=timeout,
            startup_timeout=180.0,
            runtime_timeout=3600.0,
        )
        self.cwd = cwd
        self.timeout = timeout

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None,
                stdin_data: str | None = None) -> dict:
        """Execute a command in Modal with sudo support.

        Modal uses HTTP transport (no execve), so there is no ARG_MAX limit.
        Because there is no process-level stdin pipe to the cloud sandbox,
        stdin_data is delivered by embedding it in the command as a heredoc.
        """
        if stdin_data is not None:
            # Pick a heredoc terminator that cannot collide with the payload.
            while True:
                marker = f"HERMES_EOF_{uuid.uuid4().hex[:8]}"
                if marker not in stdin_data:
                    break
            command = f"{command} <<'{marker}'\n{stdin_data}\n{marker}"

        # Transform sudo commands if SUDO_PASSWORD is available, then delegate.
        return self._inner.execute(_transform_sudo_command(command), cwd=cwd, timeout=timeout)

    def cleanup(self):
        """Tear down the Modal deployment."""
        if hasattr(self._inner, 'stop'):
            self._inner.stop()

    def stop(self):
        """Stop the Modal deployment."""
        self.cleanup()

    def __del__(self):
        # Best-effort cleanup; a finalizer must never raise.
        try:
            self.cleanup()
        except Exception:
            pass
2026-01-23 12:26:53 +00:00
# Tool description for LLM
TERMINAL_TOOL_DESCRIPTION = """Execute commands on a Linux environment. Filesystem persists between calls.
Background processes: Set background=true to get a session_id, then use the 'process' tool to poll/wait/kill/write.
Working directory: Use 'workdir' for per-command cwd.
PTY mode: Set pty=true for interactive CLI tools (Codex, Claude Code, Python REPL).
Do NOT use vim/nano/interactive tools without pty=true — they hang without a pseudo-terminal. Pipe git output to cat if it might page.
"""

# Global state for environment lifecycle management.
# NOTE(review): these appear to be keyed by task_id — confirm at use sites.
_active_environments: Dict[str, Any] = {}
# Maps task_id to working directory
_task_workdirs: Dict[str, str] = {}
# Last-activity timestamp per key; presumably drives the idle cleanup the
# module docstring promises ("Automatic cleanup after inactivity").
_last_activity: Dict[str, float] = {}
# NOTE(review): appears to guard the environment-state dicts above — confirm.
_env_lock = threading.Lock()
_creation_locks: Dict[str, threading.Lock] = {}  # Per-task locks for sandbox creation
_creation_locks_lock = threading.Lock()  # Protects _creation_locks dict itself
# Background cleanup machinery; started lazily elsewhere in the module.
_cleanup_thread = None
_cleanup_running: bool = False
2026-02-10 06:49:58 +00:00
# Per-task environment overrides registry.
# Allows environments (e.g., TerminalBench2Env) to specify a custom Docker/Modal
# image for a specific task_id BEFORE the agent loop starts. When the terminal or
# file tools create a new sandbox for that task_id, they check this registry first
# and fall back to the TERMINAL_MODAL_IMAGE (etc.) env var if no override is set.
#
# This is never exposed to the model -- only infrastructure code calls it.
# Thread-safe because each task_id is unique per rollout.
_task_env_overrides : Dict [ str , Dict [ str , Any ] ] = { }
def register_task_env_overrides ( task_id : str , overrides : Dict [ str , Any ] ) :
"""
Register environment overrides for a specific task / rollout .
Called by Atropos environments before the agent loop to configure
per - task sandbox settings ( e . g . , a custom Dockerfile for the Modal image ) .
Supported override keys :
- modal_image : str - - Path to Dockerfile or Docker Hub image name
- docker_image : str - - Docker image name
- cwd : str - - Working directory inside the sandbox
Args :
task_id : The rollout ' s unique task identifier
overrides : Dict of config keys to override
"""
_task_env_overrides [ task_id ] = overrides
def clear_task_env_overrides ( task_id : str ) :
"""
Clear environment overrides for a task after rollout completes .
Called during cleanup to avoid stale entries accumulating .
"""
_task_env_overrides . pop ( task_id , None )
2026-01-23 12:26:53 +00:00
# Configuration from environment variables
def _get_env_config() -> Dict[str, Any]:
    """
    Build the terminal-environment configuration from environment variables.

    Returns:
        Dict with keys: env_type, docker_image, singularity_image,
        modal_image, cwd, timeout, lifetime_seconds, plus the SSH settings
        (ssh_host, ssh_user, ssh_port, ssh_key).
    """
    # Default image with Python and Node.js for maximum compatibility
    default_image = "nikolaik/python-nodejs:python3.11-nodejs20"

    env_type = os.getenv("TERMINAL_ENV", "local")

    # Default cwd depends on backend:
    # - local: host's current working directory
    # - ssh: remote user's home (agent code is local, execution is remote)
    # - docker: / inside the container
    # - singularity/modal: /root (ephemeral cloud/container)
    if env_type in ("modal", "singularity"):
        default_cwd = "/root"
    elif env_type == "docker":
        default_cwd = "/"
    elif env_type == "ssh":
        default_cwd = "~"
    else:
        default_cwd = os.getcwd()

    # Read TERMINAL_CWD but sanity-check it for non-local backends.
    # If the CWD looks like a host-local path that can't exist inside a
    # container/sandbox, fall back to the backend's own default. This
    # catches the case where cli.py (or .env) leaked the host's CWD.
    cwd = os.getenv("TERMINAL_CWD", default_cwd)
    if env_type in ("modal", "docker", "singularity", "ssh") and cwd:
        # Paths containing common host-only prefixes are clearly wrong
        # inside a container. Also catch Windows-style paths (C:\...).
        host_prefixes = ("/Users/", "/home/", "C:\\", "C:/")
        if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd:
            logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
                        "(host path won't exist in sandbox). Using %r instead.",
                        cwd, env_type, default_cwd)
            cwd = default_cwd

    return {
        "env_type": env_type,
        "docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", default_image),
        "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"),
        "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image),
        "cwd": cwd,
        "timeout": int(os.getenv("TERMINAL_TIMEOUT", "60")),
        "lifetime_seconds": int(os.getenv("TERMINAL_LIFETIME_SECONDS", "300")),
        # SSH-specific config
        "ssh_host": os.getenv("TERMINAL_SSH_HOST", ""),
        "ssh_user": os.getenv("TERMINAL_SSH_USER", ""),
        "ssh_port": int(os.getenv("TERMINAL_SSH_PORT", "22")),
        # Path to private key (optional, uses ssh-agent if empty)
        "ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),
    }
2025-11-04 03:32:43 -05:00
2026-01-23 12:26:53 +00:00
2026-01-31 06:30:48 +00:00
def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
                        ssh_config: Optional[dict] = None):
    """
    Create an execution environment from mini-swe-agent.

    Args:
        env_type: One of "local", "docker", "singularity", "modal", "ssh"
        image: Docker/Singularity/Modal image name (ignored for local/ssh)
        cwd: Working directory
        timeout: Default command timeout
        ssh_config: SSH connection config (for env_type="ssh")

    Returns:
        Environment instance with execute() method

    Raises:
        ValueError: For an unknown env_type, or when the SSH config is
            missing host/user.
    """
    if env_type == "local":
        # Use our custom LocalEnvironment with sudo support and non-blocking stdin
        return _LocalEnvironment(cwd=cwd, timeout=timeout)

    elif env_type == "docker":
        # Use custom Docker wrapper with sudo support and non-blocking stdin
        return _DockerEnvironment(image=image, cwd=cwd, timeout=timeout)

    elif env_type == "singularity":
        # Use custom Singularity environment with better space management
        return _SingularityEnvironment(image=image, cwd=cwd, timeout=timeout)

    elif env_type == "modal":
        # Use custom Modal wrapper with sudo support
        return _ModalEnvironment(image=image, cwd=cwd, timeout=timeout)

    elif env_type == "ssh":
        if not ssh_config or not ssh_config.get("host") or not ssh_config.get("user"):
            raise ValueError("SSH environment requires ssh_host and ssh_user to be configured")
        return _SSHEnvironment(
            host=ssh_config["host"],
            user=ssh_config["user"],
            port=ssh_config.get("port", 22),
            key_path=ssh_config.get("key", ""),
            cwd=cwd,
            timeout=timeout
        )

    else:
        raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', 'singularity', 'modal', or 'ssh'")
2026-01-23 12:26:53 +00:00
def _cleanup_inactive_envs(lifetime_seconds: int = 300):
    """Clean up environments that have been inactive for longer than lifetime_seconds."""
    global _active_environments, _last_activity
    current_time = time.time()

    # Check the process registry -- skip cleanup for sandboxes with active
    # background processes (their _last_activity gets refreshed to keep them alive).
    try:
        from tools.process_registry import process_registry
        for task_id in list(_last_activity.keys()):
            if process_registry.has_active_processes(task_id):
                _last_activity[task_id] = current_time  # Keep sandbox alive
    except ImportError:
        pass

    # Phase 1: collect stale entries and remove them from tracking dicts while
    # holding the lock. Do NOT call env.cleanup() inside the lock -- Modal and
    # Docker teardown can block for 10-15s, which would stall every concurrent
    # terminal/file tool call waiting on _env_lock.
    envs_to_stop = []  # list of (task_id, env) pairs
    with _env_lock:
        for task_id, last_time in list(_last_activity.items()):
            if current_time - last_time > lifetime_seconds:
                env = _active_environments.pop(task_id, None)
                _last_activity.pop(task_id, None)
                _task_workdirs.pop(task_id, None)
                if env is not None:
                    envs_to_stop.append((task_id, env))

    # Also purge per-task creation locks for cleaned-up tasks
    with _creation_locks_lock:
        for task_id, _ in envs_to_stop:
            _creation_locks.pop(task_id, None)

    # Phase 2: stop the actual sandboxes OUTSIDE the lock so other tool calls
    # are not blocked while Modal/Docker sandboxes shut down.
    for task_id, env in envs_to_stop:
        # Invalidate stale file_ops cache entry (prevents ShellFileOperations
        # from referencing a dead sandbox)
        try:
            from tools.file_tools import clear_file_ops_cache
            clear_file_ops_cache(task_id)
        except ImportError:
            pass

        try:
            if hasattr(env, 'cleanup'):
                env.cleanup()
            elif hasattr(env, 'stop'):
                env.stop()
            elif hasattr(env, 'terminate'):
                env.terminate()
            logger.info("Cleaned up inactive environment for task: %s", task_id)
        except Exception as e:
            error_str = str(e)
            # A 404 / "not found" just means the backend already tore it down.
            if "404" in error_str or "not found" in error_str.lower():
                logger.info("Environment for task %s already cleaned up", task_id)
            else:
                logger.warning("Error cleaning up environment for task %s: %s", task_id, e)
2025-11-04 03:32:43 -05:00
def _cleanup_thread_worker():
    """Background thread worker that periodically cleans up inactive environments."""
    global _cleanup_running
    while _cleanup_running:
        try:
            config = _get_env_config()
            _cleanup_inactive_envs(config["lifetime_seconds"])
        except Exception as e:
            logger.warning("Error in cleanup thread: %s", e)
        # Sleep ~60s in 1s increments so a stop request is noticed quickly.
        for _ in range(60):
            if not _cleanup_running:
                break
            time.sleep(1)
2026-01-23 12:26:53 +00:00
2025-11-04 03:32:43 -05:00
def _start_cleanup_thread():
    """Start the background cleanup thread if not already running."""
    global _cleanup_thread, _cleanup_running
    with _env_lock:
        # Re-start if the previous thread died (e.g. after interpreter fork).
        if _cleanup_thread is None or not _cleanup_thread.is_alive():
            _cleanup_running = True
            _cleanup_thread = threading.Thread(target=_cleanup_thread_worker, daemon=True)
            _cleanup_thread.start()
2026-01-23 12:26:53 +00:00
2025-11-04 03:32:43 -05:00
def _stop_cleanup_thread():
    """Stop the background cleanup thread (waits up to 5s for it to exit)."""
    global _cleanup_running
    _cleanup_running = False
    if _cleanup_thread is not None:
        _cleanup_thread.join(timeout=5)
2026-01-29 06:10:24 +00:00
def get_active_environments_info() -> Dict[str, Any]:
    """
    Get information about currently active environments.

    Returns:
        Dict with count, task_ids, workdirs, and total_disk_usage_mb of
        the hermes-* scratch directories.
    """
    info = {
        "count": len(_active_environments),
        "task_ids": list(_active_environments.keys()),
        "workdirs": dict(_task_workdirs),
    }
    # Calculate total disk usage. Each hermes-* scratch dir is summed exactly
    # once -- the previous implementation re-globbed and re-summed every
    # directory once per active task (and never used its per-task pattern),
    # overcounting by a factor of len(_active_environments).
    total_size = 0
    if _active_environments:
        import glob
        scratch_dir = _get_scratch_dir()
        for path in glob.glob(str(scratch_dir / "hermes-*")):
            try:
                total_size += sum(f.stat().st_size for f in Path(path).rglob('*') if f.is_file())
            except OSError:
                pass
    info["total_disk_usage_mb"] = round(total_size / (1024 * 1024), 2)
    return info
def cleanup_all_environments():
    """
    Clean up ALL active environments. Use with caution.

    Returns:
        int: Number of environments successfully cleaned up.
    """
    global _active_environments, _last_activity, _task_workdirs
    task_ids = list(_active_environments.keys())
    cleaned = 0
    for task_id in task_ids:
        try:
            cleanup_vm(task_id)
            cleaned += 1
        except Exception as e:
            logger.error("Error cleaning %s: %s", task_id, e)

    # Also clean any orphaned directories
    scratch_dir = _get_scratch_dir()
    import glob
    for path in glob.glob(str(scratch_dir / "hermes-*")):
        try:
            shutil.rmtree(path, ignore_errors=True)
            logger.info("Removed orphaned: %s", path)
        except OSError:
            pass

    if cleaned > 0:
        logger.info("Cleaned %d environments", cleaned)
    return cleaned
2026-01-23 12:26:53 +00:00
def cleanup_vm(task_id: str):
    """Manually clean up a specific environment by task_id."""
    global _active_environments, _last_activity, _task_workdirs

    # Remove from tracking dicts while holding the lock, but defer the
    # actual (potentially slow) env.cleanup() call to outside the lock
    # so other tool calls aren't blocked.
    env = None
    with _env_lock:
        env = _active_environments.pop(task_id, None)
        _task_workdirs.pop(task_id, None)
        _last_activity.pop(task_id, None)

    # Clean up per-task creation lock
    with _creation_locks_lock:
        _creation_locks.pop(task_id, None)

    # Invalidate stale file_ops cache entry
    try:
        from tools.file_tools import clear_file_ops_cache
        clear_file_ops_cache(task_id)
    except ImportError:
        pass

    if env is None:
        return

    try:
        if hasattr(env, 'cleanup'):
            env.cleanup()
        elif hasattr(env, 'stop'):
            env.stop()
        elif hasattr(env, 'terminate'):
            env.terminate()
        logger.info("Manually cleaned up environment for task: %s", task_id)
    except Exception as e:
        error_str = str(e)
        # A 404 / "not found" just means the backend already tore it down.
        if "404" in error_str or "not found" in error_str.lower():
            logger.info("Environment for task %s already cleaned up", task_id)
        else:
            logger.warning("Error cleaning up environment for task %s: %s", task_id, e)
2025-11-04 03:32:43 -05:00
2026-02-10 22:53:44 +00:00
def _atexit_cleanup():
    """Stop cleanup thread and shut down all remaining sandboxes on exit."""
    _stop_cleanup_thread()
    if _active_environments:
        count = len(_active_environments)
        logger.info("Shutting down %d remaining sandbox(es)...", count)
        cleanup_all_environments()


atexit.register(_atexit_cleanup)
2025-11-02 08:52:05 +08:00
2026-01-23 12:26:53 +00:00
2025-07-26 04:31:17 +00:00
def terminal_tool (
2026-01-23 12:26:53 +00:00
command : str ,
2025-07-25 15:15:36 +00:00
background : bool = False ,
2025-11-03 17:42:23 -05:00
timeout : Optional [ int ] = None ,
2026-02-02 23:35:18 -08:00
task_id : Optional [ str ] = None ,
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
force : bool = False ,
workdir : Optional [ str ] = None ,
check_interval : Optional [ int ] = None ,
pty : bool = False ,
2025-07-25 15:15:36 +00:00
) - > str :
"""
2026-01-23 12:26:53 +00:00
Execute a command using mini - swe - agent ' s execution environments.
2025-11-03 17:42:23 -05:00
2025-07-25 15:15:36 +00:00
Args :
2026-01-23 12:26:53 +00:00
command : The command to execute
background : Whether to run in background ( default : False )
timeout : Command timeout in seconds ( default : from config )
task_id : Unique identifier for environment isolation ( optional )
2026-02-02 23:35:18 -08:00
force : If True , skip dangerous command check ( use after user confirms )
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
workdir : Working directory for this command ( optional , uses session cwd if not set )
check_interval : Seconds between auto - checks for background processes ( gateway only , min 30 )
pty : If True , use pseudo - terminal for interactive CLI tools ( local backend only )
2025-11-03 17:42:23 -05:00
2025-07-25 15:15:36 +00:00
Returns :
2026-01-23 12:26:53 +00:00
str : JSON string with output , exit_code , and error fields
2025-07-26 04:31:17 +00:00
Examples :
# Execute a simple command
>> > result = terminal_tool ( command = " ls -la /tmp " )
2026-01-23 12:26:53 +00:00
2025-07-26 04:31:17 +00:00
# Run a background task
2026-01-23 12:26:53 +00:00
>> > result = terminal_tool ( command = " python server.py " , background = True )
# With custom timeout
>> > result = terminal_tool ( command = " long_task.sh " , timeout = 300 )
2026-02-02 23:35:18 -08:00
# Force run after user confirmation
2026-02-02 23:46:41 -08:00
# Note: force parameter is internal only, not exposed to model API
2025-07-25 15:15:36 +00:00
"""
2026-01-23 12:26:53 +00:00
global _active_environments , _last_activity
2025-11-02 08:52:05 +08:00
2025-07-25 15:15:36 +00:00
try :
2026-01-23 12:26:53 +00:00
# Get configuration
config = _get_env_config ( )
env_type = config [ " env_type " ]
2026-02-10 06:49:58 +00:00
# Use task_id for environment isolation
effective_task_id = task_id or " default "
# Check per-task overrides (set by environments like TerminalBench2Env)
# before falling back to global env var config
overrides = _task_env_overrides . get ( effective_task_id , { } )
2026-01-23 12:26:53 +00:00
2026-02-10 06:49:58 +00:00
# Select image based on env type, with per-task override support
2026-01-23 12:26:53 +00:00
if env_type == " docker " :
2026-02-10 06:49:58 +00:00
image = overrides . get ( " docker_image " ) or config [ " docker_image " ]
2026-01-29 06:10:24 +00:00
elif env_type == " singularity " :
2026-02-10 06:49:58 +00:00
image = overrides . get ( " singularity_image " ) or config [ " singularity_image " ]
2026-01-23 12:26:53 +00:00
elif env_type == " modal " :
2026-02-10 06:49:58 +00:00
image = overrides . get ( " modal_image " ) or config [ " modal_image " ]
2026-01-23 12:26:53 +00:00
else :
image = " "
2026-02-10 06:49:58 +00:00
cwd = overrides . get ( " cwd " ) or config [ " cwd " ]
2026-01-23 12:26:53 +00:00
default_timeout = config [ " timeout " ]
effective_timeout = timeout or default_timeout
2025-11-02 08:52:05 +08:00
2026-01-31 06:30:48 +00:00
# For local environment in batch mode, create a unique subdirectory per task
2026-01-29 06:10:24 +00:00
# This prevents parallel tasks from overwriting each other's files
2026-01-31 06:30:48 +00:00
# In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories
if env_type == " local " and not os . getenv ( " HERMES_QUIET " ) :
2026-01-29 06:10:24 +00:00
with _env_lock :
if effective_task_id not in _task_workdirs :
task_workdir = Path ( cwd ) / f " hermes- { effective_task_id } - { uuid . uuid4 ( ) . hex [ : 8 ] } "
task_workdir . mkdir ( parents = True , exist_ok = True )
_task_workdirs [ effective_task_id ] = str ( task_workdir )
cwd = _task_workdirs [ effective_task_id ]
2026-01-23 12:26:53 +00:00
# Start cleanup thread
2025-11-04 03:32:43 -05:00
_start_cleanup_thread ( )
2026-02-12 05:37:14 +00:00
# Get or create environment.
# Use a per-task creation lock so concurrent tool calls for the same
# task_id wait for the first one to finish creating the sandbox,
# instead of each creating their own (wasting Modal resources).
2026-01-23 12:26:53 +00:00
with _env_lock :
2026-02-12 05:37:14 +00:00
if effective_task_id in _active_environments :
2026-02-08 05:00:47 +00:00
_last_activity [ effective_task_id ] = time . time ( )
env = _active_environments [ effective_task_id ]
2026-02-12 05:37:14 +00:00
needs_creation = False
else :
needs_creation = True
2026-01-23 12:26:53 +00:00
2026-02-08 05:00:47 +00:00
if needs_creation :
2026-02-12 05:37:14 +00:00
# Per-task lock: only one thread creates the sandbox, others wait
with _creation_locks_lock :
if effective_task_id not in _creation_locks :
_creation_locks [ effective_task_id ] = threading . Lock ( )
task_lock = _creation_locks [ effective_task_id ]
with task_lock :
# Double-check after acquiring the per-task lock
with _env_lock :
if effective_task_id in _active_environments :
_last_activity [ effective_task_id ] = time . time ( )
env = _active_environments [ effective_task_id ]
needs_creation = False
if needs_creation :
2026-02-21 12:43:56 -08:00
if env_type == " singularity " :
2026-02-12 05:37:14 +00:00
_check_disk_usage_warning ( )
2026-02-21 03:11:11 -08:00
logger . info ( " Creating new %s environment for task %s ... " , env_type , effective_task_id [ : 8 ] )
2026-02-08 05:00:47 +00:00
try :
2026-02-12 05:37:14 +00:00
ssh_config = None
if env_type == " ssh " :
ssh_config = {
" host " : config . get ( " ssh_host " , " " ) ,
" user " : config . get ( " ssh_user " , " " ) ,
" port " : config . get ( " ssh_port " , 22 ) ,
" key " : config . get ( " ssh_key " , " " ) ,
}
new_env = _create_environment (
env_type = env_type ,
image = image ,
cwd = cwd ,
timeout = effective_timeout ,
ssh_config = ssh_config
)
except ImportError as e :
return json . dumps ( {
" output " : " " ,
" exit_code " : - 1 ,
" error " : f " Terminal tool disabled: mini-swe-agent not available ( { e } ) " ,
" status " : " disabled "
} , ensure_ascii = False )
with _env_lock :
_active_environments [ effective_task_id ] = new_env
_last_activity [ effective_task_id ] = time . time ( )
env = new_env
2026-02-21 03:11:11 -08:00
logger . info ( " %s environment ready for task %s " , env_type , effective_task_id [ : 8 ] )
2025-11-02 08:52:05 +08:00
2026-02-02 23:35:18 -08:00
# Check for dangerous commands (only for local/ssh in interactive modes)
# Skip check if force=True (user has confirmed they want to run it)
if not force :
approval = _check_dangerous_command ( command , env_type )
if not approval [ " approved " ] :
2026-02-12 10:05:08 -08:00
# Check if this is an approval_required (gateway ask mode)
if approval . get ( " status " ) == " approval_required " :
return json . dumps ( {
" output " : " " ,
" exit_code " : - 1 ,
" error " : approval . get ( " message " , " Waiting for user approval " ) ,
" status " : " approval_required " ,
" command " : approval . get ( " command " , command ) ,
" description " : approval . get ( " description " , " dangerous command " ) ,
" pattern_key " : approval . get ( " pattern_key " , " " ) ,
} , ensure_ascii = False )
2026-02-02 23:35:18 -08:00
# Command was blocked - return informative message
return json . dumps ( {
" output " : " " ,
" exit_code " : - 1 ,
" error " : approval . get ( " message " , " Command denied - potentially dangerous operation " ) ,
" status " : " blocked "
} , ensure_ascii = False )
2026-01-23 12:26:53 +00:00
# Prepare command for execution
2025-07-26 04:31:17 +00:00
if background :
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
# Spawn a tracked background process via the process registry.
# For local backends: uses subprocess.Popen with output buffering.
# For non-local backends: runs inside the sandbox via env.execute().
from tools . process_registry import process_registry
session_key = os . getenv ( " HERMES_SESSION_KEY " , " " )
effective_cwd = workdir or cwd
2026-01-23 12:26:53 +00:00
try :
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
if env_type == " local " :
proc_session = process_registry . spawn_local (
command = command ,
cwd = effective_cwd ,
task_id = effective_task_id ,
session_key = session_key ,
env_vars = env . env if hasattr ( env , ' env ' ) else None ,
use_pty = pty ,
)
else :
proc_session = process_registry . spawn_via_env (
env = env ,
command = command ,
cwd = effective_cwd ,
task_id = effective_task_id ,
session_key = session_key ,
)
result_data = {
" output " : " Background process started " ,
" session_id " : proc_session . id ,
" pid " : proc_session . pid ,
2026-01-23 12:26:53 +00:00
" exit_code " : 0 ,
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
" error " : None ,
}
# Transparent timeout clamping note
max_timeout = effective_timeout
if timeout and timeout > max_timeout :
result_data [ " timeout_note " ] = (
f " Requested timeout { timeout } s was clamped to "
f " configured limit of { max_timeout } s "
)
# Register check_interval watcher (gateway picks this up after agent run)
if check_interval and background :
effective_interval = max ( 30 , check_interval )
if check_interval < 30 :
result_data [ " check_interval_note " ] = (
f " Requested { check_interval } s raised to minimum 30s "
)
process_registry . pending_watchers . append ( {
" session_id " : proc_session . id ,
" check_interval " : effective_interval ,
" session_key " : session_key ,
" platform " : os . getenv ( " HERMES_SESSION_PLATFORM " , " " ) ,
" chat_id " : os . getenv ( " HERMES_SESSION_CHAT_ID " , " " ) ,
} )
return json . dumps ( result_data , ensure_ascii = False )
2026-01-23 12:26:53 +00:00
except Exception as e :
return json . dumps ( {
" output " : " " ,
" exit_code " : - 1 ,
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
" error " : f " Failed to start background process: { str ( e ) } "
2026-01-23 12:26:53 +00:00
} , ensure_ascii = False )
else :
# Run foreground command with retry logic
max_retries = 3
retry_count = 0
result = None
2026-01-10 05:56:26 +00:00
2026-01-23 12:26:53 +00:00
while retry_count < = max_retries :
try :
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
execute_kwargs = { " timeout " : effective_timeout }
if workdir :
execute_kwargs [ " cwd " ] = workdir
result = env . execute ( command , * * execute_kwargs )
2026-01-23 12:26:53 +00:00
except Exception as e :
error_str = str ( e ) . lower ( )
if " timeout " in error_str :
return json . dumps ( {
" output " : " " ,
" exit_code " : 124 ,
" error " : f " Command timed out after { effective_timeout } seconds "
} , ensure_ascii = False )
# Retry on transient errors
if retry_count < max_retries :
retry_count + = 1
wait_time = 2 * * retry_count
2026-02-21 03:11:11 -08:00
logger . warning ( " Execution error, retrying in %d s (attempt %d / %d ) - Command: %s - Error: %s : %s - Task: %s , Backend: %s " ,
wait_time , retry_count , max_retries , command [ : 200 ] , type ( e ) . __name__ , e , effective_task_id , env_type )
2026-01-23 12:26:53 +00:00
time . sleep ( wait_time )
continue
2026-02-21 03:11:11 -08:00
logger . error ( " Execution failed after %d retries - Command: %s - Error: %s : %s - Task: %s , Backend: %s " ,
max_retries , command [ : 200 ] , type ( e ) . __name__ , e , effective_task_id , env_type )
2026-01-23 12:26:53 +00:00
return json . dumps ( {
" output " : " " ,
" exit_code " : - 1 ,
2026-02-08 05:00:47 +00:00
" error " : f " Command execution failed: { type ( e ) . __name__ } : { str ( e ) } "
2026-01-23 12:26:53 +00:00
} , ensure_ascii = False )
# Got a result
break
2026-01-10 05:56:26 +00:00
2026-01-23 12:26:53 +00:00
# Extract output
output = result . get ( " output " , " " )
returncode = result . get ( " returncode " , 0 )
2026-01-10 05:56:26 +00:00
2026-02-02 23:35:18 -08:00
# Add helpful message for sudo failures in messaging context
output = _handle_sudo_failure ( output , env_type )
2026-01-23 12:26:53 +00:00
# Truncate output if too long
MAX_OUTPUT_CHARS = 50000
if len ( output ) > MAX_OUTPUT_CHARS :
truncated_notice = f " \n \n ... [OUTPUT TRUNCATED - showing last { MAX_OUTPUT_CHARS } chars of { len ( output ) } total] ... "
output = truncated_notice + output [ - MAX_OUTPUT_CHARS : ]
2026-01-10 05:56:26 +00:00
2026-01-23 12:26:53 +00:00
return json . dumps ( {
" output " : output . strip ( ) if output else " " ,
" exit_code " : returncode ,
" error " : None
} , ensure_ascii = False )
2025-11-02 08:52:05 +08:00
2025-07-25 15:15:36 +00:00
except Exception as e :
return json . dumps ( {
2025-07-26 04:31:17 +00:00
" output " : " " ,
2025-07-25 15:15:36 +00:00
" exit_code " : - 1 ,
2026-01-23 12:26:53 +00:00
" error " : f " Failed to execute command: { str ( e ) } " ,
2025-07-25 15:15:36 +00:00
" status " : " error "
2025-11-05 03:47:17 +00:00
} , ensure_ascii = False )
2025-07-25 15:15:36 +00:00
2026-01-23 12:26:53 +00:00
def check_terminal_requirements() -> bool:
    """Check if all requirements for the terminal tool are met.

    Returns:
        True when the configured backend (from ``_get_env_config``'s
        ``env_type``) and its runtime dependencies are available:
        mini-swe-agent for all backends, plus a working Docker daemon,
        a singularity/apptainer binary, or Modal credentials as needed.
        False for unknown backends or on any failure.
    """
    config = _get_env_config()
    env_type = config["env_type"]
    try:
        if env_type == "local":
            from minisweagent.environments.local import LocalEnvironment  # noqa: F401
            return True
        elif env_type == "docker":
            from minisweagent.environments.docker import DockerEnvironment  # noqa: F401
            # Check if docker is available (subprocess/shutil are imported
            # at module level; no need to re-import locally).
            result = subprocess.run(["docker", "version"], capture_output=True, timeout=5)
            return result.returncode == 0
        elif env_type == "singularity":
            from minisweagent.environments.singularity import SingularityEnvironment  # noqa: F401
            # Check if singularity/apptainer is available
            executable = shutil.which("apptainer") or shutil.which("singularity")
            if executable:
                result = subprocess.run([executable, "--version"], capture_output=True, timeout=5)
                return result.returncode == 0
            return False
        elif env_type == "modal":
            from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment  # noqa: F401
            # Check for modal token (env var or ~/.modal.toml credentials file)
            return os.getenv("MODAL_TOKEN_ID") is not None or Path.home().joinpath(".modal.toml").exists()
        else:
            # NOTE(review): "ssh" (mentioned elsewhere in this file) falls
            # through to False here - confirm whether that is intended.
            return False
    except Exception as e:
        logger.error("Terminal requirements check failed: %s", e)
        return False
if __name__ == "__main__":
    # Simple smoke test when run directly: print the resolved configuration,
    # verify backend requirements, and show usage hints.
    print("Terminal Tool Module (mini-swe-agent backend)")
    print("=" * 50)

    config = _get_env_config()
    print("\nCurrent Configuration:")
    print(f"Environment type: {config['env_type']}")
    print(f"Docker image: {config['docker_image']}")
    print(f"Modal image: {config['modal_image']}")
    print(f"Working directory: {config['cwd']}")
    print(f"Default timeout: {config['timeout']}s")
    print(f"Lifetime: {config['lifetime_seconds']}s")

    if not check_terminal_requirements():
        print("\n❌ Requirements not met. Please check the messages above.")
        # sys.exit instead of the site-provided exit() builtin, which is not
        # guaranteed to exist in all execution contexts.
        sys.exit(1)

    print("\n✅ All requirements met!")
    print("\nAvailable Tool:")
    print("- terminal_tool: Execute commands using mini-swe-agent environments")
    print("\nUsage Examples:")
    print("# Execute a command")
    print("result = terminal_tool(command='ls -la')")
    print("")
    print("# Run a background task")
    print("result = terminal_tool(command='python server.py', background=True)")
    print("\nEnvironment Variables:")
    default_img = "nikolaik/python-nodejs:python3.11-nodejs20"
    print(f"TERMINAL_ENV: {os.getenv('TERMINAL_ENV', 'local')} (local/docker/singularity/modal/ssh)")
    print(f"TERMINAL_DOCKER_IMAGE: {os.getenv('TERMINAL_DOCKER_IMAGE', default_img)}")
    print(f"TERMINAL_SINGULARITY_IMAGE: {os.getenv('TERMINAL_SINGULARITY_IMAGE', f'docker://{default_img}')}")
    print(f"TERMINAL_MODAL_IMAGE: {os.getenv('TERMINAL_MODAL_IMAGE', default_img)}")
    print(f"TERMINAL_CWD: {os.getenv('TERMINAL_CWD', os.getcwd())}")
    print(f"TERMINAL_TIMEOUT: {os.getenv('TERMINAL_TIMEOUT', '60')}")
    print(f"TERMINAL_LIFETIME_SECONDS: {os.getenv('TERMINAL_LIFETIME_SECONDS', '300')}")