From 02b38b93cba9237da77619db5ea9db481648a4b9 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 24 Mar 2026 07:30:25 -0700 Subject: [PATCH] =?UTF-8?q?refactor:=20remove=20mini-swe-agent=20dependenc?= =?UTF-8?q?y=20=E2=80=94=20inline=20Docker/Modal=20backends=20(#2804)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the mini-swe-agent git submodule. All terminal backends now use hermes-agent's own environment implementations directly. Docker backend: - Inline the `docker run -d` container startup (was 15 lines in minisweagent's DockerEnvironment). Our wrapper already handled execute(), cleanup(), security hardening, volumes, and resource limits. Modal backend: - Import swe-rex's ModalDeployment directly instead of going through minisweagent's 90-line passthrough wrapper. - Bake the _AsyncWorker pattern (from environments/patches.py) directly into ModalEnvironment for Atropos compatibility without monkey-patching. Cleanup: - Remove minisweagent_path.py (submodule path resolution helper) - Remove submodule init/install from install.sh and setup-hermes.sh - Remove mini-swe-agent from .gitmodules - environments/patches.py is now a no-op (kept for backward compat) - terminal_tool.py no longer does sys.path hacking for minisweagent - mini_swe_runner.py guards imports (optional, for RL training only) - Update all affected tests to mock the new direct subprocess calls - Update README.md, CONTRIBUTING.md No functionality change — all Docker, Modal, local, SSH, Singularity, and Daytona backends behave identically. 6093 tests pass. --- .gitmodules | 3 - CONTRIBUTING.md | 5 +- README.md | 4 +- cli.py | 4 +- environments/patches.py | 186 ++---------------- mini-swe-agent | 1 - mini_swe_runner.py | 23 ++- minisweagent_path.py | 92 --------- pyproject.toml | 2 +- run_agent.py | 2 +- scripts/install.sh | 19 -- setup-hermes.sh | 11 +- tests/integration/test_modal_terminal.py | 1 - tests/test_minisweagent_path.py | 37 +--- tests/tools/test_code_execution.py | 4 +- tests/tools/test_docker_environment.py | 142 +++++-------- tests/tools/test_modal_sandbox_fixes.py | 51 ++--- tests/tools/test_terminal_requirements.py | 7 +- .../tools/test_terminal_tool_requirements.py | 2 +- tools/environments/docker.py | 73 ++++--- tools/environments/modal.py | 175 +++++++++++----- tools/terminal_tool.py | 30 +-- 22 files changed, 283 insertions(+), 591 deletions(-) delete mode 160000 mini-swe-agent delete mode 100644 minisweagent_path.py diff --git a/.gitmodules b/.gitmodules index 6a494f4b..76580d6e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "mini-swe-agent"] - path = mini-swe-agent - url = https://github.com/SWE-agent/mini-swe-agent [submodule "tinker-atropos"] path = tinker-atropos url = https://github.com/nousresearch/tinker-atropos diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 25cddde6..4577454e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -72,8 +72,9 @@ export VIRTUAL_ENV="$(pwd)/venv" # Install with all extras (messaging, cron, CLI menus, dev tools) uv pip install -e ".[all,dev]" -uv pip install -e "./mini-swe-agent" -uv pip install -e "./tinker-atropos" + +# Optional: RL training submodule +# git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos" # Optional: browser tools npm install diff --git a/README.md b/README.md index 9fb40a58..fde4cae3 100644 --- a/README.md +++ b/README.md @@ -144,16 +144,14 @@ Quick start for contributors: ```bash git clone https://github.com/NousResearch/hermes-agent.git cd hermes-agent -git submodule update --init mini-swe-agent # required terminal backend curl -LsSf https://astral.sh/uv/install.sh | sh uv venv venv --python 3.11 source venv/bin/activate uv pip install -e ".[all,dev]" -uv pip install -e "./mini-swe-agent" python -m pytest tests/ -q ``` -> **RL Training (optional):** To work on the RL/Tinker-Atropos integration, also run: +> **RL Training (optional):** To work on the RL/Tinker-Atropos integration: > ```bash > git submodule update --init tinker-atropos > uv pip install -e "./tinker-atropos" diff --git a/cli.py b/cli.py index ce9e30b8..bccc691f 100644 --- a/cli.py +++ b/cli.py @@ -31,7 +31,7 @@ from typing import List, Dict, Any, Optional logger = logging.getLogger(__name__) # Suppress startup messages for clean CLI experience -os.environ["MSWEA_SILENT_STARTUP"] = "1" # mini-swe-agent +os.environ["MSWEA_SILENT_STARTUP"] = "1" # suppress mini-swe-agent startup noise if installed os.environ["HERMES_QUIET"] = "1" # Our own modules import yaml @@ -78,7 +78,7 @@ _hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) _project_env = Path(__file__).parent / '.env' load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env) -# Point mini-swe-agent at ~/.hermes/ so it shares our config +# Point mini-swe-agent at ~/.hermes/ if installed (RL training use) os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home)) # ============================================================================= diff --git a/environments/patches.py b/environments/patches.py index 3c5ed2cd..aed78da6 100644 --- a/environments/patches.py +++ b/environments/patches.py @@ -2,203 +2,41 @@ Monkey patches for making hermes-agent tools work inside async frameworks (Atropos). Problem: - Some tools use asyncio.run() internally (e.g., mini-swe-agent's Modal backend, + Some tools use asyncio.run() internally (e.g., Modal backend via SWE-ReX, web_extract). This crashes when called from inside Atropos's event loop because asyncio.run() can't be nested. Solution: - Replace the problematic methods with versions that use a dedicated background - thread with its own event loop. The calling code sees the same sync interface -- - call a function, get a result -- but internally the async work happens on a - separate thread that doesn't conflict with Atropos's loop. + The Modal environment (tools/environments/modal.py) now uses a dedicated + _AsyncWorker thread internally, making it safe for both CLI and Atropos use. + No monkey-patching is required. - These patches are safe for normal CLI use too: when there's no running event - loop, the behavior is identical (the background thread approach works regardless). - -What gets patched: - - SwerexModalEnvironment.__init__ -- creates Modal deployment on a background thread - - SwerexModalEnvironment.execute -- runs commands on the same background thread - - SwerexModalEnvironment.stop -- stops deployment on the background thread + This module is kept for backward compatibility — apply_patches() is now a no-op. Usage: Call apply_patches() once at import time (done automatically by hermes_base_env.py). - This is idempotent -- calling it multiple times is safe. + This is idempotent — calling it multiple times is safe. """ -import asyncio import logging -import threading -from typing import Any logger = logging.getLogger(__name__) _patches_applied = False -class _AsyncWorker: - """ - A dedicated background thread with its own event loop. - - Allows sync code to submit async coroutines and block for results, - even when called from inside another running event loop. Used to - bridge sync tool interfaces with async backends (Modal, SWE-ReX). - """ - - def __init__(self): - self._loop: asyncio.AbstractEventLoop = None - self._thread: threading.Thread = None - self._started = threading.Event() - - def start(self): - """Start the background event loop thread.""" - self._thread = threading.Thread(target=self._run_loop, daemon=True) - self._thread.start() - self._started.wait(timeout=30) - - def _run_loop(self): - """Background thread entry point -- runs the event loop forever.""" - self._loop = asyncio.new_event_loop() - asyncio.set_event_loop(self._loop) - self._started.set() - self._loop.run_forever() - - def run_coroutine(self, coro, timeout=600): - """ - Submit a coroutine to the background loop and block until it completes. - - Safe to call from any thread, including threads that already have - a running event loop. - """ - if self._loop is None or self._loop.is_closed(): - raise RuntimeError("AsyncWorker loop is not running") - future = asyncio.run_coroutine_threadsafe(coro, self._loop) - return future.result(timeout=timeout) - - def stop(self): - """Stop the background event loop and join the thread.""" - if self._loop and self._loop.is_running(): - self._loop.call_soon_threadsafe(self._loop.stop) - if self._thread: - self._thread.join(timeout=10) - - -def _patch_swerex_modal(): - """ - Monkey patch SwerexModalEnvironment to use a background thread event loop - instead of asyncio.run(). This makes it safe to call from inside Atropos's - async event loop. - - The patched methods have the exact same interface and behavior -- the only - difference is HOW the async work is executed internally. - """ - try: - from minisweagent.environments.extra.swerex_modal import ( - SwerexModalEnvironment, - SwerexModalEnvironmentConfig, - ) - from swerex.deployment.modal import ModalDeployment - from swerex.runtime.abstract import Command as RexCommand - except ImportError: - # mini-swe-agent or swe-rex not installed -- nothing to patch - logger.debug("mini-swe-agent Modal backend not available, skipping patch") - return - - # Save original methods so we can refer to config handling - _original_init = SwerexModalEnvironment.__init__ - - def _patched_init(self, **kwargs): - """Patched __init__: creates Modal deployment on a background thread.""" - self.config = SwerexModalEnvironmentConfig(**kwargs) - - # Start a dedicated event loop thread for all Modal async operations - self._worker = _AsyncWorker() - self._worker.start() - - # Pre-build a modal.Image with pip fix for Modal's legacy image builder. - # Modal requires `python -m pip` to work during image build, but some - # task images (e.g., TBLite's broken-python) have intentionally broken pip. - # Fix: remove stale pip dist-info and reinstall via ensurepip before Modal - # tries to use it. This is a no-op for images where pip already works. - import modal as _modal - image_spec = self.config.image - if isinstance(image_spec, str): - image_spec = _modal.Image.from_registry( - image_spec, - setup_dockerfile_commands=[ - "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; " - "python -m ensurepip --upgrade --default-pip 2>/dev/null || true", - ], - ) - - # Create AND start the deployment entirely on the worker's loop/thread - # so all gRPC channels and async state are bound to that loop - async def _create_and_start(): - deployment = ModalDeployment( - image=image_spec, - startup_timeout=self.config.startup_timeout, - runtime_timeout=self.config.runtime_timeout, - deployment_timeout=self.config.deployment_timeout, - install_pipx=self.config.install_pipx, - modal_sandbox_kwargs=self.config.modal_sandbox_kwargs, - ) - await deployment.start() - return deployment - - self.deployment = self._worker.run_coroutine(_create_and_start()) - - def _patched_execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]: - """Patched execute: runs commands on the background thread's loop.""" - async def _do_execute(): - return await self.deployment.runtime.execute( - RexCommand( - command=command, - shell=True, - check=False, - cwd=cwd or self.config.cwd, - timeout=timeout or self.config.timeout, - merge_output_streams=True, - env=self.config.env if self.config.env else None, - ) - ) - - output = self._worker.run_coroutine(_do_execute()) - return { - "output": output.stdout, - "returncode": output.exit_code, - } - - def _patched_stop(self): - """Patched stop: stops deployment on the background thread, then stops the thread.""" - try: - self._worker.run_coroutine( - asyncio.wait_for(self.deployment.stop(), timeout=10), - timeout=15, - ) - except Exception: - pass - finally: - self._worker.stop() - - # Apply the patches - SwerexModalEnvironment.__init__ = _patched_init - SwerexModalEnvironment.execute = _patched_execute - SwerexModalEnvironment.stop = _patched_stop - - logger.debug("Patched SwerexModalEnvironment for async-safe operation") - - def apply_patches(): - """ - Apply all monkey patches needed for Atropos compatibility. + """Apply all monkey patches needed for Atropos compatibility. - Safe to call multiple times -- patches are only applied once. - Safe for normal CLI use -- patched code works identically when - there is no running event loop. + Now a no-op — Modal async safety is built directly into ModalEnvironment. + Safe to call multiple times. """ global _patches_applied if _patches_applied: return - _patch_swerex_modal() + # Modal async-safety is now built into tools/environments/modal.py + # via the _AsyncWorker class. No monkey-patching needed. + logger.debug("apply_patches() called — no patches needed (async safety is built-in)") _patches_applied = True diff --git a/mini-swe-agent b/mini-swe-agent deleted file mode 160000 index 07aa6a73..00000000 --- a/mini-swe-agent +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 07aa6a738556e44b30d7b5c3bbd5063dac871d25 diff --git a/mini_swe_runner.py b/mini_swe_runner.py index e0df6695..fb3dbfe4 100644 --- a/mini_swe_runner.py +++ b/mini_swe_runner.py @@ -42,11 +42,13 @@ from dotenv import load_dotenv # Load environment variables load_dotenv() -# Add mini-swe-agent to path if not installed. In git worktrees the populated -# submodule may live in the main checkout rather than the worktree itself. -from minisweagent_path import ensure_minisweagent_on_path - -ensure_minisweagent_on_path(Path(__file__).resolve().parent) +# mini-swe-agent is an optional dependency for this runner. +# Install separately: git submodule update --init mini-swe-agent && pip install -e ./mini-swe-agent +try: + import minisweagent # noqa: F401 + _HAS_MINISWEAGENT = True +except ImportError: + _HAS_MINISWEAGENT = False # ============================================================================ @@ -110,7 +112,10 @@ def create_environment( **kwargs ): """ - Create an execution environment from mini-swe-agent. + Create an execution environment. + + Uses mini-swe-agent environments when available, which requires the + mini-swe-agent submodule to be installed separately. Args: env_type: One of "local", "docker", "modal" @@ -122,6 +127,12 @@ def create_environment( Returns: Environment instance with execute() method """ + if not _HAS_MINISWEAGENT: + raise ImportError( + "mini-swe-agent is required for mini_swe_runner.py. " + "Install it: git submodule update --init mini-swe-agent && pip install -e ./mini-swe-agent" + ) + if env_type == "local": from minisweagent.environments.local import LocalEnvironment return LocalEnvironment(cwd=cwd, timeout=timeout) diff --git a/minisweagent_path.py b/minisweagent_path.py deleted file mode 100644 index e0ea8f29..00000000 --- a/minisweagent_path.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Helpers for locating the mini-swe-agent source tree. - -Hermes often runs from git worktrees. In that layout the worktree root may have -an empty ``mini-swe-agent/`` placeholder while the real populated submodule -lives under the main checkout that owns the shared ``.git`` directory. - -These helpers locate a usable ``mini-swe-agent/src`` directory and optionally -prepend it to ``sys.path`` so imports like ``import minisweagent`` work from -both normal checkouts and worktrees. -""" - -from __future__ import annotations - -import importlib.util -import sys -from pathlib import Path -from typing import Optional - - -def _read_gitdir(repo_root: Path) -> Optional[Path]: - """Resolve the gitdir referenced by ``repo_root/.git`` when it is a file.""" - git_marker = repo_root / ".git" - if not git_marker.is_file(): - return None - - try: - raw = git_marker.read_text(encoding="utf-8").strip() - except OSError: - return None - - prefix = "gitdir:" - if not raw.lower().startswith(prefix): - return None - - target = raw[len(prefix):].strip() - gitdir = Path(target) - if not gitdir.is_absolute(): - gitdir = (repo_root / gitdir).resolve() - else: - gitdir = gitdir.resolve() - return gitdir - - -def discover_minisweagent_src(repo_root: Optional[Path] = None) -> Optional[Path]: - """Return the best available ``mini-swe-agent/src`` path, if any. - - Search order: - 1. Current checkout/worktree root - 2. Main checkout that owns the shared ``.git`` directory (for worktrees) - """ - repo_root = (repo_root or Path(__file__).resolve().parent).resolve() - - candidates: list[Path] = [repo_root / "mini-swe-agent" / "src"] - - gitdir = _read_gitdir(repo_root) - if gitdir is not None: - # Worktree layout:
/.git/worktrees/ - if len(gitdir.parents) >= 3 and gitdir.parent.name == "worktrees": - candidates.append(gitdir.parents[2] / "mini-swe-agent" / "src") - # Direct checkout with .git file pointing elsewhere - elif gitdir.name == ".git": - candidates.append(gitdir.parent / "mini-swe-agent" / "src") - - seen = set() - for candidate in candidates: - candidate = candidate.resolve() - if candidate in seen: - continue - seen.add(candidate) - if candidate.exists() and candidate.is_dir(): - return candidate - - return None - - -def ensure_minisweagent_on_path(repo_root: Optional[Path] = None) -> Optional[Path]: - """Ensure ``minisweagent`` is importable by prepending its src dir to sys.path. - - Returns the inserted/discovered path, or ``None`` if the package is already - importable or no local source tree could be found. - """ - if importlib.util.find_spec("minisweagent") is not None: - return None - - src = discover_minisweagent_src(repo_root) - if src is None: - return None - - src_str = str(src) - if src_str not in sys.path: - sys.path.insert(0, src_str) - return src diff --git a/pyproject.toml b/pyproject.toml index 5059b9ef..7140eaff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,7 +90,7 @@ hermes-agent = "run_agent:main" hermes-acp = "acp_adapter.entry:main" [tool.setuptools] -py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "mini_swe_runner", "minisweagent_path", "rl_cli", "utils"] +py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "mini_swe_runner", "rl_cli", "utils"] [tool.setuptools.packages.find] include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "honcho_integration", "acp_adapter"] diff --git a/run_agent.py b/run_agent.py index 6e0cac97..976bc318 100644 --- a/run_agent.py +++ b/run_agent.py @@ -58,7 +58,7 @@ if _loaded_env_paths: else: logger.info("No .env file found. Using system environment variables.") -# Point mini-swe-agent at ~/.hermes/ so it shares our config +# Point mini-swe-agent at ~/.hermes/ if installed (RL training use) os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home)) os.environ.setdefault("MSWEA_SILENT_STARTUP", "1") diff --git a/scripts/install.sh b/scripts/install.sh index 9671b99d..6fbb22b4 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -637,13 +637,6 @@ clone_repo() { cd "$INSTALL_DIR" - # Only init mini-swe-agent (terminal tool backend — required). - # tinker-atropos (RL training) is optional and heavy — users can opt in later - # with: git submodule update --init tinker-atropos && uv pip install -e ./tinker-atropos - log_info "Initializing mini-swe-agent submodule (terminal backend)..." - git submodule update --init mini-swe-agent - log_success "Submodule ready" - log_success "Repository ready" } @@ -718,18 +711,6 @@ install_deps() { log_success "Main package installed" - # Install submodules - log_info "Installing mini-swe-agent (terminal tool backend)..." - if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then - if $UV_CMD pip install -e "./mini-swe-agent"; then - log_success "mini-swe-agent installed" - else - log_warn "mini-swe-agent install failed (Docker/Modal terminal backends may not work, local terminal is unaffected)" - fi - else - log_warn "mini-swe-agent not found (run: git submodule update --init)" - fi - # tinker-atropos (RL training) is optional — skip by default. # To enable RL tools: git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos" if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then diff --git a/setup-hermes.sh b/setup-hermes.sh index 5db5e6bb..9561f497 100755 --- a/setup-hermes.sh +++ b/setup-hermes.sh @@ -124,16 +124,7 @@ echo -e "${GREEN}✓${NC} Dependencies installed" # Submodules (terminal backend + RL training) # ============================================================================ -echo -e "${CYAN}→${NC} Installing submodules..." - -# mini-swe-agent (terminal tool backend) -if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then - $UV_CMD pip install -e "./mini-swe-agent" && \ - echo -e "${GREEN}✓${NC} mini-swe-agent installed" || \ - echo -e "${YELLOW}⚠${NC} mini-swe-agent install failed (Docker/Modal terminal backends may not work, local terminal is unaffected)" -else - echo -e "${YELLOW}⚠${NC} mini-swe-agent not found (run: git submodule update --init --recursive)" -fi +echo -e "${CYAN}→${NC} Installing optional submodules..." # tinker-atropos (RL training backend) if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then diff --git a/tests/integration/test_modal_terminal.py b/tests/integration/test_modal_terminal.py index 11943f20..71877c18 100644 --- a/tests/integration/test_modal_terminal.py +++ b/tests/integration/test_modal_terminal.py @@ -41,7 +41,6 @@ except ImportError: # Add project root to path for imports parent_dir = Path(__file__).parent.parent.parent sys.path.insert(0, str(parent_dir)) -sys.path.insert(0, str(parent_dir / "mini-swe-agent" / "src")) # Import terminal_tool module directly using importlib to avoid tools/__init__.py import importlib.util diff --git a/tests/test_minisweagent_path.py b/tests/test_minisweagent_path.py index 00eca12c..98c4eb37 100644 --- a/tests/test_minisweagent_path.py +++ b/tests/test_minisweagent_path.py @@ -1,34 +1,5 @@ -"""Tests for minisweagent_path.py.""" +"""Tests for minisweagent_path.py — REMOVED. -from pathlib import Path - -from minisweagent_path import discover_minisweagent_src - - -def test_discover_minisweagent_src_in_current_checkout(tmp_path): - repo = tmp_path / "repo" - src = repo / "mini-swe-agent" / "src" - src.mkdir(parents=True) - - assert discover_minisweagent_src(repo) == src.resolve() - - -def test_discover_minisweagent_src_falls_back_from_worktree_to_main_checkout(tmp_path): - main_repo = tmp_path / "main-repo" - (main_repo / ".git" / "worktrees" / "wt1").mkdir(parents=True) - main_src = main_repo / "mini-swe-agent" / "src" - main_src.mkdir(parents=True) - - worktree = tmp_path / "worktree" - worktree.mkdir() - (worktree / ".git").write_text(f"gitdir: {main_repo / '.git' / 'worktrees' / 'wt1'}\n", encoding="utf-8") - (worktree / "mini-swe-agent").mkdir() # empty placeholder, no src/ - - assert discover_minisweagent_src(worktree) == main_src.resolve() - - -def test_discover_minisweagent_src_returns_none_when_missing(tmp_path): - repo = tmp_path / "repo" - repo.mkdir() - - assert discover_minisweagent_src(repo) is None +minisweagent_path.py was removed as part of dropping the mini-swe-agent +dependency. These tests are no longer applicable. +""" diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py index b7c34708..80a9f4ab 100644 --- a/tests/tools/test_code_execution.py +++ b/tests/tools/test_code_execution.py @@ -131,9 +131,9 @@ class TestExecuteCode(unittest.TestCase): def test_repo_root_modules_are_importable(self): """Sandboxed scripts can import modules that live at the repo root.""" - result = self._run('import minisweagent_path; print(minisweagent_path.__file__)') + result = self._run('import hermes_constants; print(hermes_constants.__file__)') self.assertEqual(result["status"], "success") - self.assertIn("minisweagent_path.py", result["output"]) + self.assertIn("hermes_constants.py", result["output"]) def test_single_tool_call(self): """Script calls terminal and prints the result.""" diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py index 9e5cab3d..002776ca 100644 --- a/tests/tools/test_docker_environment.py +++ b/tests/tools/test_docker_environment.py @@ -9,25 +9,24 @@ import pytest from tools.environments import docker as docker_env -def _install_fake_minisweagent(monkeypatch, captured_run_args): - class MockInnerDocker: - container_id = "fake-container" - config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})() +def _mock_subprocess_run(monkeypatch): + """Mock subprocess.run to intercept docker run -d and docker version calls. - def __init__(self, **kwargs): - captured_run_args.extend(kwargs.get("run_args", [])) + Returns a list of captured (cmd, kwargs) tuples for inspection. + """ + calls = [] - def cleanup(self): - pass + def _run(cmd, **kwargs): + calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs)) + if isinstance(cmd, list) and len(cmd) >= 2: + if cmd[1] == "version": + return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="") + if cmd[1] == "run": + return subprocess.CompletedProcess(cmd, 0, stdout="fake-container-id\n", stderr="") + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - minisweagent_mod = types.ModuleType("minisweagent") - environments_mod = types.ModuleType("minisweagent.environments") - docker_mod = types.ModuleType("minisweagent.environments.docker") - docker_mod.DockerEnvironment = MockInnerDocker - - monkeypatch.setitem(sys.modules, "minisweagent", minisweagent_mod) - monkeypatch.setitem(sys.modules, "minisweagent.environments", environments_mod) - monkeypatch.setitem(sys.modules, "minisweagent.environments.docker", docker_mod) + monkeypatch.setattr(docker_env.subprocess, "run", _run) + return calls def _make_dummy_env(**kwargs): @@ -49,7 +48,7 @@ def _make_dummy_env(**kwargs): def test_ensure_docker_available_logs_and_raises_when_not_found(monkeypatch, caplog): - """When docker cannot be found, raise a clear error before mini-swe setup.""" + """When docker cannot be found, raise a clear error before container setup.""" monkeypatch.setattr(docker_env, "find_docker", lambda: None) monkeypatch.setattr( @@ -118,14 +117,8 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path): project_dir = tmp_path / "my-project" project_dir.mkdir() - def _run_docker_version(*args, **kwargs): - return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") - monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") - monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) - - captured_run_args = [] - _install_fake_minisweagent(monkeypatch, captured_run_args) + calls = _mock_subprocess_run(monkeypatch) _make_dummy_env( cwd="/workspace", @@ -133,7 +126,10 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path): auto_mount_cwd=True, ) - run_args_str = " ".join(captured_run_args) + # Find the docker run call and check its args + run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"] + assert run_calls, "docker run should have been called" + run_args_str = " ".join(run_calls[0][0]) assert f"{project_dir}:/workspace" in run_args_str @@ -142,14 +138,8 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path): project_dir = tmp_path / "my-project" project_dir.mkdir() - def _run_docker_version(*args, **kwargs): - return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") - monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") - monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) - - captured_run_args = [] - _install_fake_minisweagent(monkeypatch, captured_run_args) + calls = _mock_subprocess_run(monkeypatch) _make_dummy_env( cwd="/root", @@ -157,7 +147,9 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path): auto_mount_cwd=False, ) - run_args_str = " ".join(captured_run_args) + run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"] + assert run_calls, "docker run should have been called" + run_args_str = " ".join(run_calls[0][0]) assert f"{project_dir}:/workspace" not in run_args_str @@ -168,14 +160,8 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path other_dir = tmp_path / "other" other_dir.mkdir() - def _run_docker_version(*args, **kwargs): - return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") - monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") - monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) - - captured_run_args = [] - _install_fake_minisweagent(monkeypatch, captured_run_args) + calls = _mock_subprocess_run(monkeypatch) _make_dummy_env( cwd="/workspace", @@ -184,7 +170,9 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path volumes=[f"{other_dir}:/workspace"], ) - run_args_str = " ".join(captured_run_args) + run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"] + assert run_calls, "docker run should have been called" + run_args_str = " ".join(run_calls[0][0]) assert f"{other_dir}:/workspace" in run_args_str assert run_args_str.count(":/workspace") == 1 @@ -194,14 +182,8 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path): project_dir = tmp_path / "my-project" project_dir.mkdir() - def _run_docker_version(*args, **kwargs): - return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="") - monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") - monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version) - - captured_run_args = [] - _install_fake_minisweagent(monkeypatch, captured_run_args) + calls = _mock_subprocess_run(monkeypatch) _make_dummy_env( cwd="/workspace", @@ -211,28 +193,23 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path): task_id="test-persistent-auto-mount", ) - run_args_str = " ".join(captured_run_args) + run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"] + assert run_calls, "docker run should have been called" + run_args_str = " ".join(run_calls[0][0]) assert f"{project_dir}:/workspace" in run_args_str assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in run_args_str def test_non_persistent_cleanup_removes_container(monkeypatch): - """When container_persistent=false, cleanup() must run docker rm -f so the container is removed (Fixes #1679).""" - run_calls = [] - - def _run(cmd, **kwargs): - run_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs)) - if cmd and getattr(cmd[0], "__str__", None) and "docker" in str(cmd[0]): - if len(cmd) >= 2 and cmd[1] == "run": - return subprocess.CompletedProcess(cmd, 0, stdout="abc123container\n", stderr="") - return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") - + """When persistent=false, cleanup() must schedule docker stop + rm.""" monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker") - monkeypatch.setattr(docker_env.subprocess, "run", _run) - monkeypatch.setattr(docker_env.subprocess, "Popen", lambda *a, **k: type("P", (), {"poll": lambda: None, "wait": lambda **kw: None, "returncode": 0, "stdout": iter([]), "stdin": None})()) + calls = _mock_subprocess_run(monkeypatch) - captured_run_args = [] - _install_fake_minisweagent(monkeypatch, captured_run_args) + popen_cmds = [] + monkeypatch.setattr( + docker_env.subprocess, "Popen", + lambda cmd, **kw: (popen_cmds.append(cmd), type("P", (), {"poll": lambda s: 0, "wait": lambda s, **k: None, "returncode": 0, "stdout": iter([]), "stdin": None})())[1], + ) env = _make_dummy_env(persistent_filesystem=False, task_id="ephemeral-task") assert env._container_id @@ -240,8 +217,9 @@ def test_non_persistent_cleanup_removes_container(monkeypatch): env.cleanup() - rm_calls = [c for c in run_calls if isinstance(c[0], list) and len(c[0]) >= 4 and c[0][1:4] == ["rm", "-f", container_id]] - assert len(rm_calls) >= 1, "cleanup() should run docker rm -f when container_persistent=false" + # Should have stop and rm calls via Popen + stop_cmds = [c for c in popen_cmds if container_id in str(c) and "stop" in str(c)] + assert len(stop_cmds) >= 1, f"cleanup() should schedule docker stop for {container_id}" class _FakePopen: @@ -263,10 +241,8 @@ def _make_execute_only_env(forward_env=None): env._forward_env = forward_env or [] env._prepare_command = lambda command: (command, None) env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124} - env._inner = type("Inner", (), { - "container_id": "test-container", - "config": type("Cfg", (), {"executable": "/usr/bin/docker", "env": {}})(), - })() + env._container_id = "test-container" + env._docker_exe = "/usr/bin/docker" return env @@ -304,31 +280,3 @@ def test_execute_prefers_shell_env_over_hermes_dotenv(monkeypatch): assert "GITHUB_TOKEN=value_from_shell" in popen_calls[0] assert "GITHUB_TOKEN=value_from_dotenv" not in popen_calls[0] - - -def test_non_persistent_cleanup_removes_container(monkeypatch): - """When container_persistent=false, cleanup() must run docker rm -f so the container is removed (Fixes #1679).""" - run_calls = [] - - def _run(cmd, **kwargs): - run_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs)) - if cmd and getattr(cmd[0], '__str__', None) and 'docker' in str(cmd[0]): - if len(cmd) >= 2 and cmd[1] == 'run': - return subprocess.CompletedProcess(cmd, 0, stdout="abc123container\n", stderr="") - return subprocess.CompletedProcess(cmd, 0, stdout='', stderr='') - - monkeypatch.setattr(docker_env, 'find_docker', lambda: '/usr/bin/docker') - monkeypatch.setattr(docker_env.subprocess, 'run', _run) - monkeypatch.setattr(docker_env.subprocess, 'Popen', lambda *a, **k: type('P', (), {'poll': lambda: None, 'wait': lambda **kw: None, 'returncode': 0, 'stdout': iter([]), 'stdin': None})()) - - captured_run_args = [] - _install_fake_minisweagent(monkeypatch, captured_run_args) - - env = _make_dummy_env(persistent_filesystem=False, task_id='ephemeral-task') - assert env._container_id - container_id = env._container_id - - env.cleanup() - - rm_calls = [c for c in run_calls if isinstance(c[0], list) and len(c[0]) >= 4 and c[0][1:4] == ['rm', '-f', container_id]] - assert len(rm_calls) >= 1, 'cleanup() should run docker rm -f when container_persistent=false' diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py index 49c30623..23dfa2f8 100644 --- a/tests/tools/test_modal_sandbox_fixes.py +++ b/tests/tools/test_modal_sandbox_fixes.py @@ -1,11 +1,11 @@ """Tests for Modal sandbox infrastructure fixes (TBLite baseline). -Covers the 9 bugs discovered while setting up TBLite evaluation: -1. Tool resolution — terminal + file tools load with minisweagent +Covers the bugs discovered while setting up TBLite evaluation: +1. Tool resolution — terminal + file tools load correctly 2. CWD fix — host paths get replaced with /root for container backends 3. ephemeral_disk version check 4. Tilde ~ replaced with /root for container backends -5. ensurepip fix in patches.py for Modal image builder +5. ensurepip fix in Modal image builder 6. install_pipx stays True for swerex-remote 7. /home/ added to host prefix check """ @@ -36,17 +36,8 @@ except ImportError: class TestToolResolution: """Verify get_tool_definitions returns all expected tools for eval.""" - def _has_minisweagent(self): - try: - import minisweagent # noqa: F401 - return True - except ImportError: - return False - def test_terminal_and_file_toolsets_resolve_all_tools(self): """enabled_toolsets=['terminal', 'file'] should produce 6 tools.""" - if not self._has_minisweagent(): - pytest.skip("minisweagent not installed (git submodule update --init)") from model_tools import get_tool_definitions tools = get_tool_definitions( enabled_toolsets=["terminal", "file"], @@ -58,18 +49,13 @@ class TestToolResolution: def test_terminal_tool_present(self): """The terminal tool must be present (not silently dropped).""" - if not self._has_minisweagent(): - pytest.skip("minisweagent not installed (git submodule update --init)") from model_tools import get_tool_definitions tools = get_tool_definitions( enabled_toolsets=["terminal", "file"], quiet_mode=True, ) names = [t["function"]["name"] for t in tools] - assert "terminal" in names, ( - f"terminal tool missing! Only got: {names}. " - "Check that minisweagent is installed (git submodule update --init)." - ) + assert "terminal" in names, f"terminal tool missing! Only got: {names}." # ========================================================================= @@ -269,38 +255,37 @@ class TestModalEnvironmentDefaults: # ========================================================================= class TestEnsurepipFix: - """Verify the pip fix is applied in the patched Modal init.""" + """Verify the pip fix is applied in the ModalEnvironment init.""" - def test_patched_init_creates_image_with_setup_commands(self): - """The patched __init__ should create a modal.Image with pip fix.""" + def test_modal_environment_creates_image_with_setup_commands(self): + """ModalEnvironment.__init__ should create a modal.Image with pip fix.""" try: - from environments.patches import _patch_swerex_modal + from tools.environments.modal import ModalEnvironment except ImportError: - pytest.skip("environments.patches not importable") + pytest.skip("tools.environments.modal not importable") - # Check that the patch code references ensurepip import inspect - source = inspect.getsource(_patch_swerex_modal) + source = inspect.getsource(ModalEnvironment.__init__) assert "ensurepip" in source, ( - "patches._patch_swerex_modal should include ensurepip fix " + "ModalEnvironment should include ensurepip fix " "for Modal's legacy image builder" ) assert "setup_dockerfile_commands" in source, ( - "patches._patch_swerex_modal should use setup_dockerfile_commands " + "ModalEnvironment should use setup_dockerfile_commands " "to fix pip before Modal's bootstrap" ) - def test_patched_init_uses_install_pipx_from_config(self): - """The patched init should respect install_pipx from config.""" + def test_modal_environment_uses_install_pipx(self): + """ModalEnvironment should pass install_pipx to ModalDeployment.""" try: - from environments.patches import _patch_swerex_modal + from tools.environments.modal import ModalEnvironment except ImportError: - pytest.skip("environments.patches not importable") + pytest.skip("tools.environments.modal not importable") import inspect - source = inspect.getsource(_patch_swerex_modal) + source = inspect.getsource(ModalEnvironment.__init__) assert "install_pipx" in source, ( - "patches._patch_swerex_modal should pass install_pipx to ModalDeployment" + "ModalEnvironment should pass install_pipx to ModalDeployment" ) diff --git a/tests/tools/test_terminal_requirements.py b/tests/tools/test_terminal_requirements.py index dfba9124..b3bc0b19 100644 --- a/tests/tools/test_terminal_requirements.py +++ b/tests/tools/test_terminal_requirements.py @@ -18,9 +18,8 @@ def _clear_terminal_env(monkeypatch): monkeypatch.delenv(key, raising=False) -def test_local_terminal_requirements_do_not_depend_on_minisweagent(monkeypatch, caplog): - """Local backend uses Hermes' own LocalEnvironment wrapper and should not - be marked unavailable just because `minisweagent` isn't importable.""" +def test_local_terminal_requirements(monkeypatch, caplog): + """Local backend uses Hermes' own LocalEnvironment wrapper.""" _clear_terminal_env(monkeypatch) monkeypatch.setenv("TERMINAL_ENV", "local") @@ -64,7 +63,7 @@ def test_modal_backend_without_token_or_config_logs_specific_error(monkeypatch, monkeypatch.setenv("TERMINAL_ENV", "modal") monkeypatch.setenv("HOME", str(tmp_path)) monkeypatch.setenv("USERPROFILE", str(tmp_path)) - monkeypatch.setattr(terminal_tool_module, "ensure_minisweagent_on_path", lambda *_args, **_kwargs: None) + # Pretend swerex is installed monkeypatch.setattr(terminal_tool_module.importlib.util, "find_spec", lambda _name: object()) with caplog.at_level(logging.ERROR): diff --git a/tests/tools/test_terminal_tool_requirements.py b/tests/tools/test_terminal_tool_requirements.py index 9c8bc8aa..5a347cc6 100644 --- a/tests/tools/test_terminal_tool_requirements.py +++ b/tests/tools/test_terminal_tool_requirements.py @@ -8,7 +8,7 @@ terminal_tool_module = importlib.import_module("tools.terminal_tool") class TestTerminalRequirements: - def test_local_backend_does_not_require_minisweagent_package(self, monkeypatch): + def test_local_backend_requirements(self, monkeypatch): monkeypatch.setattr( terminal_tool_module, "_get_env_config", diff --git a/tools/environments/docker.py b/tools/environments/docker.py index d7fd2ad7..c5546dbe 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -1,6 +1,6 @@ -"""Docker execution environment wrapping mini-swe-agent's DockerEnvironment. +"""Docker execution environment for sandboxed command execution. -Adds security hardening (cap-drop ALL, no-new-privileges, PID limits), +Security hardened (cap-drop ALL, no-new-privileges, PID limits), configurable resource limits (CPU, memory, disk), and optional filesystem persistence via bind mounts. """ @@ -13,6 +13,7 @@ import subprocess import sys import threading import time +import uuid from typing import Optional from tools.environments.base import BaseEnvironment @@ -227,12 +228,9 @@ class DockerEnvironment(BaseEnvironment): logger.warning(f"docker_volumes config is not a list: {volumes!r}") volumes = [] - # Fail fast if Docker is not available rather than surfacing a cryptic - # FileNotFoundError deep inside the mini-swe-agent stack. + # Fail fast if Docker is not available. _ensure_docker_available() - from minisweagent.environments.docker import DockerEnvironment as _Docker - # Build resource limit args resource_args = [] if cpu > 0: @@ -320,14 +318,28 @@ class DockerEnvironment(BaseEnvironment): # Resolve the docker executable once so it works even when # /usr/local/bin is not in PATH (common on macOS gateway/service). - docker_exe = find_docker() or "docker" + self._docker_exe = find_docker() or "docker" - self._inner = _Docker( - image=image, cwd=cwd, timeout=timeout, - run_args=all_run_args, - executable=docker_exe, + # Start the container directly via `docker run -d`. + container_name = f"hermes-{uuid.uuid4().hex[:8]}" + run_cmd = [ + self._docker_exe, "run", "-d", + "--name", container_name, + "-w", cwd, + *all_run_args, + image, + "sleep", "2h", + ] + logger.debug(f"Starting container: {' '.join(run_cmd)}") + result = subprocess.run( + run_cmd, + capture_output=True, + text=True, + timeout=120, # image pull may take a while + check=True, ) - self._container_id = self._inner.container_id + self._container_id = result.stdout.strip() + logger.info(f"Started container {container_name} ({self._container_id[:12]})") @staticmethod def _storage_opt_supported() -> bool: @@ -389,8 +401,8 @@ class DockerEnvironment(BaseEnvironment): exec_command = f"cd {work_dir} && {exec_command}" work_dir = "/" - assert self._inner.container_id, "Container not started" - cmd = [self._inner.config.executable, "exec"] + assert self._container_id, "Container not started" + cmd = [self._docker_exe, "exec"] if effective_stdin is not None: cmd.append("-i") cmd.extend(["-w", work_dir]) @@ -401,9 +413,7 @@ class DockerEnvironment(BaseEnvironment): value = hermes_env.get(key) if value is not None: cmd.extend(["-e", f"{key}={value}"]) - for key, value in self._inner.config.env.items(): - cmd.extend(["-e", f"{key}={value}"]) - cmd.extend([self._inner.container_id, "bash", "-lc", exec_command]) + cmd.extend([self._container_id, "bash", "-lc", exec_command]) try: _output_chunks = [] @@ -456,24 +466,29 @@ class DockerEnvironment(BaseEnvironment): def cleanup(self): """Stop and remove the container. Bind-mount dirs persist if persistent=True.""" - self._inner.cleanup() - - if not self._persistent and self._container_id: - # Inner cleanup only runs `docker stop` in background; container is left - # as stopped. When container_persistent=false we must remove it. - docker_exe = find_docker() or self._inner.config.executable + if self._container_id: try: - subprocess.run( - [docker_exe, "rm", "-f", self._container_id], - capture_output=True, - timeout=30, + # Stop in background so cleanup doesn't block + stop_cmd = ( + f"(timeout 60 {self._docker_exe} stop {self._container_id} || " + f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &" ) + subprocess.Popen(stop_cmd, shell=True) except Exception as e: - logger.warning("Failed to remove non-persistent container %s: %s", self._container_id, e) + logger.warning("Failed to stop container %s: %s", self._container_id, e) + + if not self._persistent: + # Also schedule removal (stop only leaves it as stopped) + try: + subprocess.Popen( + f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &", + shell=True, + ) + except Exception: + pass self._container_id = None if not self._persistent: - import shutil for d in (self._workspace_dir, self._home_dir): if d: shutil.rmtree(d, ignore_errors=True) diff --git a/tools/environments/modal.py b/tools/environments/modal.py index 56f08e9f..f8210ba7 100644 --- a/tools/environments/modal.py +++ b/tools/environments/modal.py @@ -1,14 +1,14 @@ -"""Modal cloud execution environment wrapping mini-swe-agent's SwerexModalEnvironment. +"""Modal cloud execution environment using SWE-ReX directly. Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem is snapshotted on cleanup and restored on next creation, so installed packages, project files, and config changes survive across sessions. """ +import asyncio import json import logging import threading -import time import uuid from pathlib import Path from typing import Any, Dict, Optional @@ -38,15 +38,49 @@ def _save_snapshots(data: Dict[str, str]) -> None: _SNAPSHOT_STORE.write_text(json.dumps(data, indent=2)) -class ModalEnvironment(BaseEnvironment): - """Modal cloud execution via mini-swe-agent. +class _AsyncWorker: + """Background thread with its own event loop for async-safe swe-rex calls. - Wraps SwerexModalEnvironment and adds sudo -S support, configurable - resources (CPU, memory, disk), and optional filesystem persistence - via Modal's snapshot_filesystem() API. + Allows sync code to submit async coroutines and block for results, + even when called from inside another running event loop (e.g. Atropos). """ - _patches_applied = False + def __init__(self): + self._loop: Optional[asyncio.AbstractEventLoop] = None + self._thread: Optional[threading.Thread] = None + self._started = threading.Event() + + def start(self): + self._thread = threading.Thread(target=self._run_loop, daemon=True) + self._thread.start() + self._started.wait(timeout=30) + + def _run_loop(self): + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) + self._started.set() + self._loop.run_forever() + + def run_coroutine(self, coro, timeout=600): + if self._loop is None or self._loop.is_closed(): + raise RuntimeError("AsyncWorker loop is not running") + future = asyncio.run_coroutine_threadsafe(coro, self._loop) + return future.result(timeout=timeout) + + def stop(self): + if self._loop and self._loop.is_running(): + self._loop.call_soon_threadsafe(self._loop.stop) + if self._thread: + self._thread.join(timeout=10) + + +class ModalEnvironment(BaseEnvironment): + """Modal cloud execution via SWE-ReX. + + Uses swe-rex's ModalDeployment directly for sandbox management. + Adds sudo -S support, configurable resources (CPU, memory, disk), + and optional filesystem persistence via Modal's snapshot API. + """ def __init__( self, @@ -59,17 +93,11 @@ class ModalEnvironment(BaseEnvironment): ): super().__init__(cwd=cwd, timeout=timeout) - if not ModalEnvironment._patches_applied: - try: - from environments.patches import apply_patches - apply_patches() - except ImportError: - pass - ModalEnvironment._patches_applied = True - self._persistent = persistent_filesystem self._task_id = task_id self._base_image = image + self._deployment = None + self._worker = _AsyncWorker() sandbox_kwargs = dict(modal_sandbox_kwargs or {}) @@ -88,16 +116,37 @@ class ModalEnvironment(BaseEnvironment): effective_image = restored_image if restored_image else image - from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment - self._inner = SwerexModalEnvironment( - image=effective_image, - cwd=cwd, - timeout=timeout, - startup_timeout=180.0, - runtime_timeout=3600.0, - modal_sandbox_kwargs=sandbox_kwargs, - install_pipx=True, # Required: installs pipx + swe-rex runtime (swerex-remote) - ) + # Pre-build a modal.Image with pip fix for Modal's legacy image builder. + # Some task images have broken pip; fix via ensurepip before Modal uses it. + import modal as _modal + if isinstance(effective_image, str): + effective_image = _modal.Image.from_registry( + effective_image, + setup_dockerfile_commands=[ + "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; " + "python -m ensurepip --upgrade --default-pip 2>/dev/null || true", + ], + ) + + # Start the async worker thread and create the deployment on it + # so all gRPC channels are bound to the worker's event loop. + self._worker.start() + + from swerex.deployment.modal import ModalDeployment + + async def _create_and_start(): + deployment = ModalDeployment( + image=effective_image, + startup_timeout=180.0, + runtime_timeout=3600.0, + deployment_timeout=3600.0, + install_pipx=True, + modal_sandbox_kwargs=sandbox_kwargs, + ) + await deployment.start() + return deployment + + self._deployment = self._worker.run_coroutine(_create_and_start()) def execute(self, command: str, cwd: str = "", *, timeout: int | None = None, @@ -114,21 +163,39 @@ class ModalEnvironment(BaseEnvironment): # subprocess stdin directly the way a local Popen can. When a sudo # password is present, use a shell-level pipe from printf so that the # password feeds sudo -S without appearing as an echo argument embedded - # in the shell string. The password is still visible in the remote - # sandbox's command line, but it is not exposed on the user's local - # machine — which is the primary threat being mitigated. + # in the shell string. if sudo_stdin is not None: import shlex exec_command = ( f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}" ) + from swerex.runtime.abstract import Command as RexCommand + + effective_cwd = cwd or self.cwd + effective_timeout = timeout or self.timeout + # Run in a background thread so we can poll for interrupts result_holder = {"value": None, "error": None} def _run(): try: - result_holder["value"] = self._inner.execute(exec_command, cwd=cwd, timeout=timeout) + async def _do_execute(): + return await self._deployment.runtime.execute( + RexCommand( + command=exec_command, + shell=True, + check=False, + cwd=effective_cwd, + timeout=effective_timeout, + merge_output_streams=True, + ) + ) + output = self._worker.run_coroutine(_do_execute()) + result_holder["value"] = { + "output": output.stdout, + "returncode": output.exit_code, + } except Exception as e: result_holder["error"] = e @@ -138,7 +205,10 @@ class ModalEnvironment(BaseEnvironment): t.join(timeout=0.2) if is_interrupted(): try: - self._inner.stop() + self._worker.run_coroutine( + asyncio.wait_for(self._deployment.stop(), timeout=10), + timeout=15, + ) except Exception: pass return { @@ -152,35 +222,38 @@ class ModalEnvironment(BaseEnvironment): def cleanup(self): """Snapshot the filesystem (if persistent) then stop the sandbox.""" - # Check if _inner was ever set (init may have failed) - if not hasattr(self, '_inner') or self._inner is None: + if self._deployment is None: return if self._persistent: try: - sandbox = getattr(self._inner, 'deployment', None) - sandbox = getattr(sandbox, '_sandbox', None) if sandbox else None + sandbox = getattr(self._deployment, '_sandbox', None) if sandbox: - import asyncio async def _snapshot(): img = await sandbox.snapshot_filesystem.aio() return img.object_id - try: - snapshot_id = asyncio.run(_snapshot()) - except RuntimeError: - import concurrent.futures - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: - snapshot_id = pool.submit( - asyncio.run, _snapshot() - ).result(timeout=60) - snapshots = _load_snapshots() - snapshots[self._task_id] = snapshot_id - _save_snapshots(snapshots) - logger.info("Modal: saved filesystem snapshot %s for task %s", - snapshot_id[:20], self._task_id) + try: + snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60) + except Exception: + snapshot_id = None + + if snapshot_id: + snapshots = _load_snapshots() + snapshots[self._task_id] = snapshot_id + _save_snapshots(snapshots) + logger.info("Modal: saved filesystem snapshot %s for task %s", + snapshot_id[:20], self._task_id) except Exception as e: logger.warning("Modal: filesystem snapshot failed: %s", e) - if hasattr(self._inner, 'stop'): - self._inner.stop() + try: + self._worker.run_coroutine( + asyncio.wait_for(self._deployment.stop(), timeout=10), + timeout=15, + ) + except Exception: + pass + finally: + self._worker.stop() + self._deployment = None diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 6d1dcebe..67283e2f 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -51,13 +51,6 @@ logger = logging.getLogger(__name__) from tools.interrupt import is_interrupted, _interrupt_event -# Add mini-swe-agent to path if not installed. In git worktrees the populated -# submodule may live in the main checkout rather than the worktree itself. -from minisweagent_path import ensure_minisweagent_on_path - -ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent) - - # ============================================================================= # Custom Singularity Environment with more space # ============================================================================= @@ -1188,27 +1181,15 @@ def terminal_tool( def check_terminal_requirements() -> bool: - """Check if all requirements for the terminal tool are met. - - Important: local and singularity backends now use Hermes' own environment - wrappers directly and do not require the ``minisweagent`` Python package to - be installed. Docker and Modal still rely on mini-swe-agent internals. - """ + """Check if all requirements for the terminal tool are met.""" config = _get_env_config() env_type = config["env_type"] try: if env_type == "local": - # Local execution uses Hermes' own LocalEnvironment wrapper and does - # not depend on minisweagent being importable. return True elif env_type == "docker": - ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent) - if importlib.util.find_spec("minisweagent") is None: - logger.error("mini-swe-agent is required for docker terminal backend but is not importable") - return False - # Check if docker is available (use find_docker for macOS PATH issues) from tools.environments.docker import find_docker docker = find_docker() if not docker: @@ -1225,7 +1206,6 @@ def check_terminal_requirements() -> bool: return False elif env_type == "ssh": - # Check that host and user are configured if not config.get("ssh_host") or not config.get("ssh_user"): logger.error( "SSH backend selected but TERMINAL_SSH_HOST and TERMINAL_SSH_USER " @@ -1235,11 +1215,9 @@ def check_terminal_requirements() -> bool: return True elif env_type == "modal": - ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent) - if importlib.util.find_spec("minisweagent") is None: - logger.error("mini-swe-agent is required for modal terminal backend but is not importable") + if importlib.util.find_spec("swerex") is None: + logger.error("swe-rex is required for modal terminal backend: pip install 'swe-rex[modal]'") return False - # Check for modal token has_token = os.getenv("MODAL_TOKEN_ID") is not None has_config = Path.home().joinpath(".modal.toml").exists() if not (has_token or has_config): @@ -1269,7 +1247,7 @@ def check_terminal_requirements() -> bool: if __name__ == "__main__": # Simple test when run directly - print("Terminal Tool Module (mini-swe-agent backend)") + print("Terminal Tool Module") print("=" * 50) config = _get_env_config()