refactor: remove mini-swe-agent dependency — inline Docker/Modal backends (#2804)

Drop the mini-swe-agent git submodule. All terminal backends now use hermes-agent's own environment implementations directly. Docker backend: - Inline the `docker run -d` container startup (was 15 lines in minisweagent's DockerEnvironment). Our wrapper already handled execute(), cleanup(), security hardening, volumes, and resource limits. Modal backend: - Import swe-rex's ModalDeployment directly instead of going through minisweagent's 90-line passthrough wrapper. - Bake the _AsyncWorker pattern (from environments/patches.py) directly into ModalEnvironment for Atropos compatibility without monkey-patching. Cleanup: - Remove minisweagent_path.py (submodule path resolution helper) - Remove submodule init/install from install.sh and setup-hermes.sh - Remove mini-swe-agent from .gitmodules - environments/patches.py is now a no-op (kept for backward compat) - terminal_tool.py no longer does sys.path hacking for minisweagent - mini_swe_runner.py guards imports (optional, for RL training only) - Update all affected tests to mock the new direct subprocess calls - Update README.md, CONTRIBUTING.md No functionality change — all Docker, Modal, local, SSH, Singularity, and Daytona backends behave identically. 6093 tests pass.
2026-03-24 07:30:25 -07:00
parent 2233f764af
commit 02b38b93cb
22 changed files with 283 additions and 591 deletions
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,3 @@
 [submodule "mini-swe-agent"]
 	path = mini-swe-agent
 	url = https://github.com/SWE-agent/mini-swe-agent
 [submodule "tinker-atropos"]
 	path = tinker-atropos
 	url = https://github.com/nousresearch/tinker-atropos
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -72,8 +72,9 @@ export VIRTUAL_ENV="$(pwd)/venv"
 # Install with all extras (messaging, cron, CLI menus, dev tools)
 uv pip install -e ".[all,dev]"
-uv pip install -e "./mini-swe-agent"
+
-uv pip install -e "./tinker-atropos"
+# Optional: RL training submodule
 # git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos"
 # Optional: browser tools
 npm install
--- a/README.md
+++ b/README.md
@@ -144,16 +144,14 @@ Quick start for contributors:
 ```bash
 git clone https://github.com/NousResearch/hermes-agent.git
 cd hermes-agent
 git submodule update --init mini-swe-agent   # required terminal backend
 curl -LsSf https://astral.sh/uv/install.sh | sh
 uv venv venv --python 3.11
 source venv/bin/activate
 uv pip install -e ".[all,dev]"
 uv pip install -e "./mini-swe-agent"
 python -m pytest tests/ -q
 ```
-> **RL Training (optional):** To work on the RL/Tinker-Atropos integration, also run:
+> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
 > ```bash
 > git submodule update --init tinker-atropos
 > uv pip install -e "./tinker-atropos"
--- a/cli.py
+++ b/cli.py
@@ -31,7 +31,7 @@ from typing import List, Dict, Any, Optional
 logger = logging.getLogger(__name__)
 # Suppress startup messages for clean CLI experience
-os.environ["MSWEA_SILENT_STARTUP"] = "1"  # mini-swe-agent
+os.environ["MSWEA_SILENT_STARTUP"] = "1"  # suppress mini-swe-agent startup noise if installed
 os.environ["HERMES_QUIET"] = "1"  # Our own modules
 import yaml
@@ -78,7 +78,7 @@ _hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
 _project_env = Path(__file__).parent / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
-# Point mini-swe-agent at ~/.hermes/ so it shares our config
+# Point mini-swe-agent at ~/.hermes/ if installed (RL training use)
 os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
 # =============================================================================
--- a/environments/patches.py
+++ b/environments/patches.py
@@ -2,203 +2,41 @@
 Monkey patches for making hermes-agent tools work inside async frameworks (Atropos).
 Problem:
-    Some tools use asyncio.run() internally (e.g., mini-swe-agent's Modal backend,
+    Some tools use asyncio.run() internally (e.g., Modal backend via SWE-ReX,
    web_extract). This crashes when called from inside Atropos's event loop because
    asyncio.run() can't be nested.
 Solution:
-    Replace the problematic methods with versions that use a dedicated background
+    The Modal environment (tools/environments/modal.py) now uses a dedicated
-    thread with its own event loop. The calling code sees the same sync interface --
+    _AsyncWorker thread internally, making it safe for both CLI and Atropos use.
-    call a function, get a result -- but internally the async work happens on a
+    No monkey-patching is required.
    separate thread that doesn't conflict with Atropos's loop.
-    These patches are safe for normal CLI use too: when there's no running event
+    This module is kept for backward compatibility — apply_patches() is now a no-op.
    loop, the behavior is identical (the background thread approach works regardless).
 What gets patched:
    - SwerexModalEnvironment.__init__ -- creates Modal deployment on a background thread
    - SwerexModalEnvironment.execute -- runs commands on the same background thread
    - SwerexModalEnvironment.stop -- stops deployment on the background thread
 Usage:
    Call apply_patches() once at import time (done automatically by hermes_base_env.py).
-    This is idempotent -- calling it multiple times is safe.
+    This is idempotent — calling it multiple times is safe.
 """
 import asyncio
 import logging
 import threading
 from typing import Any
 logger = logging.getLogger(__name__)
 _patches_applied = False
 class _AsyncWorker:
    """
    A dedicated background thread with its own event loop.
    Allows sync code to submit async coroutines and block for results,
    even when called from inside another running event loop. Used to
    bridge sync tool interfaces with async backends (Modal, SWE-ReX).
    """
    def __init__(self):
        self._loop: asyncio.AbstractEventLoop = None
        self._thread: threading.Thread = None
        self._started = threading.Event()
    def start(self):
        """Start the background event loop thread."""
        self._thread = threading.Thread(target=self._run_loop, daemon=True)
        self._thread.start()
        self._started.wait(timeout=30)
    def _run_loop(self):
        """Background thread entry point -- runs the event loop forever."""
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        self._started.set()
        self._loop.run_forever()
    def run_coroutine(self, coro, timeout=600):
        """
        Submit a coroutine to the background loop and block until it completes.
        Safe to call from any thread, including threads that already have
        a running event loop.
        """
        if self._loop is None or self._loop.is_closed():
            raise RuntimeError("AsyncWorker loop is not running")
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        return future.result(timeout=timeout)
    def stop(self):
        """Stop the background event loop and join the thread."""
        if self._loop and self._loop.is_running():
            self._loop.call_soon_threadsafe(self._loop.stop)
        if self._thread:
            self._thread.join(timeout=10)
 def _patch_swerex_modal():
    """
    Monkey patch SwerexModalEnvironment to use a background thread event loop
    instead of asyncio.run(). This makes it safe to call from inside Atropos's
    async event loop.
    The patched methods have the exact same interface and behavior -- the only
    difference is HOW the async work is executed internally.
    """
    try:
        from minisweagent.environments.extra.swerex_modal import (
            SwerexModalEnvironment,
            SwerexModalEnvironmentConfig,
        )
        from swerex.deployment.modal import ModalDeployment
        from swerex.runtime.abstract import Command as RexCommand
    except ImportError:
        # mini-swe-agent or swe-rex not installed -- nothing to patch
        logger.debug("mini-swe-agent Modal backend not available, skipping patch")
        return
    # Save original methods so we can refer to config handling
    _original_init = SwerexModalEnvironment.__init__
    def _patched_init(self, **kwargs):
        """Patched __init__: creates Modal deployment on a background thread."""
        self.config = SwerexModalEnvironmentConfig(**kwargs)
        # Start a dedicated event loop thread for all Modal async operations
        self._worker = _AsyncWorker()
        self._worker.start()
        # Pre-build a modal.Image with pip fix for Modal's legacy image builder.
        # Modal requires `python -m pip` to work during image build, but some
        # task images (e.g., TBLite's broken-python) have intentionally broken pip.
        # Fix: remove stale pip dist-info and reinstall via ensurepip before Modal
        # tries to use it. This is a no-op for images where pip already works.
        import modal as _modal
        image_spec = self.config.image
        if isinstance(image_spec, str):
            image_spec = _modal.Image.from_registry(
                image_spec,
                setup_dockerfile_commands=[
                    "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
                    "python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
                ],
            )
        # Create AND start the deployment entirely on the worker's loop/thread
        # so all gRPC channels and async state are bound to that loop
        async def _create_and_start():
            deployment = ModalDeployment(
                image=image_spec,
                startup_timeout=self.config.startup_timeout,
                runtime_timeout=self.config.runtime_timeout,
                deployment_timeout=self.config.deployment_timeout,
                install_pipx=self.config.install_pipx,
                modal_sandbox_kwargs=self.config.modal_sandbox_kwargs,
            )
            await deployment.start()
            return deployment
        self.deployment = self._worker.run_coroutine(_create_and_start())
    def _patched_execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
        """Patched execute: runs commands on the background thread's loop."""
        async def _do_execute():
            return await self.deployment.runtime.execute(
                RexCommand(
                    command=command,
                    shell=True,
                    check=False,
                    cwd=cwd or self.config.cwd,
                    timeout=timeout or self.config.timeout,
                    merge_output_streams=True,
                    env=self.config.env if self.config.env else None,
                )
            )
        output = self._worker.run_coroutine(_do_execute())
        return {
            "output": output.stdout,
            "returncode": output.exit_code,
        }
    def _patched_stop(self):
        """Patched stop: stops deployment on the background thread, then stops the thread."""
        try:
            self._worker.run_coroutine(
                asyncio.wait_for(self.deployment.stop(), timeout=10),
                timeout=15,
            )
        except Exception:
            pass
        finally:
            self._worker.stop()
    # Apply the patches
    SwerexModalEnvironment.__init__ = _patched_init
    SwerexModalEnvironment.execute = _patched_execute
    SwerexModalEnvironment.stop = _patched_stop
    logger.debug("Patched SwerexModalEnvironment for async-safe operation")
 def apply_patches():
-    """
+    """Apply all monkey patches needed for Atropos compatibility.
    Apply all monkey patches needed for Atropos compatibility.
-    Safe to call multiple times -- patches are only applied once.
+    Now a no-op — Modal async safety is built directly into ModalEnvironment.
-    Safe for normal CLI use -- patched code works identically when
+    Safe to call multiple times.
    there is no running event loop.
    """
    global _patches_applied
    if _patches_applied:
        return
-    _patch_swerex_modal()
+    # Modal async-safety is now built into tools/environments/modal.py
    # via the _AsyncWorker class. No monkey-patching needed.
    logger.debug("apply_patches() called — no patches needed (async safety is built-in)")
    _patches_applied = True
--- a/1
+++ b/1
--- a/mini_swe_runner.py
+++ b/mini_swe_runner.py
@@ -42,11 +42,13 @@ from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
-# Add mini-swe-agent to path if not installed. In git worktrees the populated
+# mini-swe-agent is an optional dependency for this runner.
-# submodule may live in the main checkout rather than the worktree itself.
+# Install separately: git submodule update --init mini-swe-agent && pip install -e ./mini-swe-agent
-from minisweagent_path import ensure_minisweagent_on_path
+try:
-
+    import minisweagent  # noqa: F401
-ensure_minisweagent_on_path(Path(__file__).resolve().parent)
+    _HAS_MINISWEAGENT = True
 except ImportError:
    _HAS_MINISWEAGENT = False
 # ============================================================================
@@ -110,7 +112,10 @@ def create_environment(
    **kwargs
 ):
    """
-    Create an execution environment from mini-swe-agent.
+    Create an execution environment.
    Uses mini-swe-agent environments when available, which requires the
    mini-swe-agent submodule to be installed separately.
    Args:
        env_type: One of "local", "docker", "modal"
@@ -122,6 +127,12 @@ def create_environment(
    Returns:
        Environment instance with execute() method
    """
    if not _HAS_MINISWEAGENT:
        raise ImportError(
            "mini-swe-agent is required for mini_swe_runner.py. "
            "Install it: git submodule update --init mini-swe-agent && pip install -e ./mini-swe-agent"
        )
    if env_type == "local":
        from minisweagent.environments.local import LocalEnvironment
        return LocalEnvironment(cwd=cwd, timeout=timeout)
--- a/minisweagent_path.py
+++ b/minisweagent_path.py
@@ -1,92 +0,0 @@
 """Helpers for locating the mini-swe-agent source tree.
 Hermes often runs from git worktrees. In that layout the worktree root may have
 an empty ``mini-swe-agent/`` placeholder while the real populated submodule
 lives under the main checkout that owns the shared ``.git`` directory.
 These helpers locate a usable ``mini-swe-agent/src`` directory and optionally
 prepend it to ``sys.path`` so imports like ``import minisweagent`` work from
 both normal checkouts and worktrees.
 """
 from __future__ import annotations
 import importlib.util
 import sys
 from pathlib import Path
 from typing import Optional
 def _read_gitdir(repo_root: Path) -> Optional[Path]:
    """Resolve the gitdir referenced by ``repo_root/.git`` when it is a file."""
    git_marker = repo_root / ".git"
    if not git_marker.is_file():
        return None
    try:
        raw = git_marker.read_text(encoding="utf-8").strip()
    except OSError:
        return None
    prefix = "gitdir:"
    if not raw.lower().startswith(prefix):
        return None
    target = raw[len(prefix):].strip()
    gitdir = Path(target)
    if not gitdir.is_absolute():
        gitdir = (repo_root / gitdir).resolve()
    else:
        gitdir = gitdir.resolve()
    return gitdir
 def discover_minisweagent_src(repo_root: Optional[Path] = None) -> Optional[Path]:
    """Return the best available ``mini-swe-agent/src`` path, if any.
    Search order:
    1. Current checkout/worktree root
    2. Main checkout that owns the shared ``.git`` directory (for worktrees)
    """
    repo_root = (repo_root or Path(__file__).resolve().parent).resolve()
    candidates: list[Path] = [repo_root / "mini-swe-agent" / "src"]
    gitdir = _read_gitdir(repo_root)
    if gitdir is not None:
        # Worktree layout: <main>/.git/worktrees/<name>
        if len(gitdir.parents) >= 3 and gitdir.parent.name == "worktrees":
            candidates.append(gitdir.parents[2] / "mini-swe-agent" / "src")
        # Direct checkout with .git file pointing elsewhere
        elif gitdir.name == ".git":
            candidates.append(gitdir.parent / "mini-swe-agent" / "src")
    seen = set()
    for candidate in candidates:
        candidate = candidate.resolve()
        if candidate in seen:
            continue
        seen.add(candidate)
        if candidate.exists() and candidate.is_dir():
            return candidate
    return None
 def ensure_minisweagent_on_path(repo_root: Optional[Path] = None) -> Optional[Path]:
    """Ensure ``minisweagent`` is importable by prepending its src dir to sys.path.
    Returns the inserted/discovered path, or ``None`` if the package is already
    importable or no local source tree could be found.
    """
    if importlib.util.find_spec("minisweagent") is not None:
        return None
    src = discover_minisweagent_src(repo_root)
    if src is None:
        return None
    src_str = str(src)
    if src_str not in sys.path:
        sys.path.insert(0, src_str)
    return src
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -90,7 +90,7 @@ hermes-agent = "run_agent:main"
 hermes-acp = "acp_adapter.entry:main"
 [tool.setuptools]
-py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "mini_swe_runner", "minisweagent_path", "rl_cli", "utils"]
+py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "mini_swe_runner", "rl_cli", "utils"]
 [tool.setuptools.packages.find]
 include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "honcho_integration", "acp_adapter"]
--- a/run_agent.py
+++ b/run_agent.py
@@ -58,7 +58,7 @@ if _loaded_env_paths:
 else:
    logger.info("No .env file found. Using system environment variables.")
-# Point mini-swe-agent at ~/.hermes/ so it shares our config
+# Point mini-swe-agent at ~/.hermes/ if installed (RL training use)
 os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
 os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -637,13 +637,6 @@ clone_repo() {
    cd "$INSTALL_DIR"
    # Only init mini-swe-agent (terminal tool backend — required).
    # tinker-atropos (RL training) is optional and heavy — users can opt in later
    # with: git submodule update --init tinker-atropos && uv pip install -e ./tinker-atropos
    log_info "Initializing mini-swe-agent submodule (terminal backend)..."
    git submodule update --init mini-swe-agent
    log_success "Submodule ready"
    log_success "Repository ready"
 }
@@ -718,18 +711,6 @@ install_deps() {
    log_success "Main package installed"
    # Install submodules
    log_info "Installing mini-swe-agent (terminal tool backend)..."
    if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
        if $UV_CMD pip install -e "./mini-swe-agent"; then
            log_success "mini-swe-agent installed"
        else
            log_warn "mini-swe-agent install failed (Docker/Modal terminal backends may not work, local terminal is unaffected)"
        fi
    else
        log_warn "mini-swe-agent not found (run: git submodule update --init)"
    fi
    # tinker-atropos (RL training) is optional — skip by default.
    # To enable RL tools: git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos"
    if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
--- a/setup-hermes.sh
+++ b/setup-hermes.sh
@@ -124,16 +124,7 @@ echo -e "${GREEN}✓${NC} Dependencies installed"
 # Submodules (terminal backend + RL training)
 # ============================================================================
-echo -e "${CYAN}→${NC} Installing submodules..."
+echo -e "${CYAN}→${NC} Installing optional submodules..."
 # mini-swe-agent (terminal tool backend)
 if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
    $UV_CMD pip install -e "./mini-swe-agent" && \
        echo -e "${GREEN}✓${NC} mini-swe-agent installed" || \
        echo -e "${YELLOW}⚠${NC} mini-swe-agent install failed (Docker/Modal terminal backends may not work, local terminal is unaffected)"
 else
    echo -e "${YELLOW}⚠${NC} mini-swe-agent not found (run: git submodule update --init --recursive)"
 fi
 # tinker-atropos (RL training backend)
 if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
--- a/tests/integration/test_modal_terminal.py
+++ b/tests/integration/test_modal_terminal.py
@@ -41,7 +41,6 @@ except ImportError:
 # Add project root to path for imports
 parent_dir = Path(__file__).parent.parent.parent
 sys.path.insert(0, str(parent_dir))
 sys.path.insert(0, str(parent_dir / "mini-swe-agent" / "src"))
 # Import terminal_tool module directly using importlib to avoid tools/__init__.py
 import importlib.util
--- a/tests/test_minisweagent_path.py
+++ b/tests/test_minisweagent_path.py
@@ -1,34 +1,5 @@
-"""Tests for minisweagent_path.py."""
+"""Tests for minisweagent_path.py — REMOVED.
-from pathlib import Path
+minisweagent_path.py was removed as part of dropping the mini-swe-agent
-
+dependency. These tests are no longer applicable.
-from minisweagent_path import discover_minisweagent_src
+"""
 def test_discover_minisweagent_src_in_current_checkout(tmp_path):
    repo = tmp_path / "repo"
    src = repo / "mini-swe-agent" / "src"
    src.mkdir(parents=True)
    assert discover_minisweagent_src(repo) == src.resolve()
 def test_discover_minisweagent_src_falls_back_from_worktree_to_main_checkout(tmp_path):
    main_repo = tmp_path / "main-repo"
    (main_repo / ".git" / "worktrees" / "wt1").mkdir(parents=True)
    main_src = main_repo / "mini-swe-agent" / "src"
    main_src.mkdir(parents=True)
    worktree = tmp_path / "worktree"
    worktree.mkdir()
    (worktree / ".git").write_text(f"gitdir: {main_repo / '.git' / 'worktrees' / 'wt1'}\n", encoding="utf-8")
    (worktree / "mini-swe-agent").mkdir()  # empty placeholder, no src/
    assert discover_minisweagent_src(worktree) == main_src.resolve()
 def test_discover_minisweagent_src_returns_none_when_missing(tmp_path):
    repo = tmp_path / "repo"
    repo.mkdir()
    assert discover_minisweagent_src(repo) is None
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -131,9 +131,9 @@ class TestExecuteCode(unittest.TestCase):
    def test_repo_root_modules_are_importable(self):
        """Sandboxed scripts can import modules that live at the repo root."""
-        result = self._run('import minisweagent_path; print(minisweagent_path.__file__)')
+        result = self._run('import hermes_constants; print(hermes_constants.__file__)')
        self.assertEqual(result["status"], "success")
-        self.assertIn("minisweagent_path.py", result["output"])
+        self.assertIn("hermes_constants.py", result["output"])
    def test_single_tool_call(self):
        """Script calls terminal and prints the result."""
--- a/tests/tools/test_docker_environment.py
+++ b/tests/tools/test_docker_environment.py
@@ -9,25 +9,24 @@ import pytest
 from tools.environments import docker as docker_env
-def _install_fake_minisweagent(monkeypatch, captured_run_args):
+def _mock_subprocess_run(monkeypatch):
-    class MockInnerDocker:
+    """Mock subprocess.run to intercept docker run -d and docker version calls.
        container_id = "fake-container"
        config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})()
-        def __init__(self, **kwargs):
+    Returns a list of captured (cmd, kwargs) tuples for inspection.
-            captured_run_args.extend(kwargs.get("run_args", []))
+    """
    calls = []
-        def cleanup(self):
+    def _run(cmd, **kwargs):
-            pass
+        calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        if isinstance(cmd, list) and len(cmd) >= 2:
            if cmd[1] == "version":
                return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
            if cmd[1] == "run":
                return subprocess.CompletedProcess(cmd, 0, stdout="fake-container-id\n", stderr="")
        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
-    minisweagent_mod = types.ModuleType("minisweagent")
+    monkeypatch.setattr(docker_env.subprocess, "run", _run)
-    environments_mod = types.ModuleType("minisweagent.environments")
+    return calls
    docker_mod = types.ModuleType("minisweagent.environments.docker")
    docker_mod.DockerEnvironment = MockInnerDocker
    monkeypatch.setitem(sys.modules, "minisweagent", minisweagent_mod)
    monkeypatch.setitem(sys.modules, "minisweagent.environments", environments_mod)
    monkeypatch.setitem(sys.modules, "minisweagent.environments.docker", docker_mod)
 def _make_dummy_env(**kwargs):
@@ -49,7 +48,7 @@ def _make_dummy_env(**kwargs):
 def test_ensure_docker_available_logs_and_raises_when_not_found(monkeypatch, caplog):
-    """When docker cannot be found, raise a clear error before mini-swe setup."""
+    """When docker cannot be found, raise a clear error before container setup."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: None)
    monkeypatch.setattr(
@@ -118,14 +117,8 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()
    def _run_docker_version(*args, **kwargs):
        return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
+    calls = _mock_subprocess_run(monkeypatch)
    captured_run_args = []
    _install_fake_minisweagent(monkeypatch, captured_run_args)
    _make_dummy_env(
        cwd="/workspace",
@@ -133,7 +126,10 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
        auto_mount_cwd=True,
    )
-    run_args_str = " ".join(captured_run_args)
+    # Find the docker run call and check its args
    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args_str = " ".join(run_calls[0][0])
    assert f"{project_dir}:/workspace" in run_args_str
@@ -142,14 +138,8 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()
    def _run_docker_version(*args, **kwargs):
        return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
+    calls = _mock_subprocess_run(monkeypatch)
    captured_run_args = []
    _install_fake_minisweagent(monkeypatch, captured_run_args)
    _make_dummy_env(
        cwd="/root",
@@ -157,7 +147,9 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
        auto_mount_cwd=False,
    )
-    run_args_str = " ".join(captured_run_args)
+    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args_str = " ".join(run_calls[0][0])
    assert f"{project_dir}:/workspace" not in run_args_str
@@ -168,14 +160,8 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path
    other_dir = tmp_path / "other"
    other_dir.mkdir()
    def _run_docker_version(*args, **kwargs):
        return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
+    calls = _mock_subprocess_run(monkeypatch)
    captured_run_args = []
    _install_fake_minisweagent(monkeypatch, captured_run_args)
    _make_dummy_env(
        cwd="/workspace",
@@ -184,7 +170,9 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path
        volumes=[f"{other_dir}:/workspace"],
    )
-    run_args_str = " ".join(captured_run_args)
+    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args_str = " ".join(run_calls[0][0])
    assert f"{other_dir}:/workspace" in run_args_str
    assert run_args_str.count(":/workspace") == 1
@@ -194,14 +182,8 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()
    def _run_docker_version(*args, **kwargs):
        return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
+    calls = _mock_subprocess_run(monkeypatch)
    captured_run_args = []
    _install_fake_minisweagent(monkeypatch, captured_run_args)
    _make_dummy_env(
        cwd="/workspace",
@@ -211,28 +193,23 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
        task_id="test-persistent-auto-mount",
    )
-    run_args_str = " ".join(captured_run_args)
+    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    assert run_calls, "docker run should have been called"
    run_args_str = " ".join(run_calls[0][0])
    assert f"{project_dir}:/workspace" in run_args_str
    assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in run_args_str
 def test_non_persistent_cleanup_removes_container(monkeypatch):
-    """When container_persistent=false, cleanup() must run docker rm -f so the container is removed (Fixes #1679)."""
+    """When persistent=false, cleanup() must schedule docker stop + rm."""
    run_calls = []
    def _run(cmd, **kwargs):
        run_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        if cmd and getattr(cmd[0], "__str__", None) and "docker" in str(cmd[0]):
            if len(cmd) >= 2 and cmd[1] == "run":
                return subprocess.CompletedProcess(cmd, 0, stdout="abc123container\n", stderr="")
        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    monkeypatch.setattr(docker_env.subprocess, "run", _run)
+    calls = _mock_subprocess_run(monkeypatch)
    monkeypatch.setattr(docker_env.subprocess, "Popen", lambda *a, **k: type("P", (), {"poll": lambda: None, "wait": lambda **kw: None, "returncode": 0, "stdout": iter([]), "stdin": None})())
-    captured_run_args = []
+    popen_cmds = []
-    _install_fake_minisweagent(monkeypatch, captured_run_args)
+    monkeypatch.setattr(
        docker_env.subprocess, "Popen",
        lambda cmd, **kw: (popen_cmds.append(cmd), type("P", (), {"poll": lambda s: 0, "wait": lambda s, **k: None, "returncode": 0, "stdout": iter([]), "stdin": None})())[1],
    )
    env = _make_dummy_env(persistent_filesystem=False, task_id="ephemeral-task")
    assert env._container_id
@@ -240,8 +217,9 @@ def test_non_persistent_cleanup_removes_container(monkeypatch):
    env.cleanup()
-    rm_calls = [c for c in run_calls if isinstance(c[0], list) and len(c[0]) >= 4 and c[0][1:4] == ["rm", "-f", container_id]]
+    # Should have stop and rm calls via Popen
-    assert len(rm_calls) >= 1, "cleanup() should run docker rm -f <container_id> when container_persistent=false"
+    stop_cmds = [c for c in popen_cmds if container_id in str(c) and "stop" in str(c)]
    assert len(stop_cmds) >= 1, f"cleanup() should schedule docker stop for {container_id}"
 class _FakePopen:
@@ -263,10 +241,8 @@ def _make_execute_only_env(forward_env=None):
    env._forward_env = forward_env or []
    env._prepare_command = lambda command: (command, None)
    env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124}
-    env._inner = type("Inner", (), {
+    env._container_id = "test-container"
-        "container_id": "test-container",
+    env._docker_exe = "/usr/bin/docker"
        "config": type("Cfg", (), {"executable": "/usr/bin/docker", "env": {}})(),
    })()
    return env
@@ -304,31 +280,3 @@ def test_execute_prefers_shell_env_over_hermes_dotenv(monkeypatch):
    assert "GITHUB_TOKEN=value_from_shell" in popen_calls[0]
    assert "GITHUB_TOKEN=value_from_dotenv" not in popen_calls[0]
 def test_non_persistent_cleanup_removes_container(monkeypatch):
    """When container_persistent=false, cleanup() must run docker rm -f so the container is removed (Fixes #1679)."""
    run_calls = []
    def _run(cmd, **kwargs):
        run_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
        if cmd and getattr(cmd[0], '__str__', None) and 'docker' in str(cmd[0]):
            if len(cmd) >= 2 and cmd[1] == 'run':
                return subprocess.CompletedProcess(cmd, 0, stdout="abc123container\n", stderr="")
        return subprocess.CompletedProcess(cmd, 0, stdout='', stderr='')
    monkeypatch.setattr(docker_env, 'find_docker', lambda: '/usr/bin/docker')
    monkeypatch.setattr(docker_env.subprocess, 'run', _run)
    monkeypatch.setattr(docker_env.subprocess, 'Popen', lambda *a, **k: type('P', (), {'poll': lambda: None, 'wait': lambda **kw: None, 'returncode': 0, 'stdout': iter([]), 'stdin': None})())
    captured_run_args = []
    _install_fake_minisweagent(monkeypatch, captured_run_args)
    env = _make_dummy_env(persistent_filesystem=False, task_id='ephemeral-task')
    assert env._container_id
    container_id = env._container_id
    env.cleanup()
    rm_calls = [c for c in run_calls if isinstance(c[0], list) and len(c[0]) >= 4 and c[0][1:4] == ['rm', '-f', container_id]]
    assert len(rm_calls) >= 1, 'cleanup() should run docker rm -f <container_id> when container_persistent=false'
--- a/tests/tools/test_modal_sandbox_fixes.py
+++ b/tests/tools/test_modal_sandbox_fixes.py
@@ -1,11 +1,11 @@
 """Tests for Modal sandbox infrastructure fixes (TBLite baseline).
-Covers the 9 bugs discovered while setting up TBLite evaluation:
+Covers the bugs discovered while setting up TBLite evaluation:
-1. Tool resolution — terminal + file tools load with minisweagent
+1. Tool resolution — terminal + file tools load correctly
 2. CWD fix — host paths get replaced with /root for container backends
 3. ephemeral_disk version check
 4. Tilde ~ replaced with /root for container backends
-5. ensurepip fix in patches.py for Modal image builder
+5. ensurepip fix in Modal image builder
 6. install_pipx stays True for swerex-remote
 7. /home/ added to host prefix check
 """
@@ -36,17 +36,8 @@ except ImportError:
 class TestToolResolution:
    """Verify get_tool_definitions returns all expected tools for eval."""
    def _has_minisweagent(self):
        try:
            import minisweagent  # noqa: F401
            return True
        except ImportError:
            return False
    def test_terminal_and_file_toolsets_resolve_all_tools(self):
        """enabled_toolsets=['terminal', 'file'] should produce 6 tools."""
        if not self._has_minisweagent():
            pytest.skip("minisweagent not installed (git submodule update --init)")
        from model_tools import get_tool_definitions
        tools = get_tool_definitions(
            enabled_toolsets=["terminal", "file"],
@@ -58,18 +49,13 @@ class TestToolResolution:
    def test_terminal_tool_present(self):
        """The terminal tool must be present (not silently dropped)."""
        if not self._has_minisweagent():
            pytest.skip("minisweagent not installed (git submodule update --init)")
        from model_tools import get_tool_definitions
        tools = get_tool_definitions(
            enabled_toolsets=["terminal", "file"],
            quiet_mode=True,
        )
        names = [t["function"]["name"] for t in tools]
-        assert "terminal" in names, (
+        assert "terminal" in names, f"terminal tool missing! Only got: {names}."
            f"terminal tool missing! Only got: {names}. "
            "Check that minisweagent is installed (git submodule update --init)."
        )
 # =========================================================================
@@ -269,38 +255,37 @@ class TestModalEnvironmentDefaults:
 # =========================================================================
 class TestEnsurepipFix:
-    """Verify the pip fix is applied in the patched Modal init."""
+    """Verify the pip fix is applied in the ModalEnvironment init."""
-    def test_patched_init_creates_image_with_setup_commands(self):
+    def test_modal_environment_creates_image_with_setup_commands(self):
-        """The patched __init__ should create a modal.Image with pip fix."""
+        """ModalEnvironment.__init__ should create a modal.Image with pip fix."""
        try:
-            from environments.patches import _patch_swerex_modal
+            from tools.environments.modal import ModalEnvironment
        except ImportError:
-            pytest.skip("environments.patches not importable")
+            pytest.skip("tools.environments.modal not importable")
        # Check that the patch code references ensurepip
        import inspect
-        source = inspect.getsource(_patch_swerex_modal)
+        source = inspect.getsource(ModalEnvironment.__init__)
        assert "ensurepip" in source, (
-            "patches._patch_swerex_modal should include ensurepip fix "
+            "ModalEnvironment should include ensurepip fix "
            "for Modal's legacy image builder"
        )
        assert "setup_dockerfile_commands" in source, (
-            "patches._patch_swerex_modal should use setup_dockerfile_commands "
+            "ModalEnvironment should use setup_dockerfile_commands "
            "to fix pip before Modal's bootstrap"
        )
-    def test_patched_init_uses_install_pipx_from_config(self):
+    def test_modal_environment_uses_install_pipx(self):
-        """The patched init should respect install_pipx from config."""
+        """ModalEnvironment should pass install_pipx to ModalDeployment."""
        try:
-            from environments.patches import _patch_swerex_modal
+            from tools.environments.modal import ModalEnvironment
        except ImportError:
-            pytest.skip("environments.patches not importable")
+            pytest.skip("tools.environments.modal not importable")
        import inspect
-        source = inspect.getsource(_patch_swerex_modal)
+        source = inspect.getsource(ModalEnvironment.__init__)
        assert "install_pipx" in source, (
-            "patches._patch_swerex_modal should pass install_pipx to ModalDeployment"
+            "ModalEnvironment should pass install_pipx to ModalDeployment"
        )
--- a/tests/tools/test_terminal_requirements.py
+++ b/tests/tools/test_terminal_requirements.py
@@ -18,9 +18,8 @@ def _clear_terminal_env(monkeypatch):
        monkeypatch.delenv(key, raising=False)
-def test_local_terminal_requirements_do_not_depend_on_minisweagent(monkeypatch, caplog):
+def test_local_terminal_requirements(monkeypatch, caplog):
-    """Local backend uses Hermes' own LocalEnvironment wrapper and should not
+    """Local backend uses Hermes' own LocalEnvironment wrapper."""
    be marked unavailable just because `minisweagent` isn't importable."""
    _clear_terminal_env(monkeypatch)
    monkeypatch.setenv("TERMINAL_ENV", "local")
@@ -64,7 +63,7 @@ def test_modal_backend_without_token_or_config_logs_specific_error(monkeypatch,
    monkeypatch.setenv("TERMINAL_ENV", "modal")
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("USERPROFILE", str(tmp_path))
-    monkeypatch.setattr(terminal_tool_module, "ensure_minisweagent_on_path", lambda *_args, **_kwargs: None)
+    # Pretend swerex is installed
    monkeypatch.setattr(terminal_tool_module.importlib.util, "find_spec", lambda _name: object())
    with caplog.at_level(logging.ERROR):
--- a/tests/tools/test_terminal_tool_requirements.py
+++ b/tests/tools/test_terminal_tool_requirements.py
@@ -8,7 +8,7 @@ terminal_tool_module = importlib.import_module("tools.terminal_tool")
 class TestTerminalRequirements:
-    def test_local_backend_does_not_require_minisweagent_package(self, monkeypatch):
+    def test_local_backend_requirements(self, monkeypatch):
        monkeypatch.setattr(
            terminal_tool_module,
            "_get_env_config",
--- a/tools/environments/docker.py
+++ b/tools/environments/docker.py
@@ -1,6 +1,6 @@
-"""Docker execution environment wrapping mini-swe-agent's DockerEnvironment.
+"""Docker execution environment for sandboxed command execution.
-Adds security hardening (cap-drop ALL, no-new-privileges, PID limits),
+Security hardened (cap-drop ALL, no-new-privileges, PID limits),
 configurable resource limits (CPU, memory, disk), and optional filesystem
 persistence via bind mounts.
 """
@@ -13,6 +13,7 @@ import subprocess
 import sys
 import threading
 import time
 import uuid
 from typing import Optional
 from tools.environments.base import BaseEnvironment
@@ -227,12 +228,9 @@ class DockerEnvironment(BaseEnvironment):
            logger.warning(f"docker_volumes config is not a list: {volumes!r}")
            volumes = []
-        # Fail fast if Docker is not available rather than surfacing a cryptic
+        # Fail fast if Docker is not available.
        # FileNotFoundError deep inside the mini-swe-agent stack.
        _ensure_docker_available()
        from minisweagent.environments.docker import DockerEnvironment as _Docker
        # Build resource limit args
        resource_args = []
        if cpu > 0:
@@ -320,14 +318,28 @@ class DockerEnvironment(BaseEnvironment):
        # Resolve the docker executable once so it works even when
        # /usr/local/bin is not in PATH (common on macOS gateway/service).
-        docker_exe = find_docker() or "docker"
+        self._docker_exe = find_docker() or "docker"
-        self._inner = _Docker(
+        # Start the container directly via `docker run -d`.
-            image=image, cwd=cwd, timeout=timeout,
+        container_name = f"hermes-{uuid.uuid4().hex[:8]}"
-            run_args=all_run_args,
+        run_cmd = [
-            executable=docker_exe,
+            self._docker_exe, "run", "-d",
            "--name", container_name,
            "-w", cwd,
            *all_run_args,
            image,
            "sleep", "2h",
        ]
        logger.debug(f"Starting container: {' '.join(run_cmd)}")
        result = subprocess.run(
            run_cmd,
            capture_output=True,
            text=True,
            timeout=120,  # image pull may take a while
            check=True,
        )
-        self._container_id = self._inner.container_id
+        self._container_id = result.stdout.strip()
        logger.info(f"Started container {container_name} ({self._container_id[:12]})")
    @staticmethod
    def _storage_opt_supported() -> bool:
@@ -389,8 +401,8 @@ class DockerEnvironment(BaseEnvironment):
            exec_command = f"cd {work_dir} && {exec_command}"
            work_dir = "/"
-        assert self._inner.container_id, "Container not started"
+        assert self._container_id, "Container not started"
-        cmd = [self._inner.config.executable, "exec"]
+        cmd = [self._docker_exe, "exec"]
        if effective_stdin is not None:
            cmd.append("-i")
        cmd.extend(["-w", work_dir])
@@ -401,9 +413,7 @@ class DockerEnvironment(BaseEnvironment):
                value = hermes_env.get(key)
            if value is not None:
                cmd.extend(["-e", f"{key}={value}"])
-        for key, value in self._inner.config.env.items():
+        cmd.extend([self._container_id, "bash", "-lc", exec_command])
            cmd.extend(["-e", f"{key}={value}"])
        cmd.extend([self._inner.container_id, "bash", "-lc", exec_command])
        try:
            _output_chunks = []
@@ -456,24 +466,29 @@ class DockerEnvironment(BaseEnvironment):
    def cleanup(self):
        """Stop and remove the container. Bind-mount dirs persist if persistent=True."""
-        self._inner.cleanup()
+        if self._container_id:
        if not self._persistent and self._container_id:
            # Inner cleanup only runs `docker stop` in background; container is left
            # as stopped. When container_persistent=false we must remove it.
            docker_exe = find_docker() or self._inner.config.executable
            try:
-                subprocess.run(
+                # Stop in background so cleanup doesn't block
-                    [docker_exe, "rm", "-f", self._container_id],
+                stop_cmd = (
-                    capture_output=True,
+                    f"(timeout 60 {self._docker_exe} stop {self._container_id} || "
-                    timeout=30,
+                    f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &"
                )
                subprocess.Popen(stop_cmd, shell=True)
            except Exception as e:
-                logger.warning("Failed to remove non-persistent container %s: %s", self._container_id, e)
+                logger.warning("Failed to stop container %s: %s", self._container_id, e)
            if not self._persistent:
                # Also schedule removal (stop only leaves it as stopped)
                try:
                    subprocess.Popen(
                        f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &",
                        shell=True,
                    )
                except Exception:
                    pass
            self._container_id = None
        if not self._persistent:
            import shutil
            for d in (self._workspace_dir, self._home_dir):
                if d:
                    shutil.rmtree(d, ignore_errors=True)
--- a/tools/environments/modal.py
+++ b/tools/environments/modal.py
@@ -1,14 +1,14 @@
-"""Modal cloud execution environment wrapping mini-swe-agent's SwerexModalEnvironment.
+"""Modal cloud execution environment using SWE-ReX directly.
 Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
 is snapshotted on cleanup and restored on next creation, so installed packages,
 project files, and config changes survive across sessions.
 """
 import asyncio
 import json
 import logging
 import threading
 import time
 import uuid
 from pathlib import Path
 from typing import Any, Dict, Optional
@@ -38,15 +38,49 @@ def _save_snapshots(data: Dict[str, str]) -> None:
    _SNAPSHOT_STORE.write_text(json.dumps(data, indent=2))
-class ModalEnvironment(BaseEnvironment):
+class _AsyncWorker:
-    """Modal cloud execution via mini-swe-agent.
+    """Background thread with its own event loop for async-safe swe-rex calls.
-    Wraps SwerexModalEnvironment and adds sudo -S support, configurable
+    Allows sync code to submit async coroutines and block for results,
-    resources (CPU, memory, disk), and optional filesystem persistence
+    even when called from inside another running event loop (e.g. Atropos).
    via Modal's snapshot_filesystem() API.
    """
-    _patches_applied = False
+    def __init__(self):
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._thread: Optional[threading.Thread] = None
        self._started = threading.Event()
    def start(self):
        self._thread = threading.Thread(target=self._run_loop, daemon=True)
        self._thread.start()
        self._started.wait(timeout=30)
    def _run_loop(self):
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)
        self._started.set()
        self._loop.run_forever()
    def run_coroutine(self, coro, timeout=600):
        if self._loop is None or self._loop.is_closed():
            raise RuntimeError("AsyncWorker loop is not running")
        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
        return future.result(timeout=timeout)
    def stop(self):
        if self._loop and self._loop.is_running():
            self._loop.call_soon_threadsafe(self._loop.stop)
        if self._thread:
            self._thread.join(timeout=10)
 class ModalEnvironment(BaseEnvironment):
    """Modal cloud execution via SWE-ReX.
    Uses swe-rex's ModalDeployment directly for sandbox management.
    Adds sudo -S support, configurable resources (CPU, memory, disk),
    and optional filesystem persistence via Modal's snapshot API.
    """
    def __init__(
        self,
@@ -59,17 +93,11 @@ class ModalEnvironment(BaseEnvironment):
    ):
        super().__init__(cwd=cwd, timeout=timeout)
        if not ModalEnvironment._patches_applied:
            try:
                from environments.patches import apply_patches
                apply_patches()
            except ImportError:
                pass
            ModalEnvironment._patches_applied = True
        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._base_image = image
        self._deployment = None
        self._worker = _AsyncWorker()
        sandbox_kwargs = dict(modal_sandbox_kwargs or {})
@@ -88,16 +116,37 @@ class ModalEnvironment(BaseEnvironment):
        effective_image = restored_image if restored_image else image
-        from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
+        # Pre-build a modal.Image with pip fix for Modal's legacy image builder.
-        self._inner = SwerexModalEnvironment(
+        # Some task images have broken pip; fix via ensurepip before Modal uses it.
-            image=effective_image,
+        import modal as _modal
-            cwd=cwd,
+        if isinstance(effective_image, str):
-            timeout=timeout,
+            effective_image = _modal.Image.from_registry(
-            startup_timeout=180.0,
+                effective_image,
-            runtime_timeout=3600.0,
+                setup_dockerfile_commands=[
-            modal_sandbox_kwargs=sandbox_kwargs,
+                    "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
-            install_pipx=True,  # Required: installs pipx + swe-rex runtime (swerex-remote)
+                    "python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
-        )
+                ],
            )
        # Start the async worker thread and create the deployment on it
        # so all gRPC channels are bound to the worker's event loop.
        self._worker.start()
        from swerex.deployment.modal import ModalDeployment
        async def _create_and_start():
            deployment = ModalDeployment(
                image=effective_image,
                startup_timeout=180.0,
                runtime_timeout=3600.0,
                deployment_timeout=3600.0,
                install_pipx=True,
                modal_sandbox_kwargs=sandbox_kwargs,
            )
            await deployment.start()
            return deployment
        self._deployment = self._worker.run_coroutine(_create_and_start())
    def execute(self, command: str, cwd: str = "", *,
                timeout: int | None = None,
@@ -114,21 +163,39 @@ class ModalEnvironment(BaseEnvironment):
        # subprocess stdin directly the way a local Popen can.  When a sudo
        # password is present, use a shell-level pipe from printf so that the
        # password feeds sudo -S without appearing as an echo argument embedded
-        # in the shell string.  The password is still visible in the remote
+        # in the shell string.
        # sandbox's command line, but it is not exposed on the user's local
        # machine — which is the primary threat being mitigated.
        if sudo_stdin is not None:
            import shlex
            exec_command = (
                f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
            )
        from swerex.runtime.abstract import Command as RexCommand
        effective_cwd = cwd or self.cwd
        effective_timeout = timeout or self.timeout
        # Run in a background thread so we can poll for interrupts
        result_holder = {"value": None, "error": None}
        def _run():
            try:
-                result_holder["value"] = self._inner.execute(exec_command, cwd=cwd, timeout=timeout)
+                async def _do_execute():
                    return await self._deployment.runtime.execute(
                        RexCommand(
                            command=exec_command,
                            shell=True,
                            check=False,
                            cwd=effective_cwd,
                            timeout=effective_timeout,
                            merge_output_streams=True,
                        )
                    )
                output = self._worker.run_coroutine(_do_execute())
                result_holder["value"] = {
                    "output": output.stdout,
                    "returncode": output.exit_code,
                }
            except Exception as e:
                result_holder["error"] = e
@@ -138,7 +205,10 @@ class ModalEnvironment(BaseEnvironment):
            t.join(timeout=0.2)
            if is_interrupted():
                try:
-                    self._inner.stop()
+                    self._worker.run_coroutine(
                        asyncio.wait_for(self._deployment.stop(), timeout=10),
                        timeout=15,
                    )
                except Exception:
                    pass
                return {
@@ -152,35 +222,38 @@ class ModalEnvironment(BaseEnvironment):
    def cleanup(self):
        """Snapshot the filesystem (if persistent) then stop the sandbox."""
-        # Check if _inner was ever set (init may have failed)
+        if self._deployment is None:
        if not hasattr(self, '_inner') or self._inner is None:
            return
        if self._persistent:
            try:
-                sandbox = getattr(self._inner, 'deployment', None)
+                sandbox = getattr(self._deployment, '_sandbox', None)
                sandbox = getattr(sandbox, '_sandbox', None) if sandbox else None
                if sandbox:
                    import asyncio
                    async def _snapshot():
                        img = await sandbox.snapshot_filesystem.aio()
                        return img.object_id
                    try:
                        snapshot_id = asyncio.run(_snapshot())
                    except RuntimeError:
                        import concurrent.futures
                        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                            snapshot_id = pool.submit(
                                asyncio.run, _snapshot()
                            ).result(timeout=60)
-                    snapshots = _load_snapshots()
+                    try:
-                    snapshots[self._task_id] = snapshot_id
+                        snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
-                    _save_snapshots(snapshots)
+                    except Exception:
-                    logger.info("Modal: saved filesystem snapshot %s for task %s",
+                        snapshot_id = None
-                                snapshot_id[:20], self._task_id)
+
                    if snapshot_id:
                        snapshots = _load_snapshots()
                        snapshots[self._task_id] = snapshot_id
                        _save_snapshots(snapshots)
                        logger.info("Modal: saved filesystem snapshot %s for task %s",
                                    snapshot_id[:20], self._task_id)
            except Exception as e:
                logger.warning("Modal: filesystem snapshot failed: %s", e)
-        if hasattr(self._inner, 'stop'):
+        try:
-            self._inner.stop()
+            self._worker.run_coroutine(
                asyncio.wait_for(self._deployment.stop(), timeout=10),
                timeout=15,
            )
        except Exception:
            pass
        finally:
            self._worker.stop()
            self._deployment = None
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -51,13 +51,6 @@ logger = logging.getLogger(__name__)
 from tools.interrupt import is_interrupted, _interrupt_event
 # Add mini-swe-agent to path if not installed. In git worktrees the populated
 # submodule may live in the main checkout rather than the worktree itself.
 from minisweagent_path import ensure_minisweagent_on_path
 ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
 # =============================================================================
 # Custom Singularity Environment with more space
 # =============================================================================
@@ -1188,27 +1181,15 @@ def terminal_tool(
 def check_terminal_requirements() -> bool:
-    """Check if all requirements for the terminal tool are met.
+    """Check if all requirements for the terminal tool are met."""
    Important: local and singularity backends now use Hermes' own environment
    wrappers directly and do not require the ``minisweagent`` Python package to
    be installed. Docker and Modal still rely on mini-swe-agent internals.
    """
    config = _get_env_config()
    env_type = config["env_type"]
    try:
        if env_type == "local":
            # Local execution uses Hermes' own LocalEnvironment wrapper and does
            # not depend on minisweagent being importable.
            return True
        elif env_type == "docker":
            ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
            if importlib.util.find_spec("minisweagent") is None:
                logger.error("mini-swe-agent is required for docker terminal backend but is not importable")
                return False
            # Check if docker is available (use find_docker for macOS PATH issues)
            from tools.environments.docker import find_docker
            docker = find_docker()
            if not docker:
@@ -1225,7 +1206,6 @@ def check_terminal_requirements() -> bool:
            return False
        elif env_type == "ssh":
            # Check that host and user are configured
            if not config.get("ssh_host") or not config.get("ssh_user"):
                logger.error(
                    "SSH backend selected but TERMINAL_SSH_HOST and TERMINAL_SSH_USER "
@@ -1235,11 +1215,9 @@ def check_terminal_requirements() -> bool:
            return True
        elif env_type == "modal":
-            ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
+            if importlib.util.find_spec("swerex") is None:
-            if importlib.util.find_spec("minisweagent") is None:
+                logger.error("swe-rex is required for modal terminal backend: pip install 'swe-rex[modal]'")
                logger.error("mini-swe-agent is required for modal terminal backend but is not importable")
                return False
            # Check for modal token
            has_token = os.getenv("MODAL_TOKEN_ID") is not None
            has_config = Path.home().joinpath(".modal.toml").exists()
            if not (has_token or has_config):
@@ -1269,7 +1247,7 @@ def check_terminal_requirements() -> bool:
 if __name__ == "__main__":
    # Simple test when run directly
-    print("Terminal Tool Module (mini-swe-agent backend)")
+    print("Terminal Tool Module")
    print("=" * 50)
    config = _get_env_config()