refactor: remove mini-swe-agent dependency — inline Docker/Modal backends (#2804)
Drop the mini-swe-agent git submodule. All terminal backends now use hermes-agent's own environment implementations directly.

Docker backend:
- Inline the `docker run -d` container startup (was 15 lines in minisweagent's DockerEnvironment). Our wrapper already handled execute(), cleanup(), security hardening, volumes, and resource limits.

Modal backend:
- Import swe-rex's ModalDeployment directly instead of going through minisweagent's 90-line passthrough wrapper.
- Bake the _AsyncWorker pattern (from environments/patches.py) directly into ModalEnvironment for Atropos compatibility without monkey-patching.

Cleanup:
- Remove minisweagent_path.py (submodule path resolution helper)
- Remove submodule init/install from install.sh and setup-hermes.sh
- Remove mini-swe-agent from .gitmodules
- environments/patches.py is now a no-op (kept for backward compat)
- terminal_tool.py no longer does sys.path hacking for minisweagent
- mini_swe_runner.py guards imports (optional, for RL training only)
- Update all affected tests to mock the new direct subprocess calls
- Update README.md, CONTRIBUTING.md

No functionality change — all Docker, Modal, local, SSH, Singularity, and Daytona backends behave identically. 6093 tests pass.
This commit is contained in:
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,6 +1,3 @@
|
|||||||
[submodule "mini-swe-agent"]
|
|
||||||
path = mini-swe-agent
|
|
||||||
url = https://github.com/SWE-agent/mini-swe-agent
|
|
||||||
[submodule "tinker-atropos"]
|
[submodule "tinker-atropos"]
|
||||||
path = tinker-atropos
|
path = tinker-atropos
|
||||||
url = https://github.com/nousresearch/tinker-atropos
|
url = https://github.com/nousresearch/tinker-atropos
|
||||||
|
|||||||
@@ -72,8 +72,9 @@ export VIRTUAL_ENV="$(pwd)/venv"
|
|||||||
|
|
||||||
# Install with all extras (messaging, cron, CLI menus, dev tools)
|
# Install with all extras (messaging, cron, CLI menus, dev tools)
|
||||||
uv pip install -e ".[all,dev]"
|
uv pip install -e ".[all,dev]"
|
||||||
uv pip install -e "./mini-swe-agent"
|
|
||||||
uv pip install -e "./tinker-atropos"
|
# Optional: RL training submodule
|
||||||
|
# git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos"
|
||||||
|
|
||||||
# Optional: browser tools
|
# Optional: browser tools
|
||||||
npm install
|
npm install
|
||||||
|
|||||||
@@ -144,16 +144,14 @@ Quick start for contributors:
|
|||||||
```bash
|
```bash
|
||||||
git clone https://github.com/NousResearch/hermes-agent.git
|
git clone https://github.com/NousResearch/hermes-agent.git
|
||||||
cd hermes-agent
|
cd hermes-agent
|
||||||
git submodule update --init mini-swe-agent # required terminal backend
|
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
uv venv venv --python 3.11
|
uv venv venv --python 3.11
|
||||||
source venv/bin/activate
|
source venv/bin/activate
|
||||||
uv pip install -e ".[all,dev]"
|
uv pip install -e ".[all,dev]"
|
||||||
uv pip install -e "./mini-swe-agent"
|
|
||||||
python -m pytest tests/ -q
|
python -m pytest tests/ -q
|
||||||
```
|
```
|
||||||
|
|
||||||
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration, also run:
|
> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
|
||||||
> ```bash
|
> ```bash
|
||||||
> git submodule update --init tinker-atropos
|
> git submodule update --init tinker-atropos
|
||||||
> uv pip install -e "./tinker-atropos"
|
> uv pip install -e "./tinker-atropos"
|
||||||
|
|||||||
4
cli.py
4
cli.py
@@ -31,7 +31,7 @@ from typing import List, Dict, Any, Optional
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Suppress startup messages for clean CLI experience
|
# Suppress startup messages for clean CLI experience
|
||||||
os.environ["MSWEA_SILENT_STARTUP"] = "1" # mini-swe-agent
|
os.environ["MSWEA_SILENT_STARTUP"] = "1" # suppress mini-swe-agent startup noise if installed
|
||||||
os.environ["HERMES_QUIET"] = "1" # Our own modules
|
os.environ["HERMES_QUIET"] = "1" # Our own modules
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
@@ -78,7 +78,7 @@ _hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
|||||||
_project_env = Path(__file__).parent / '.env'
|
_project_env = Path(__file__).parent / '.env'
|
||||||
load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
|
load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
|
||||||
|
|
||||||
# Point mini-swe-agent at ~/.hermes/ so it shares our config
|
# Point mini-swe-agent at ~/.hermes/ if installed (RL training use)
|
||||||
os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
|
os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -2,203 +2,41 @@
|
|||||||
Monkey patches for making hermes-agent tools work inside async frameworks (Atropos).
|
Monkey patches for making hermes-agent tools work inside async frameworks (Atropos).
|
||||||
|
|
||||||
Problem:
|
Problem:
|
||||||
Some tools use asyncio.run() internally (e.g., mini-swe-agent's Modal backend,
|
Some tools use asyncio.run() internally (e.g., Modal backend via SWE-ReX,
|
||||||
web_extract). This crashes when called from inside Atropos's event loop because
|
web_extract). This crashes when called from inside Atropos's event loop because
|
||||||
asyncio.run() can't be nested.
|
asyncio.run() can't be nested.
|
||||||
|
|
||||||
Solution:
|
Solution:
|
||||||
Replace the problematic methods with versions that use a dedicated background
|
The Modal environment (tools/environments/modal.py) now uses a dedicated
|
||||||
thread with its own event loop. The calling code sees the same sync interface --
|
_AsyncWorker thread internally, making it safe for both CLI and Atropos use.
|
||||||
call a function, get a result -- but internally the async work happens on a
|
No monkey-patching is required.
|
||||||
separate thread that doesn't conflict with Atropos's loop.
|
|
||||||
|
|
||||||
These patches are safe for normal CLI use too: when there's no running event
|
This module is kept for backward compatibility — apply_patches() is now a no-op.
|
||||||
loop, the behavior is identical (the background thread approach works regardless).
|
|
||||||
|
|
||||||
What gets patched:
|
|
||||||
- SwerexModalEnvironment.__init__ -- creates Modal deployment on a background thread
|
|
||||||
- SwerexModalEnvironment.execute -- runs commands on the same background thread
|
|
||||||
- SwerexModalEnvironment.stop -- stops deployment on the background thread
|
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
Call apply_patches() once at import time (done automatically by hermes_base_env.py).
|
Call apply_patches() once at import time (done automatically by hermes_base_env.py).
|
||||||
This is idempotent -- calling it multiple times is safe.
|
This is idempotent — calling it multiple times is safe.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
import logging
|
||||||
import threading
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_patches_applied = False
|
_patches_applied = False
|
||||||
|
|
||||||
|
|
||||||
class _AsyncWorker:
|
|
||||||
"""
|
|
||||||
A dedicated background thread with its own event loop.
|
|
||||||
|
|
||||||
Allows sync code to submit async coroutines and block for results,
|
|
||||||
even when called from inside another running event loop. Used to
|
|
||||||
bridge sync tool interfaces with async backends (Modal, SWE-ReX).
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self._loop: asyncio.AbstractEventLoop = None
|
|
||||||
self._thread: threading.Thread = None
|
|
||||||
self._started = threading.Event()
|
|
||||||
|
|
||||||
def start(self):
|
|
||||||
"""Start the background event loop thread."""
|
|
||||||
self._thread = threading.Thread(target=self._run_loop, daemon=True)
|
|
||||||
self._thread.start()
|
|
||||||
self._started.wait(timeout=30)
|
|
||||||
|
|
||||||
def _run_loop(self):
|
|
||||||
"""Background thread entry point -- runs the event loop forever."""
|
|
||||||
self._loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(self._loop)
|
|
||||||
self._started.set()
|
|
||||||
self._loop.run_forever()
|
|
||||||
|
|
||||||
def run_coroutine(self, coro, timeout=600):
|
|
||||||
"""
|
|
||||||
Submit a coroutine to the background loop and block until it completes.
|
|
||||||
|
|
||||||
Safe to call from any thread, including threads that already have
|
|
||||||
a running event loop.
|
|
||||||
"""
|
|
||||||
if self._loop is None or self._loop.is_closed():
|
|
||||||
raise RuntimeError("AsyncWorker loop is not running")
|
|
||||||
future = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
|
||||||
return future.result(timeout=timeout)
|
|
||||||
|
|
||||||
def stop(self):
|
|
||||||
"""Stop the background event loop and join the thread."""
|
|
||||||
if self._loop and self._loop.is_running():
|
|
||||||
self._loop.call_soon_threadsafe(self._loop.stop)
|
|
||||||
if self._thread:
|
|
||||||
self._thread.join(timeout=10)
|
|
||||||
|
|
||||||
|
|
||||||
def _patch_swerex_modal():
|
|
||||||
"""
|
|
||||||
Monkey patch SwerexModalEnvironment to use a background thread event loop
|
|
||||||
instead of asyncio.run(). This makes it safe to call from inside Atropos's
|
|
||||||
async event loop.
|
|
||||||
|
|
||||||
The patched methods have the exact same interface and behavior -- the only
|
|
||||||
difference is HOW the async work is executed internally.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from minisweagent.environments.extra.swerex_modal import (
|
|
||||||
SwerexModalEnvironment,
|
|
||||||
SwerexModalEnvironmentConfig,
|
|
||||||
)
|
|
||||||
from swerex.deployment.modal import ModalDeployment
|
|
||||||
from swerex.runtime.abstract import Command as RexCommand
|
|
||||||
except ImportError:
|
|
||||||
# mini-swe-agent or swe-rex not installed -- nothing to patch
|
|
||||||
logger.debug("mini-swe-agent Modal backend not available, skipping patch")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Save original methods so we can refer to config handling
|
|
||||||
_original_init = SwerexModalEnvironment.__init__
|
|
||||||
|
|
||||||
def _patched_init(self, **kwargs):
|
|
||||||
"""Patched __init__: creates Modal deployment on a background thread."""
|
|
||||||
self.config = SwerexModalEnvironmentConfig(**kwargs)
|
|
||||||
|
|
||||||
# Start a dedicated event loop thread for all Modal async operations
|
|
||||||
self._worker = _AsyncWorker()
|
|
||||||
self._worker.start()
|
|
||||||
|
|
||||||
# Pre-build a modal.Image with pip fix for Modal's legacy image builder.
|
|
||||||
# Modal requires `python -m pip` to work during image build, but some
|
|
||||||
# task images (e.g., TBLite's broken-python) have intentionally broken pip.
|
|
||||||
# Fix: remove stale pip dist-info and reinstall via ensurepip before Modal
|
|
||||||
# tries to use it. This is a no-op for images where pip already works.
|
|
||||||
import modal as _modal
|
|
||||||
image_spec = self.config.image
|
|
||||||
if isinstance(image_spec, str):
|
|
||||||
image_spec = _modal.Image.from_registry(
|
|
||||||
image_spec,
|
|
||||||
setup_dockerfile_commands=[
|
|
||||||
"RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
|
|
||||||
"python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create AND start the deployment entirely on the worker's loop/thread
|
|
||||||
# so all gRPC channels and async state are bound to that loop
|
|
||||||
async def _create_and_start():
|
|
||||||
deployment = ModalDeployment(
|
|
||||||
image=image_spec,
|
|
||||||
startup_timeout=self.config.startup_timeout,
|
|
||||||
runtime_timeout=self.config.runtime_timeout,
|
|
||||||
deployment_timeout=self.config.deployment_timeout,
|
|
||||||
install_pipx=self.config.install_pipx,
|
|
||||||
modal_sandbox_kwargs=self.config.modal_sandbox_kwargs,
|
|
||||||
)
|
|
||||||
await deployment.start()
|
|
||||||
return deployment
|
|
||||||
|
|
||||||
self.deployment = self._worker.run_coroutine(_create_and_start())
|
|
||||||
|
|
||||||
def _patched_execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
|
|
||||||
"""Patched execute: runs commands on the background thread's loop."""
|
|
||||||
async def _do_execute():
|
|
||||||
return await self.deployment.runtime.execute(
|
|
||||||
RexCommand(
|
|
||||||
command=command,
|
|
||||||
shell=True,
|
|
||||||
check=False,
|
|
||||||
cwd=cwd or self.config.cwd,
|
|
||||||
timeout=timeout or self.config.timeout,
|
|
||||||
merge_output_streams=True,
|
|
||||||
env=self.config.env if self.config.env else None,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
output = self._worker.run_coroutine(_do_execute())
|
|
||||||
return {
|
|
||||||
"output": output.stdout,
|
|
||||||
"returncode": output.exit_code,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _patched_stop(self):
|
|
||||||
"""Patched stop: stops deployment on the background thread, then stops the thread."""
|
|
||||||
try:
|
|
||||||
self._worker.run_coroutine(
|
|
||||||
asyncio.wait_for(self.deployment.stop(), timeout=10),
|
|
||||||
timeout=15,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
finally:
|
|
||||||
self._worker.stop()
|
|
||||||
|
|
||||||
# Apply the patches
|
|
||||||
SwerexModalEnvironment.__init__ = _patched_init
|
|
||||||
SwerexModalEnvironment.execute = _patched_execute
|
|
||||||
SwerexModalEnvironment.stop = _patched_stop
|
|
||||||
|
|
||||||
logger.debug("Patched SwerexModalEnvironment for async-safe operation")
|
|
||||||
|
|
||||||
|
|
||||||
def apply_patches():
|
def apply_patches():
|
||||||
"""
|
"""Apply all monkey patches needed for Atropos compatibility.
|
||||||
Apply all monkey patches needed for Atropos compatibility.
|
|
||||||
|
|
||||||
Safe to call multiple times -- patches are only applied once.
|
Now a no-op — Modal async safety is built directly into ModalEnvironment.
|
||||||
Safe for normal CLI use -- patched code works identically when
|
Safe to call multiple times.
|
||||||
there is no running event loop.
|
|
||||||
"""
|
"""
|
||||||
global _patches_applied
|
global _patches_applied
|
||||||
if _patches_applied:
|
if _patches_applied:
|
||||||
return
|
return
|
||||||
|
|
||||||
_patch_swerex_modal()
|
# Modal async-safety is now built into tools/environments/modal.py
|
||||||
|
# via the _AsyncWorker class. No monkey-patching needed.
|
||||||
|
logger.debug("apply_patches() called — no patches needed (async safety is built-in)")
|
||||||
|
|
||||||
_patches_applied = True
|
_patches_applied = True
|
||||||
|
|||||||
Submodule mini-swe-agent deleted from 07aa6a7385
@@ -42,11 +42,13 @@ from dotenv import load_dotenv
|
|||||||
# Load environment variables
|
# Load environment variables
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# Add mini-swe-agent to path if not installed. In git worktrees the populated
|
# mini-swe-agent is an optional dependency for this runner.
|
||||||
# submodule may live in the main checkout rather than the worktree itself.
|
# Install separately: git submodule update --init mini-swe-agent && pip install -e ./mini-swe-agent
|
||||||
from minisweagent_path import ensure_minisweagent_on_path
|
try:
|
||||||
|
import minisweagent # noqa: F401
|
||||||
ensure_minisweagent_on_path(Path(__file__).resolve().parent)
|
_HAS_MINISWEAGENT = True
|
||||||
|
except ImportError:
|
||||||
|
_HAS_MINISWEAGENT = False
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@@ -110,7 +112,10 @@ def create_environment(
|
|||||||
**kwargs
|
**kwargs
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create an execution environment from mini-swe-agent.
|
Create an execution environment.
|
||||||
|
|
||||||
|
Requires the mini-swe-agent environments; raises ImportError if the
|
||||||
|
mini-swe-agent submodule has not been installed separately.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
env_type: One of "local", "docker", "modal"
|
env_type: One of "local", "docker", "modal"
|
||||||
@@ -122,6 +127,12 @@ def create_environment(
|
|||||||
Returns:
|
Returns:
|
||||||
Environment instance with execute() method
|
Environment instance with execute() method
|
||||||
"""
|
"""
|
||||||
|
if not _HAS_MINISWEAGENT:
|
||||||
|
raise ImportError(
|
||||||
|
"mini-swe-agent is required for mini_swe_runner.py. "
|
||||||
|
"Install it: git submodule update --init mini-swe-agent && pip install -e ./mini-swe-agent"
|
||||||
|
)
|
||||||
|
|
||||||
if env_type == "local":
|
if env_type == "local":
|
||||||
from minisweagent.environments.local import LocalEnvironment
|
from minisweagent.environments.local import LocalEnvironment
|
||||||
return LocalEnvironment(cwd=cwd, timeout=timeout)
|
return LocalEnvironment(cwd=cwd, timeout=timeout)
|
||||||
|
|||||||
@@ -1,92 +0,0 @@
|
|||||||
"""Helpers for locating the mini-swe-agent source tree.
|
|
||||||
|
|
||||||
Hermes often runs from git worktrees. In that layout the worktree root may have
|
|
||||||
an empty ``mini-swe-agent/`` placeholder while the real populated submodule
|
|
||||||
lives under the main checkout that owns the shared ``.git`` directory.
|
|
||||||
|
|
||||||
These helpers locate a usable ``mini-swe-agent/src`` directory and optionally
|
|
||||||
prepend it to ``sys.path`` so imports like ``import minisweagent`` work from
|
|
||||||
both normal checkouts and worktrees.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import importlib.util
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
def _read_gitdir(repo_root: Path) -> Optional[Path]:
|
|
||||||
"""Resolve the gitdir referenced by ``repo_root/.git`` when it is a file."""
|
|
||||||
git_marker = repo_root / ".git"
|
|
||||||
if not git_marker.is_file():
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
raw = git_marker.read_text(encoding="utf-8").strip()
|
|
||||||
except OSError:
|
|
||||||
return None
|
|
||||||
|
|
||||||
prefix = "gitdir:"
|
|
||||||
if not raw.lower().startswith(prefix):
|
|
||||||
return None
|
|
||||||
|
|
||||||
target = raw[len(prefix):].strip()
|
|
||||||
gitdir = Path(target)
|
|
||||||
if not gitdir.is_absolute():
|
|
||||||
gitdir = (repo_root / gitdir).resolve()
|
|
||||||
else:
|
|
||||||
gitdir = gitdir.resolve()
|
|
||||||
return gitdir
|
|
||||||
|
|
||||||
|
|
||||||
def discover_minisweagent_src(repo_root: Optional[Path] = None) -> Optional[Path]:
|
|
||||||
"""Return the best available ``mini-swe-agent/src`` path, if any.
|
|
||||||
|
|
||||||
Search order:
|
|
||||||
1. Current checkout/worktree root
|
|
||||||
2. Main checkout that owns the shared ``.git`` directory (for worktrees)
|
|
||||||
"""
|
|
||||||
repo_root = (repo_root or Path(__file__).resolve().parent).resolve()
|
|
||||||
|
|
||||||
candidates: list[Path] = [repo_root / "mini-swe-agent" / "src"]
|
|
||||||
|
|
||||||
gitdir = _read_gitdir(repo_root)
|
|
||||||
if gitdir is not None:
|
|
||||||
# Worktree layout: <main>/.git/worktrees/<name>
|
|
||||||
if len(gitdir.parents) >= 3 and gitdir.parent.name == "worktrees":
|
|
||||||
candidates.append(gitdir.parents[2] / "mini-swe-agent" / "src")
|
|
||||||
# Direct checkout with .git file pointing elsewhere
|
|
||||||
elif gitdir.name == ".git":
|
|
||||||
candidates.append(gitdir.parent / "mini-swe-agent" / "src")
|
|
||||||
|
|
||||||
seen = set()
|
|
||||||
for candidate in candidates:
|
|
||||||
candidate = candidate.resolve()
|
|
||||||
if candidate in seen:
|
|
||||||
continue
|
|
||||||
seen.add(candidate)
|
|
||||||
if candidate.exists() and candidate.is_dir():
|
|
||||||
return candidate
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_minisweagent_on_path(repo_root: Optional[Path] = None) -> Optional[Path]:
|
|
||||||
"""Ensure ``minisweagent`` is importable by prepending its src dir to sys.path.
|
|
||||||
|
|
||||||
Returns the inserted/discovered path, or ``None`` if the package is already
|
|
||||||
importable or no local source tree could be found.
|
|
||||||
"""
|
|
||||||
if importlib.util.find_spec("minisweagent") is not None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
src = discover_minisweagent_src(repo_root)
|
|
||||||
if src is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
src_str = str(src)
|
|
||||||
if src_str not in sys.path:
|
|
||||||
sys.path.insert(0, src_str)
|
|
||||||
return src
|
|
||||||
@@ -90,7 +90,7 @@ hermes-agent = "run_agent:main"
|
|||||||
hermes-acp = "acp_adapter.entry:main"
|
hermes-acp = "acp_adapter.entry:main"
|
||||||
|
|
||||||
[tool.setuptools]
|
[tool.setuptools]
|
||||||
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "mini_swe_runner", "minisweagent_path", "rl_cli", "utils"]
|
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "mini_swe_runner", "rl_cli", "utils"]
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "honcho_integration", "acp_adapter"]
|
include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "honcho_integration", "acp_adapter"]
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ if _loaded_env_paths:
|
|||||||
else:
|
else:
|
||||||
logger.info("No .env file found. Using system environment variables.")
|
logger.info("No .env file found. Using system environment variables.")
|
||||||
|
|
||||||
# Point mini-swe-agent at ~/.hermes/ so it shares our config
|
# Point mini-swe-agent at ~/.hermes/ if installed (RL training use)
|
||||||
os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
|
os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
|
||||||
os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")
|
os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")
|
||||||
|
|
||||||
|
|||||||
@@ -637,13 +637,6 @@ clone_repo() {
|
|||||||
|
|
||||||
cd "$INSTALL_DIR"
|
cd "$INSTALL_DIR"
|
||||||
|
|
||||||
# Only init mini-swe-agent (terminal tool backend — required).
|
|
||||||
# tinker-atropos (RL training) is optional and heavy — users can opt in later
|
|
||||||
# with: git submodule update --init tinker-atropos && uv pip install -e ./tinker-atropos
|
|
||||||
log_info "Initializing mini-swe-agent submodule (terminal backend)..."
|
|
||||||
git submodule update --init mini-swe-agent
|
|
||||||
log_success "Submodule ready"
|
|
||||||
|
|
||||||
log_success "Repository ready"
|
log_success "Repository ready"
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -718,18 +711,6 @@ install_deps() {
|
|||||||
|
|
||||||
log_success "Main package installed"
|
log_success "Main package installed"
|
||||||
|
|
||||||
# Install submodules
|
|
||||||
log_info "Installing mini-swe-agent (terminal tool backend)..."
|
|
||||||
if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
|
|
||||||
if $UV_CMD pip install -e "./mini-swe-agent"; then
|
|
||||||
log_success "mini-swe-agent installed"
|
|
||||||
else
|
|
||||||
log_warn "mini-swe-agent install failed (Docker/Modal terminal backends may not work, local terminal is unaffected)"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
log_warn "mini-swe-agent not found (run: git submodule update --init)"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# tinker-atropos (RL training) is optional — skip by default.
|
# tinker-atropos (RL training) is optional — skip by default.
|
||||||
# To enable RL tools: git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos"
|
# To enable RL tools: git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos"
|
||||||
if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
|
if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
|
||||||
|
|||||||
@@ -124,16 +124,7 @@ echo -e "${GREEN}✓${NC} Dependencies installed"
|
|||||||
# Submodules (terminal backend + RL training)
|
# Submodules (terminal backend + RL training)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
echo -e "${CYAN}→${NC} Installing submodules..."
|
echo -e "${CYAN}→${NC} Installing optional submodules..."
|
||||||
|
|
||||||
# mini-swe-agent (terminal tool backend)
|
|
||||||
if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
|
|
||||||
$UV_CMD pip install -e "./mini-swe-agent" && \
|
|
||||||
echo -e "${GREEN}✓${NC} mini-swe-agent installed" || \
|
|
||||||
echo -e "${YELLOW}⚠${NC} mini-swe-agent install failed (Docker/Modal terminal backends may not work, local terminal is unaffected)"
|
|
||||||
else
|
|
||||||
echo -e "${YELLOW}⚠${NC} mini-swe-agent not found (run: git submodule update --init --recursive)"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# tinker-atropos (RL training backend)
|
# tinker-atropos (RL training backend)
|
||||||
if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
|
if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
|
||||||
|
|||||||
@@ -41,7 +41,6 @@ except ImportError:
|
|||||||
# Add project root to path for imports
|
# Add project root to path for imports
|
||||||
parent_dir = Path(__file__).parent.parent.parent
|
parent_dir = Path(__file__).parent.parent.parent
|
||||||
sys.path.insert(0, str(parent_dir))
|
sys.path.insert(0, str(parent_dir))
|
||||||
sys.path.insert(0, str(parent_dir / "mini-swe-agent" / "src"))
|
|
||||||
|
|
||||||
# Import terminal_tool module directly using importlib to avoid tools/__init__.py
|
# Import terminal_tool module directly using importlib to avoid tools/__init__.py
|
||||||
import importlib.util
|
import importlib.util
|
||||||
|
|||||||
@@ -1,34 +1,5 @@
|
|||||||
"""Tests for minisweagent_path.py."""
|
"""Tests for minisweagent_path.py — REMOVED.
|
||||||
|
|
||||||
from pathlib import Path
|
minisweagent_path.py was removed as part of dropping the mini-swe-agent
|
||||||
|
dependency. These tests are no longer applicable.
|
||||||
from minisweagent_path import discover_minisweagent_src
|
"""
|
||||||
|
|
||||||
|
|
||||||
def test_discover_minisweagent_src_in_current_checkout(tmp_path):
|
|
||||||
repo = tmp_path / "repo"
|
|
||||||
src = repo / "mini-swe-agent" / "src"
|
|
||||||
src.mkdir(parents=True)
|
|
||||||
|
|
||||||
assert discover_minisweagent_src(repo) == src.resolve()
|
|
||||||
|
|
||||||
|
|
||||||
def test_discover_minisweagent_src_falls_back_from_worktree_to_main_checkout(tmp_path):
|
|
||||||
main_repo = tmp_path / "main-repo"
|
|
||||||
(main_repo / ".git" / "worktrees" / "wt1").mkdir(parents=True)
|
|
||||||
main_src = main_repo / "mini-swe-agent" / "src"
|
|
||||||
main_src.mkdir(parents=True)
|
|
||||||
|
|
||||||
worktree = tmp_path / "worktree"
|
|
||||||
worktree.mkdir()
|
|
||||||
(worktree / ".git").write_text(f"gitdir: {main_repo / '.git' / 'worktrees' / 'wt1'}\n", encoding="utf-8")
|
|
||||||
(worktree / "mini-swe-agent").mkdir() # empty placeholder, no src/
|
|
||||||
|
|
||||||
assert discover_minisweagent_src(worktree) == main_src.resolve()
|
|
||||||
|
|
||||||
|
|
||||||
def test_discover_minisweagent_src_returns_none_when_missing(tmp_path):
|
|
||||||
repo = tmp_path / "repo"
|
|
||||||
repo.mkdir()
|
|
||||||
|
|
||||||
assert discover_minisweagent_src(repo) is None
|
|
||||||
|
|||||||
@@ -131,9 +131,9 @@ class TestExecuteCode(unittest.TestCase):
|
|||||||
|
|
||||||
def test_repo_root_modules_are_importable(self):
|
def test_repo_root_modules_are_importable(self):
|
||||||
"""Sandboxed scripts can import modules that live at the repo root."""
|
"""Sandboxed scripts can import modules that live at the repo root."""
|
||||||
result = self._run('import minisweagent_path; print(minisweagent_path.__file__)')
|
result = self._run('import hermes_constants; print(hermes_constants.__file__)')
|
||||||
self.assertEqual(result["status"], "success")
|
self.assertEqual(result["status"], "success")
|
||||||
self.assertIn("minisweagent_path.py", result["output"])
|
self.assertIn("hermes_constants.py", result["output"])
|
||||||
|
|
||||||
def test_single_tool_call(self):
|
def test_single_tool_call(self):
|
||||||
"""Script calls terminal and prints the result."""
|
"""Script calls terminal and prints the result."""
|
||||||
|
|||||||
@@ -9,25 +9,24 @@ import pytest
|
|||||||
from tools.environments import docker as docker_env
|
from tools.environments import docker as docker_env
|
||||||
|
|
||||||
|
|
||||||
def _install_fake_minisweagent(monkeypatch, captured_run_args):
|
def _mock_subprocess_run(monkeypatch):
|
||||||
class MockInnerDocker:
|
"""Mock subprocess.run to intercept docker run -d and docker version calls.
|
||||||
container_id = "fake-container"
|
|
||||||
config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})()
|
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
Returns a list of captured (cmd, kwargs) tuples for inspection.
|
||||||
captured_run_args.extend(kwargs.get("run_args", []))
|
"""
|
||||||
|
calls = []
|
||||||
|
|
||||||
def cleanup(self):
|
def _run(cmd, **kwargs):
|
||||||
pass
|
calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
|
||||||
|
if isinstance(cmd, list) and len(cmd) >= 2:
|
||||||
|
if cmd[1] == "version":
|
||||||
|
return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
|
||||||
|
if cmd[1] == "run":
|
||||||
|
return subprocess.CompletedProcess(cmd, 0, stdout="fake-container-id\n", stderr="")
|
||||||
|
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
||||||
|
|
||||||
minisweagent_mod = types.ModuleType("minisweagent")
|
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
||||||
environments_mod = types.ModuleType("minisweagent.environments")
|
return calls
|
||||||
docker_mod = types.ModuleType("minisweagent.environments.docker")
|
|
||||||
docker_mod.DockerEnvironment = MockInnerDocker
|
|
||||||
|
|
||||||
monkeypatch.setitem(sys.modules, "minisweagent", minisweagent_mod)
|
|
||||||
monkeypatch.setitem(sys.modules, "minisweagent.environments", environments_mod)
|
|
||||||
monkeypatch.setitem(sys.modules, "minisweagent.environments.docker", docker_mod)
|
|
||||||
|
|
||||||
|
|
||||||
def _make_dummy_env(**kwargs):
|
def _make_dummy_env(**kwargs):
|
||||||
@@ -49,7 +48,7 @@ def _make_dummy_env(**kwargs):
|
|||||||
|
|
||||||
|
|
||||||
def test_ensure_docker_available_logs_and_raises_when_not_found(monkeypatch, caplog):
|
def test_ensure_docker_available_logs_and_raises_when_not_found(monkeypatch, caplog):
|
||||||
"""When docker cannot be found, raise a clear error before mini-swe setup."""
|
"""When docker cannot be found, raise a clear error before container setup."""
|
||||||
|
|
||||||
monkeypatch.setattr(docker_env, "find_docker", lambda: None)
|
monkeypatch.setattr(docker_env, "find_docker", lambda: None)
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
@@ -118,14 +117,8 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
|
|||||||
project_dir = tmp_path / "my-project"
|
project_dir = tmp_path / "my-project"
|
||||||
project_dir.mkdir()
|
project_dir.mkdir()
|
||||||
|
|
||||||
def _run_docker_version(*args, **kwargs):
|
|
||||||
return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
|
|
||||||
|
|
||||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||||
monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
|
calls = _mock_subprocess_run(monkeypatch)
|
||||||
|
|
||||||
captured_run_args = []
|
|
||||||
_install_fake_minisweagent(monkeypatch, captured_run_args)
|
|
||||||
|
|
||||||
_make_dummy_env(
|
_make_dummy_env(
|
||||||
cwd="/workspace",
|
cwd="/workspace",
|
||||||
@@ -133,7 +126,10 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
|
|||||||
auto_mount_cwd=True,
|
auto_mount_cwd=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
run_args_str = " ".join(captured_run_args)
|
# Find the docker run call and check its args
|
||||||
|
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
|
||||||
|
assert run_calls, "docker run should have been called"
|
||||||
|
run_args_str = " ".join(run_calls[0][0])
|
||||||
assert f"{project_dir}:/workspace" in run_args_str
|
assert f"{project_dir}:/workspace" in run_args_str
|
||||||
|
|
||||||
|
|
||||||
@@ -142,14 +138,8 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
|
|||||||
project_dir = tmp_path / "my-project"
|
project_dir = tmp_path / "my-project"
|
||||||
project_dir.mkdir()
|
project_dir.mkdir()
|
||||||
|
|
||||||
def _run_docker_version(*args, **kwargs):
|
|
||||||
return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
|
|
||||||
|
|
||||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||||
monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
|
calls = _mock_subprocess_run(monkeypatch)
|
||||||
|
|
||||||
captured_run_args = []
|
|
||||||
_install_fake_minisweagent(monkeypatch, captured_run_args)
|
|
||||||
|
|
||||||
_make_dummy_env(
|
_make_dummy_env(
|
||||||
cwd="/root",
|
cwd="/root",
|
||||||
@@ -157,7 +147,9 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
|
|||||||
auto_mount_cwd=False,
|
auto_mount_cwd=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
run_args_str = " ".join(captured_run_args)
|
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
|
||||||
|
assert run_calls, "docker run should have been called"
|
||||||
|
run_args_str = " ".join(run_calls[0][0])
|
||||||
assert f"{project_dir}:/workspace" not in run_args_str
|
assert f"{project_dir}:/workspace" not in run_args_str
|
||||||
|
|
||||||
|
|
||||||
@@ -168,14 +160,8 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path
|
|||||||
other_dir = tmp_path / "other"
|
other_dir = tmp_path / "other"
|
||||||
other_dir.mkdir()
|
other_dir.mkdir()
|
||||||
|
|
||||||
def _run_docker_version(*args, **kwargs):
|
|
||||||
return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
|
|
||||||
|
|
||||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||||
monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
|
calls = _mock_subprocess_run(monkeypatch)
|
||||||
|
|
||||||
captured_run_args = []
|
|
||||||
_install_fake_minisweagent(monkeypatch, captured_run_args)
|
|
||||||
|
|
||||||
_make_dummy_env(
|
_make_dummy_env(
|
||||||
cwd="/workspace",
|
cwd="/workspace",
|
||||||
@@ -184,7 +170,9 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path
|
|||||||
volumes=[f"{other_dir}:/workspace"],
|
volumes=[f"{other_dir}:/workspace"],
|
||||||
)
|
)
|
||||||
|
|
||||||
run_args_str = " ".join(captured_run_args)
|
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
|
||||||
|
assert run_calls, "docker run should have been called"
|
||||||
|
run_args_str = " ".join(run_calls[0][0])
|
||||||
assert f"{other_dir}:/workspace" in run_args_str
|
assert f"{other_dir}:/workspace" in run_args_str
|
||||||
assert run_args_str.count(":/workspace") == 1
|
assert run_args_str.count(":/workspace") == 1
|
||||||
|
|
||||||
@@ -194,14 +182,8 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
|
|||||||
project_dir = tmp_path / "my-project"
|
project_dir = tmp_path / "my-project"
|
||||||
project_dir.mkdir()
|
project_dir.mkdir()
|
||||||
|
|
||||||
def _run_docker_version(*args, **kwargs):
|
|
||||||
return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
|
|
||||||
|
|
||||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||||
monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
|
calls = _mock_subprocess_run(monkeypatch)
|
||||||
|
|
||||||
captured_run_args = []
|
|
||||||
_install_fake_minisweagent(monkeypatch, captured_run_args)
|
|
||||||
|
|
||||||
_make_dummy_env(
|
_make_dummy_env(
|
||||||
cwd="/workspace",
|
cwd="/workspace",
|
||||||
@@ -211,28 +193,23 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
|
|||||||
task_id="test-persistent-auto-mount",
|
task_id="test-persistent-auto-mount",
|
||||||
)
|
)
|
||||||
|
|
||||||
run_args_str = " ".join(captured_run_args)
|
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
|
||||||
|
assert run_calls, "docker run should have been called"
|
||||||
|
run_args_str = " ".join(run_calls[0][0])
|
||||||
assert f"{project_dir}:/workspace" in run_args_str
|
assert f"{project_dir}:/workspace" in run_args_str
|
||||||
assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in run_args_str
|
assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in run_args_str
|
||||||
|
|
||||||
|
|
||||||
def test_non_persistent_cleanup_removes_container(monkeypatch):
|
def test_non_persistent_cleanup_removes_container(monkeypatch):
|
||||||
"""When container_persistent=false, cleanup() must run docker rm -f so the container is removed (Fixes #1679)."""
|
"""When persistent=false, cleanup() must schedule docker stop + rm."""
|
||||||
run_calls = []
|
|
||||||
|
|
||||||
def _run(cmd, **kwargs):
|
|
||||||
run_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
|
|
||||||
if cmd and getattr(cmd[0], "__str__", None) and "docker" in str(cmd[0]):
|
|
||||||
if len(cmd) >= 2 and cmd[1] == "run":
|
|
||||||
return subprocess.CompletedProcess(cmd, 0, stdout="abc123container\n", stderr="")
|
|
||||||
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
|
||||||
|
|
||||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||||
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
calls = _mock_subprocess_run(monkeypatch)
|
||||||
monkeypatch.setattr(docker_env.subprocess, "Popen", lambda *a, **k: type("P", (), {"poll": lambda: None, "wait": lambda **kw: None, "returncode": 0, "stdout": iter([]), "stdin": None})())
|
|
||||||
|
|
||||||
captured_run_args = []
|
popen_cmds = []
|
||||||
_install_fake_minisweagent(monkeypatch, captured_run_args)
|
monkeypatch.setattr(
|
||||||
|
docker_env.subprocess, "Popen",
|
||||||
|
lambda cmd, **kw: (popen_cmds.append(cmd), type("P", (), {"poll": lambda s: 0, "wait": lambda s, **k: None, "returncode": 0, "stdout": iter([]), "stdin": None})())[1],
|
||||||
|
)
|
||||||
|
|
||||||
env = _make_dummy_env(persistent_filesystem=False, task_id="ephemeral-task")
|
env = _make_dummy_env(persistent_filesystem=False, task_id="ephemeral-task")
|
||||||
assert env._container_id
|
assert env._container_id
|
||||||
@@ -240,8 +217,9 @@ def test_non_persistent_cleanup_removes_container(monkeypatch):
|
|||||||
|
|
||||||
env.cleanup()
|
env.cleanup()
|
||||||
|
|
||||||
rm_calls = [c for c in run_calls if isinstance(c[0], list) and len(c[0]) >= 4 and c[0][1:4] == ["rm", "-f", container_id]]
|
# Should have stop and rm calls via Popen
|
||||||
assert len(rm_calls) >= 1, "cleanup() should run docker rm -f <container_id> when container_persistent=false"
|
stop_cmds = [c for c in popen_cmds if container_id in str(c) and "stop" in str(c)]
|
||||||
|
assert len(stop_cmds) >= 1, f"cleanup() should schedule docker stop for {container_id}"
|
||||||
|
|
||||||
|
|
||||||
class _FakePopen:
|
class _FakePopen:
|
||||||
@@ -263,10 +241,8 @@ def _make_execute_only_env(forward_env=None):
|
|||||||
env._forward_env = forward_env or []
|
env._forward_env = forward_env or []
|
||||||
env._prepare_command = lambda command: (command, None)
|
env._prepare_command = lambda command: (command, None)
|
||||||
env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124}
|
env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124}
|
||||||
env._inner = type("Inner", (), {
|
env._container_id = "test-container"
|
||||||
"container_id": "test-container",
|
env._docker_exe = "/usr/bin/docker"
|
||||||
"config": type("Cfg", (), {"executable": "/usr/bin/docker", "env": {}})(),
|
|
||||||
})()
|
|
||||||
return env
|
return env
|
||||||
|
|
||||||
|
|
||||||
@@ -304,31 +280,3 @@ def test_execute_prefers_shell_env_over_hermes_dotenv(monkeypatch):
|
|||||||
|
|
||||||
assert "GITHUB_TOKEN=value_from_shell" in popen_calls[0]
|
assert "GITHUB_TOKEN=value_from_shell" in popen_calls[0]
|
||||||
assert "GITHUB_TOKEN=value_from_dotenv" not in popen_calls[0]
|
assert "GITHUB_TOKEN=value_from_dotenv" not in popen_calls[0]
|
||||||
|
|
||||||
|
|
||||||
def test_non_persistent_cleanup_removes_container(monkeypatch):
|
|
||||||
"""When container_persistent=false, cleanup() must run docker rm -f so the container is removed (Fixes #1679)."""
|
|
||||||
run_calls = []
|
|
||||||
|
|
||||||
def _run(cmd, **kwargs):
|
|
||||||
run_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
|
|
||||||
if cmd and getattr(cmd[0], '__str__', None) and 'docker' in str(cmd[0]):
|
|
||||||
if len(cmd) >= 2 and cmd[1] == 'run':
|
|
||||||
return subprocess.CompletedProcess(cmd, 0, stdout="abc123container\n", stderr="")
|
|
||||||
return subprocess.CompletedProcess(cmd, 0, stdout='', stderr='')
|
|
||||||
|
|
||||||
monkeypatch.setattr(docker_env, 'find_docker', lambda: '/usr/bin/docker')
|
|
||||||
monkeypatch.setattr(docker_env.subprocess, 'run', _run)
|
|
||||||
monkeypatch.setattr(docker_env.subprocess, 'Popen', lambda *a, **k: type('P', (), {'poll': lambda: None, 'wait': lambda **kw: None, 'returncode': 0, 'stdout': iter([]), 'stdin': None})())
|
|
||||||
|
|
||||||
captured_run_args = []
|
|
||||||
_install_fake_minisweagent(monkeypatch, captured_run_args)
|
|
||||||
|
|
||||||
env = _make_dummy_env(persistent_filesystem=False, task_id='ephemeral-task')
|
|
||||||
assert env._container_id
|
|
||||||
container_id = env._container_id
|
|
||||||
|
|
||||||
env.cleanup()
|
|
||||||
|
|
||||||
rm_calls = [c for c in run_calls if isinstance(c[0], list) and len(c[0]) >= 4 and c[0][1:4] == ['rm', '-f', container_id]]
|
|
||||||
assert len(rm_calls) >= 1, 'cleanup() should run docker rm -f <container_id> when container_persistent=false'
|
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
"""Tests for Modal sandbox infrastructure fixes (TBLite baseline).
|
"""Tests for Modal sandbox infrastructure fixes (TBLite baseline).
|
||||||
|
|
||||||
Covers the 9 bugs discovered while setting up TBLite evaluation:
|
Covers the bugs discovered while setting up TBLite evaluation:
|
||||||
1. Tool resolution — terminal + file tools load with minisweagent
|
1. Tool resolution — terminal + file tools load correctly
|
||||||
2. CWD fix — host paths get replaced with /root for container backends
|
2. CWD fix — host paths get replaced with /root for container backends
|
||||||
3. ephemeral_disk version check
|
3. ephemeral_disk version check
|
||||||
4. Tilde ~ replaced with /root for container backends
|
4. Tilde ~ replaced with /root for container backends
|
||||||
5. ensurepip fix in patches.py for Modal image builder
|
5. ensurepip fix in Modal image builder
|
||||||
6. install_pipx stays True for swerex-remote
|
6. install_pipx stays True for swerex-remote
|
||||||
7. /home/ added to host prefix check
|
7. /home/ added to host prefix check
|
||||||
"""
|
"""
|
||||||
@@ -36,17 +36,8 @@ except ImportError:
|
|||||||
class TestToolResolution:
|
class TestToolResolution:
|
||||||
"""Verify get_tool_definitions returns all expected tools for eval."""
|
"""Verify get_tool_definitions returns all expected tools for eval."""
|
||||||
|
|
||||||
def _has_minisweagent(self):
|
|
||||||
try:
|
|
||||||
import minisweagent # noqa: F401
|
|
||||||
return True
|
|
||||||
except ImportError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def test_terminal_and_file_toolsets_resolve_all_tools(self):
|
def test_terminal_and_file_toolsets_resolve_all_tools(self):
|
||||||
"""enabled_toolsets=['terminal', 'file'] should produce 6 tools."""
|
"""enabled_toolsets=['terminal', 'file'] should produce 6 tools."""
|
||||||
if not self._has_minisweagent():
|
|
||||||
pytest.skip("minisweagent not installed (git submodule update --init)")
|
|
||||||
from model_tools import get_tool_definitions
|
from model_tools import get_tool_definitions
|
||||||
tools = get_tool_definitions(
|
tools = get_tool_definitions(
|
||||||
enabled_toolsets=["terminal", "file"],
|
enabled_toolsets=["terminal", "file"],
|
||||||
@@ -58,18 +49,13 @@ class TestToolResolution:
|
|||||||
|
|
||||||
def test_terminal_tool_present(self):
|
def test_terminal_tool_present(self):
|
||||||
"""The terminal tool must be present (not silently dropped)."""
|
"""The terminal tool must be present (not silently dropped)."""
|
||||||
if not self._has_minisweagent():
|
|
||||||
pytest.skip("minisweagent not installed (git submodule update --init)")
|
|
||||||
from model_tools import get_tool_definitions
|
from model_tools import get_tool_definitions
|
||||||
tools = get_tool_definitions(
|
tools = get_tool_definitions(
|
||||||
enabled_toolsets=["terminal", "file"],
|
enabled_toolsets=["terminal", "file"],
|
||||||
quiet_mode=True,
|
quiet_mode=True,
|
||||||
)
|
)
|
||||||
names = [t["function"]["name"] for t in tools]
|
names = [t["function"]["name"] for t in tools]
|
||||||
assert "terminal" in names, (
|
assert "terminal" in names, f"terminal tool missing! Only got: {names}."
|
||||||
f"terminal tool missing! Only got: {names}. "
|
|
||||||
"Check that minisweagent is installed (git submodule update --init)."
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -269,38 +255,37 @@ class TestModalEnvironmentDefaults:
|
|||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
class TestEnsurepipFix:
|
class TestEnsurepipFix:
|
||||||
"""Verify the pip fix is applied in the patched Modal init."""
|
"""Verify the pip fix is applied in the ModalEnvironment init."""
|
||||||
|
|
||||||
def test_patched_init_creates_image_with_setup_commands(self):
|
def test_modal_environment_creates_image_with_setup_commands(self):
|
||||||
"""The patched __init__ should create a modal.Image with pip fix."""
|
"""ModalEnvironment.__init__ should create a modal.Image with pip fix."""
|
||||||
try:
|
try:
|
||||||
from environments.patches import _patch_swerex_modal
|
from tools.environments.modal import ModalEnvironment
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pytest.skip("environments.patches not importable")
|
pytest.skip("tools.environments.modal not importable")
|
||||||
|
|
||||||
# Check that the patch code references ensurepip
|
|
||||||
import inspect
|
import inspect
|
||||||
source = inspect.getsource(_patch_swerex_modal)
|
source = inspect.getsource(ModalEnvironment.__init__)
|
||||||
assert "ensurepip" in source, (
|
assert "ensurepip" in source, (
|
||||||
"patches._patch_swerex_modal should include ensurepip fix "
|
"ModalEnvironment should include ensurepip fix "
|
||||||
"for Modal's legacy image builder"
|
"for Modal's legacy image builder"
|
||||||
)
|
)
|
||||||
assert "setup_dockerfile_commands" in source, (
|
assert "setup_dockerfile_commands" in source, (
|
||||||
"patches._patch_swerex_modal should use setup_dockerfile_commands "
|
"ModalEnvironment should use setup_dockerfile_commands "
|
||||||
"to fix pip before Modal's bootstrap"
|
"to fix pip before Modal's bootstrap"
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_patched_init_uses_install_pipx_from_config(self):
|
def test_modal_environment_uses_install_pipx(self):
|
||||||
"""The patched init should respect install_pipx from config."""
|
"""ModalEnvironment should pass install_pipx to ModalDeployment."""
|
||||||
try:
|
try:
|
||||||
from environments.patches import _patch_swerex_modal
|
from tools.environments.modal import ModalEnvironment
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pytest.skip("environments.patches not importable")
|
pytest.skip("tools.environments.modal not importable")
|
||||||
|
|
||||||
import inspect
|
import inspect
|
||||||
source = inspect.getsource(_patch_swerex_modal)
|
source = inspect.getsource(ModalEnvironment.__init__)
|
||||||
assert "install_pipx" in source, (
|
assert "install_pipx" in source, (
|
||||||
"patches._patch_swerex_modal should pass install_pipx to ModalDeployment"
|
"ModalEnvironment should pass install_pipx to ModalDeployment"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -18,9 +18,8 @@ def _clear_terminal_env(monkeypatch):
|
|||||||
monkeypatch.delenv(key, raising=False)
|
monkeypatch.delenv(key, raising=False)
|
||||||
|
|
||||||
|
|
||||||
def test_local_terminal_requirements_do_not_depend_on_minisweagent(monkeypatch, caplog):
|
def test_local_terminal_requirements(monkeypatch, caplog):
|
||||||
"""Local backend uses Hermes' own LocalEnvironment wrapper and should not
|
"""Local backend uses Hermes' own LocalEnvironment wrapper."""
|
||||||
be marked unavailable just because `minisweagent` isn't importable."""
|
|
||||||
_clear_terminal_env(monkeypatch)
|
_clear_terminal_env(monkeypatch)
|
||||||
monkeypatch.setenv("TERMINAL_ENV", "local")
|
monkeypatch.setenv("TERMINAL_ENV", "local")
|
||||||
|
|
||||||
@@ -64,7 +63,7 @@ def test_modal_backend_without_token_or_config_logs_specific_error(monkeypatch,
|
|||||||
monkeypatch.setenv("TERMINAL_ENV", "modal")
|
monkeypatch.setenv("TERMINAL_ENV", "modal")
|
||||||
monkeypatch.setenv("HOME", str(tmp_path))
|
monkeypatch.setenv("HOME", str(tmp_path))
|
||||||
monkeypatch.setenv("USERPROFILE", str(tmp_path))
|
monkeypatch.setenv("USERPROFILE", str(tmp_path))
|
||||||
monkeypatch.setattr(terminal_tool_module, "ensure_minisweagent_on_path", lambda *_args, **_kwargs: None)
|
# Pretend swerex is installed
|
||||||
monkeypatch.setattr(terminal_tool_module.importlib.util, "find_spec", lambda _name: object())
|
monkeypatch.setattr(terminal_tool_module.importlib.util, "find_spec", lambda _name: object())
|
||||||
|
|
||||||
with caplog.at_level(logging.ERROR):
|
with caplog.at_level(logging.ERROR):
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ terminal_tool_module = importlib.import_module("tools.terminal_tool")
|
|||||||
|
|
||||||
|
|
||||||
class TestTerminalRequirements:
|
class TestTerminalRequirements:
|
||||||
def test_local_backend_does_not_require_minisweagent_package(self, monkeypatch):
|
def test_local_backend_requirements(self, monkeypatch):
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
terminal_tool_module,
|
terminal_tool_module,
|
||||||
"_get_env_config",
|
"_get_env_config",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"""Docker execution environment wrapping mini-swe-agent's DockerEnvironment.
|
"""Docker execution environment for sandboxed command execution.
|
||||||
|
|
||||||
Adds security hardening (cap-drop ALL, no-new-privileges, PID limits),
|
Security hardened (cap-drop ALL, no-new-privileges, PID limits),
|
||||||
configurable resource limits (CPU, memory, disk), and optional filesystem
|
configurable resource limits (CPU, memory, disk), and optional filesystem
|
||||||
persistence via bind mounts.
|
persistence via bind mounts.
|
||||||
"""
|
"""
|
||||||
@@ -13,6 +13,7 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
|
import uuid
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from tools.environments.base import BaseEnvironment
|
from tools.environments.base import BaseEnvironment
|
||||||
@@ -227,12 +228,9 @@ class DockerEnvironment(BaseEnvironment):
|
|||||||
logger.warning(f"docker_volumes config is not a list: {volumes!r}")
|
logger.warning(f"docker_volumes config is not a list: {volumes!r}")
|
||||||
volumes = []
|
volumes = []
|
||||||
|
|
||||||
# Fail fast if Docker is not available rather than surfacing a cryptic
|
# Fail fast if Docker is not available.
|
||||||
# FileNotFoundError deep inside the mini-swe-agent stack.
|
|
||||||
_ensure_docker_available()
|
_ensure_docker_available()
|
||||||
|
|
||||||
from minisweagent.environments.docker import DockerEnvironment as _Docker
|
|
||||||
|
|
||||||
# Build resource limit args
|
# Build resource limit args
|
||||||
resource_args = []
|
resource_args = []
|
||||||
if cpu > 0:
|
if cpu > 0:
|
||||||
@@ -320,14 +318,28 @@ class DockerEnvironment(BaseEnvironment):
|
|||||||
|
|
||||||
# Resolve the docker executable once so it works even when
|
# Resolve the docker executable once so it works even when
|
||||||
# /usr/local/bin is not in PATH (common on macOS gateway/service).
|
# /usr/local/bin is not in PATH (common on macOS gateway/service).
|
||||||
docker_exe = find_docker() or "docker"
|
self._docker_exe = find_docker() or "docker"
|
||||||
|
|
||||||
self._inner = _Docker(
|
# Start the container directly via `docker run -d`.
|
||||||
image=image, cwd=cwd, timeout=timeout,
|
container_name = f"hermes-{uuid.uuid4().hex[:8]}"
|
||||||
run_args=all_run_args,
|
run_cmd = [
|
||||||
executable=docker_exe,
|
self._docker_exe, "run", "-d",
|
||||||
|
"--name", container_name,
|
||||||
|
"-w", cwd,
|
||||||
|
*all_run_args,
|
||||||
|
image,
|
||||||
|
"sleep", "2h",
|
||||||
|
]
|
||||||
|
logger.debug(f"Starting container: {' '.join(run_cmd)}")
|
||||||
|
result = subprocess.run(
|
||||||
|
run_cmd,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=120, # image pull may take a while
|
||||||
|
check=True,
|
||||||
)
|
)
|
||||||
self._container_id = self._inner.container_id
|
self._container_id = result.stdout.strip()
|
||||||
|
logger.info(f"Started container {container_name} ({self._container_id[:12]})")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _storage_opt_supported() -> bool:
|
def _storage_opt_supported() -> bool:
|
||||||
@@ -389,8 +401,8 @@ class DockerEnvironment(BaseEnvironment):
|
|||||||
exec_command = f"cd {work_dir} && {exec_command}"
|
exec_command = f"cd {work_dir} && {exec_command}"
|
||||||
work_dir = "/"
|
work_dir = "/"
|
||||||
|
|
||||||
assert self._inner.container_id, "Container not started"
|
assert self._container_id, "Container not started"
|
||||||
cmd = [self._inner.config.executable, "exec"]
|
cmd = [self._docker_exe, "exec"]
|
||||||
if effective_stdin is not None:
|
if effective_stdin is not None:
|
||||||
cmd.append("-i")
|
cmd.append("-i")
|
||||||
cmd.extend(["-w", work_dir])
|
cmd.extend(["-w", work_dir])
|
||||||
@@ -401,9 +413,7 @@ class DockerEnvironment(BaseEnvironment):
|
|||||||
value = hermes_env.get(key)
|
value = hermes_env.get(key)
|
||||||
if value is not None:
|
if value is not None:
|
||||||
cmd.extend(["-e", f"{key}={value}"])
|
cmd.extend(["-e", f"{key}={value}"])
|
||||||
for key, value in self._inner.config.env.items():
|
cmd.extend([self._container_id, "bash", "-lc", exec_command])
|
||||||
cmd.extend(["-e", f"{key}={value}"])
|
|
||||||
cmd.extend([self._inner.container_id, "bash", "-lc", exec_command])
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_output_chunks = []
|
_output_chunks = []
|
||||||
@@ -456,24 +466,29 @@ class DockerEnvironment(BaseEnvironment):
|
|||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
"""Stop and remove the container. Bind-mount dirs persist if persistent=True."""
|
"""Stop and remove the container. Bind-mount dirs persist if persistent=True."""
|
||||||
self._inner.cleanup()
|
if self._container_id:
|
||||||
|
|
||||||
if not self._persistent and self._container_id:
|
|
||||||
# Inner cleanup only runs `docker stop` in background; container is left
|
|
||||||
# as stopped. When container_persistent=false we must remove it.
|
|
||||||
docker_exe = find_docker() or self._inner.config.executable
|
|
||||||
try:
|
try:
|
||||||
subprocess.run(
|
# Stop in background so cleanup doesn't block
|
||||||
[docker_exe, "rm", "-f", self._container_id],
|
stop_cmd = (
|
||||||
capture_output=True,
|
f"(timeout 60 {self._docker_exe} stop {self._container_id} || "
|
||||||
timeout=30,
|
f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &"
|
||||||
)
|
)
|
||||||
|
subprocess.Popen(stop_cmd, shell=True)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Failed to remove non-persistent container %s: %s", self._container_id, e)
|
logger.warning("Failed to stop container %s: %s", self._container_id, e)
|
||||||
|
|
||||||
|
if not self._persistent:
|
||||||
|
# Also schedule removal (stop only leaves it as stopped)
|
||||||
|
try:
|
||||||
|
subprocess.Popen(
|
||||||
|
f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &",
|
||||||
|
shell=True,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
self._container_id = None
|
self._container_id = None
|
||||||
|
|
||||||
if not self._persistent:
|
if not self._persistent:
|
||||||
import shutil
|
|
||||||
for d in (self._workspace_dir, self._home_dir):
|
for d in (self._workspace_dir, self._home_dir):
|
||||||
if d:
|
if d:
|
||||||
shutil.rmtree(d, ignore_errors=True)
|
shutil.rmtree(d, ignore_errors=True)
|
||||||
|
|||||||
@@ -1,14 +1,14 @@
|
|||||||
"""Modal cloud execution environment wrapping mini-swe-agent's SwerexModalEnvironment.
|
"""Modal cloud execution environment using SWE-ReX directly.
|
||||||
|
|
||||||
Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
|
Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
|
||||||
is snapshotted on cleanup and restored on next creation, so installed packages,
|
is snapshotted on cleanup and restored on next creation, so installed packages,
|
||||||
project files, and config changes survive across sessions.
|
project files, and config changes survive across sessions.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
import time
|
|
||||||
import uuid
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
@@ -38,15 +38,49 @@ def _save_snapshots(data: Dict[str, str]) -> None:
|
|||||||
_SNAPSHOT_STORE.write_text(json.dumps(data, indent=2))
|
_SNAPSHOT_STORE.write_text(json.dumps(data, indent=2))
|
||||||
|
|
||||||
|
|
||||||
class ModalEnvironment(BaseEnvironment):
|
class _AsyncWorker:
|
||||||
"""Modal cloud execution via mini-swe-agent.
|
"""Background thread with its own event loop for async-safe swe-rex calls.
|
||||||
|
|
||||||
Wraps SwerexModalEnvironment and adds sudo -S support, configurable
|
Allows sync code to submit async coroutines and block for results,
|
||||||
resources (CPU, memory, disk), and optional filesystem persistence
|
even when called from inside another running event loop (e.g. Atropos).
|
||||||
via Modal's snapshot_filesystem() API.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_patches_applied = False
|
def __init__(self):
|
||||||
|
self._loop: Optional[asyncio.AbstractEventLoop] = None
|
||||||
|
self._thread: Optional[threading.Thread] = None
|
||||||
|
self._started = threading.Event()
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
self._thread = threading.Thread(target=self._run_loop, daemon=True)
|
||||||
|
self._thread.start()
|
||||||
|
self._started.wait(timeout=30)
|
||||||
|
|
||||||
|
def _run_loop(self):
|
||||||
|
self._loop = asyncio.new_event_loop()
|
||||||
|
asyncio.set_event_loop(self._loop)
|
||||||
|
self._started.set()
|
||||||
|
self._loop.run_forever()
|
||||||
|
|
||||||
|
def run_coroutine(self, coro, timeout=600):
|
||||||
|
if self._loop is None or self._loop.is_closed():
|
||||||
|
raise RuntimeError("AsyncWorker loop is not running")
|
||||||
|
future = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
||||||
|
return future.result(timeout=timeout)
|
||||||
|
|
||||||
|
def stop(self):
|
||||||
|
if self._loop and self._loop.is_running():
|
||||||
|
self._loop.call_soon_threadsafe(self._loop.stop)
|
||||||
|
if self._thread:
|
||||||
|
self._thread.join(timeout=10)
|
||||||
|
|
||||||
|
|
||||||
|
class ModalEnvironment(BaseEnvironment):
|
||||||
|
"""Modal cloud execution via SWE-ReX.
|
||||||
|
|
||||||
|
Uses swe-rex's ModalDeployment directly for sandbox management.
|
||||||
|
Adds sudo -S support, configurable resources (CPU, memory, disk),
|
||||||
|
and optional filesystem persistence via Modal's snapshot API.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@@ -59,17 +93,11 @@ class ModalEnvironment(BaseEnvironment):
|
|||||||
):
|
):
|
||||||
super().__init__(cwd=cwd, timeout=timeout)
|
super().__init__(cwd=cwd, timeout=timeout)
|
||||||
|
|
||||||
if not ModalEnvironment._patches_applied:
|
|
||||||
try:
|
|
||||||
from environments.patches import apply_patches
|
|
||||||
apply_patches()
|
|
||||||
except ImportError:
|
|
||||||
pass
|
|
||||||
ModalEnvironment._patches_applied = True
|
|
||||||
|
|
||||||
self._persistent = persistent_filesystem
|
self._persistent = persistent_filesystem
|
||||||
self._task_id = task_id
|
self._task_id = task_id
|
||||||
self._base_image = image
|
self._base_image = image
|
||||||
|
self._deployment = None
|
||||||
|
self._worker = _AsyncWorker()
|
||||||
|
|
||||||
sandbox_kwargs = dict(modal_sandbox_kwargs or {})
|
sandbox_kwargs = dict(modal_sandbox_kwargs or {})
|
||||||
|
|
||||||
@@ -88,16 +116,37 @@ class ModalEnvironment(BaseEnvironment):
|
|||||||
|
|
||||||
effective_image = restored_image if restored_image else image
|
effective_image = restored_image if restored_image else image
|
||||||
|
|
||||||
from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
|
# Pre-build a modal.Image with pip fix for Modal's legacy image builder.
|
||||||
self._inner = SwerexModalEnvironment(
|
# Some task images have broken pip; fix via ensurepip before Modal uses it.
|
||||||
image=effective_image,
|
import modal as _modal
|
||||||
cwd=cwd,
|
if isinstance(effective_image, str):
|
||||||
timeout=timeout,
|
effective_image = _modal.Image.from_registry(
|
||||||
startup_timeout=180.0,
|
effective_image,
|
||||||
runtime_timeout=3600.0,
|
setup_dockerfile_commands=[
|
||||||
modal_sandbox_kwargs=sandbox_kwargs,
|
"RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
|
||||||
install_pipx=True, # Required: installs pipx + swe-rex runtime (swerex-remote)
|
"python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
|
||||||
)
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Start the async worker thread and create the deployment on it
|
||||||
|
# so all gRPC channels are bound to the worker's event loop.
|
||||||
|
self._worker.start()
|
||||||
|
|
||||||
|
from swerex.deployment.modal import ModalDeployment
|
||||||
|
|
||||||
|
async def _create_and_start():
|
||||||
|
deployment = ModalDeployment(
|
||||||
|
image=effective_image,
|
||||||
|
startup_timeout=180.0,
|
||||||
|
runtime_timeout=3600.0,
|
||||||
|
deployment_timeout=3600.0,
|
||||||
|
install_pipx=True,
|
||||||
|
modal_sandbox_kwargs=sandbox_kwargs,
|
||||||
|
)
|
||||||
|
await deployment.start()
|
||||||
|
return deployment
|
||||||
|
|
||||||
|
self._deployment = self._worker.run_coroutine(_create_and_start())
|
||||||
|
|
||||||
def execute(self, command: str, cwd: str = "", *,
|
def execute(self, command: str, cwd: str = "", *,
|
||||||
timeout: int | None = None,
|
timeout: int | None = None,
|
||||||
@@ -114,21 +163,39 @@ class ModalEnvironment(BaseEnvironment):
|
|||||||
# subprocess stdin directly the way a local Popen can. When a sudo
|
# subprocess stdin directly the way a local Popen can. When a sudo
|
||||||
# password is present, use a shell-level pipe from printf so that the
|
# password is present, use a shell-level pipe from printf so that the
|
||||||
# password feeds sudo -S without appearing as an echo argument embedded
|
# password feeds sudo -S without appearing as an echo argument embedded
|
||||||
# in the shell string. The password is still visible in the remote
|
# in the shell string.
|
||||||
# sandbox's command line, but it is not exposed on the user's local
|
|
||||||
# machine — which is the primary threat being mitigated.
|
|
||||||
if sudo_stdin is not None:
|
if sudo_stdin is not None:
|
||||||
import shlex
|
import shlex
|
||||||
exec_command = (
|
exec_command = (
|
||||||
f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
|
f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from swerex.runtime.abstract import Command as RexCommand
|
||||||
|
|
||||||
|
effective_cwd = cwd or self.cwd
|
||||||
|
effective_timeout = timeout or self.timeout
|
||||||
|
|
||||||
# Run in a background thread so we can poll for interrupts
|
# Run in a background thread so we can poll for interrupts
|
||||||
result_holder = {"value": None, "error": None}
|
result_holder = {"value": None, "error": None}
|
||||||
|
|
||||||
def _run():
|
def _run():
|
||||||
try:
|
try:
|
||||||
result_holder["value"] = self._inner.execute(exec_command, cwd=cwd, timeout=timeout)
|
async def _do_execute():
|
||||||
|
return await self._deployment.runtime.execute(
|
||||||
|
RexCommand(
|
||||||
|
command=exec_command,
|
||||||
|
shell=True,
|
||||||
|
check=False,
|
||||||
|
cwd=effective_cwd,
|
||||||
|
timeout=effective_timeout,
|
||||||
|
merge_output_streams=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
output = self._worker.run_coroutine(_do_execute())
|
||||||
|
result_holder["value"] = {
|
||||||
|
"output": output.stdout,
|
||||||
|
"returncode": output.exit_code,
|
||||||
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
result_holder["error"] = e
|
result_holder["error"] = e
|
||||||
|
|
||||||
@@ -138,7 +205,10 @@ class ModalEnvironment(BaseEnvironment):
|
|||||||
t.join(timeout=0.2)
|
t.join(timeout=0.2)
|
||||||
if is_interrupted():
|
if is_interrupted():
|
||||||
try:
|
try:
|
||||||
self._inner.stop()
|
self._worker.run_coroutine(
|
||||||
|
asyncio.wait_for(self._deployment.stop(), timeout=10),
|
||||||
|
timeout=15,
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return {
|
return {
|
||||||
@@ -152,35 +222,38 @@ class ModalEnvironment(BaseEnvironment):
|
|||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
"""Snapshot the filesystem (if persistent) then stop the sandbox."""
|
"""Snapshot the filesystem (if persistent) then stop the sandbox."""
|
||||||
# Check if _inner was ever set (init may have failed)
|
if self._deployment is None:
|
||||||
if not hasattr(self, '_inner') or self._inner is None:
|
|
||||||
return
|
return
|
||||||
|
|
||||||
if self._persistent:
|
if self._persistent:
|
||||||
try:
|
try:
|
||||||
sandbox = getattr(self._inner, 'deployment', None)
|
sandbox = getattr(self._deployment, '_sandbox', None)
|
||||||
sandbox = getattr(sandbox, '_sandbox', None) if sandbox else None
|
|
||||||
if sandbox:
|
if sandbox:
|
||||||
import asyncio
|
|
||||||
async def _snapshot():
|
async def _snapshot():
|
||||||
img = await sandbox.snapshot_filesystem.aio()
|
img = await sandbox.snapshot_filesystem.aio()
|
||||||
return img.object_id
|
return img.object_id
|
||||||
try:
|
|
||||||
snapshot_id = asyncio.run(_snapshot())
|
|
||||||
except RuntimeError:
|
|
||||||
import concurrent.futures
|
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
|
||||||
snapshot_id = pool.submit(
|
|
||||||
asyncio.run, _snapshot()
|
|
||||||
).result(timeout=60)
|
|
||||||
|
|
||||||
snapshots = _load_snapshots()
|
try:
|
||||||
snapshots[self._task_id] = snapshot_id
|
snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
|
||||||
_save_snapshots(snapshots)
|
except Exception:
|
||||||
logger.info("Modal: saved filesystem snapshot %s for task %s",
|
snapshot_id = None
|
||||||
snapshot_id[:20], self._task_id)
|
|
||||||
|
if snapshot_id:
|
||||||
|
snapshots = _load_snapshots()
|
||||||
|
snapshots[self._task_id] = snapshot_id
|
||||||
|
_save_snapshots(snapshots)
|
||||||
|
logger.info("Modal: saved filesystem snapshot %s for task %s",
|
||||||
|
snapshot_id[:20], self._task_id)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Modal: filesystem snapshot failed: %s", e)
|
logger.warning("Modal: filesystem snapshot failed: %s", e)
|
||||||
|
|
||||||
if hasattr(self._inner, 'stop'):
|
try:
|
||||||
self._inner.stop()
|
self._worker.run_coroutine(
|
||||||
|
asyncio.wait_for(self._deployment.stop(), timeout=10),
|
||||||
|
timeout=15,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
self._worker.stop()
|
||||||
|
self._deployment = None
|
||||||
|
|||||||
@@ -51,13 +51,6 @@ logger = logging.getLogger(__name__)
|
|||||||
from tools.interrupt import is_interrupted, _interrupt_event
|
from tools.interrupt import is_interrupted, _interrupt_event
|
||||||
|
|
||||||
|
|
||||||
# Add mini-swe-agent to path if not installed. In git worktrees the populated
|
|
||||||
# submodule may live in the main checkout rather than the worktree itself.
|
|
||||||
from minisweagent_path import ensure_minisweagent_on_path
|
|
||||||
|
|
||||||
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Custom Singularity Environment with more space
|
# Custom Singularity Environment with more space
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -1188,27 +1181,15 @@ def terminal_tool(
|
|||||||
|
|
||||||
|
|
||||||
def check_terminal_requirements() -> bool:
|
def check_terminal_requirements() -> bool:
|
||||||
"""Check if all requirements for the terminal tool are met.
|
"""Check if all requirements for the terminal tool are met."""
|
||||||
|
|
||||||
Important: local and singularity backends now use Hermes' own environment
|
|
||||||
wrappers directly and do not require the ``minisweagent`` Python package to
|
|
||||||
be installed. Docker and Modal still rely on mini-swe-agent internals.
|
|
||||||
"""
|
|
||||||
config = _get_env_config()
|
config = _get_env_config()
|
||||||
env_type = config["env_type"]
|
env_type = config["env_type"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if env_type == "local":
|
if env_type == "local":
|
||||||
# Local execution uses Hermes' own LocalEnvironment wrapper and does
|
|
||||||
# not depend on minisweagent being importable.
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
elif env_type == "docker":
|
elif env_type == "docker":
|
||||||
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
|
|
||||||
if importlib.util.find_spec("minisweagent") is None:
|
|
||||||
logger.error("mini-swe-agent is required for docker terminal backend but is not importable")
|
|
||||||
return False
|
|
||||||
# Check if docker is available (use find_docker for macOS PATH issues)
|
|
||||||
from tools.environments.docker import find_docker
|
from tools.environments.docker import find_docker
|
||||||
docker = find_docker()
|
docker = find_docker()
|
||||||
if not docker:
|
if not docker:
|
||||||
@@ -1225,7 +1206,6 @@ def check_terminal_requirements() -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
elif env_type == "ssh":
|
elif env_type == "ssh":
|
||||||
# Check that host and user are configured
|
|
||||||
if not config.get("ssh_host") or not config.get("ssh_user"):
|
if not config.get("ssh_host") or not config.get("ssh_user"):
|
||||||
logger.error(
|
logger.error(
|
||||||
"SSH backend selected but TERMINAL_SSH_HOST and TERMINAL_SSH_USER "
|
"SSH backend selected but TERMINAL_SSH_HOST and TERMINAL_SSH_USER "
|
||||||
@@ -1235,11 +1215,9 @@ def check_terminal_requirements() -> bool:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
elif env_type == "modal":
|
elif env_type == "modal":
|
||||||
ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
|
if importlib.util.find_spec("swerex") is None:
|
||||||
if importlib.util.find_spec("minisweagent") is None:
|
logger.error("swe-rex is required for modal terminal backend: pip install 'swe-rex[modal]'")
|
||||||
logger.error("mini-swe-agent is required for modal terminal backend but is not importable")
|
|
||||||
return False
|
return False
|
||||||
# Check for modal token
|
|
||||||
has_token = os.getenv("MODAL_TOKEN_ID") is not None
|
has_token = os.getenv("MODAL_TOKEN_ID") is not None
|
||||||
has_config = Path.home().joinpath(".modal.toml").exists()
|
has_config = Path.home().joinpath(".modal.toml").exists()
|
||||||
if not (has_token or has_config):
|
if not (has_token or has_config):
|
||||||
@@ -1269,7 +1247,7 @@ def check_terminal_requirements() -> bool:
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Simple test when run directly
|
# Simple test when run directly
|
||||||
print("Terminal Tool Module (mini-swe-agent backend)")
|
print("Terminal Tool Module")
|
||||||
print("=" * 50)
|
print("=" * 50)
|
||||||
|
|
||||||
config = _get_env_config()
|
config = _get_env_config()
|
||||||
|
|||||||
Reference in New Issue
Block a user