2026-03-09 20:26:09 -05:00
|
|
|
"""Tests for Modal sandbox infrastructure fixes (TBLite baseline).
|
|
|
|
|
|
refactor: remove mini-swe-agent dependency — inline Docker/Modal backends (#2804)
Drop the mini-swe-agent git submodule. All terminal backends now use
hermes-agent's own environment implementations directly.
Docker backend:
- Inline the `docker run -d` container startup (was 15 lines in
minisweagent's DockerEnvironment). Our wrapper already handled
execute(), cleanup(), security hardening, volumes, and resource limits.
Modal backend:
- Import swe-rex's ModalDeployment directly instead of going through
minisweagent's 90-line passthrough wrapper.
- Bake the _AsyncWorker pattern (from environments/patches.py) directly
into ModalEnvironment for Atropos compatibility without monkey-patching.
Cleanup:
- Remove minisweagent_path.py (submodule path resolution helper)
- Remove submodule init/install from install.sh and setup-hermes.sh
- Remove mini-swe-agent from .gitmodules
- environments/patches.py is now a no-op (kept for backward compat)
- terminal_tool.py no longer does sys.path hacking for minisweagent
- mini_swe_runner.py guards imports (optional, for RL training only)
- Update all affected tests to mock the new direct subprocess calls
- Update README.md, CONTRIBUTING.md
No functionality change — all Docker, Modal, local, SSH, Singularity,
and Daytona backends behave identically. 6093 tests pass.
2026-03-24 07:30:25 -07:00
|
|
|
Covers the bugs discovered while setting up TBLite evaluation:
|
|
|
|
|
1. Tool resolution — terminal + file tools load correctly
|
2026-03-09 20:26:09 -05:00
|
|
|
2. CWD fix — host paths get replaced with /root for container backends
|
|
|
|
|
3. ephemeral_disk version check
|
|
|
|
|
4. Tilde ~ replaced with /root for container backends
|
refactor: remove mini-swe-agent dependency — inline Docker/Modal backends (#2804)
Drop the mini-swe-agent git submodule. All terminal backends now use
hermes-agent's own environment implementations directly.
Docker backend:
- Inline the `docker run -d` container startup (was 15 lines in
minisweagent's DockerEnvironment). Our wrapper already handled
execute(), cleanup(), security hardening, volumes, and resource limits.
Modal backend:
- Import swe-rex's ModalDeployment directly instead of going through
minisweagent's 90-line passthrough wrapper.
- Bake the _AsyncWorker pattern (from environments/patches.py) directly
into ModalEnvironment for Atropos compatibility without monkey-patching.
Cleanup:
- Remove minisweagent_path.py (submodule path resolution helper)
- Remove submodule init/install from install.sh and setup-hermes.sh
- Remove mini-swe-agent from .gitmodules
- environments/patches.py is now a no-op (kept for backward compat)
- terminal_tool.py no longer does sys.path hacking for minisweagent
- mini_swe_runner.py guards imports (optional, for RL training only)
- Update all affected tests to mock the new direct subprocess calls
- Update README.md, CONTRIBUTING.md
No functionality change — all Docker, Modal, local, SSH, Singularity,
and Daytona backends behave identically. 6093 tests pass.
2026-03-24 07:30:25 -07:00
|
|
|
5. ensurepip fix in Modal image builder
|
2026-03-09 20:26:09 -05:00
|
|
|
6. install_pipx stays True for swerex-remote
|
|
|
|
|
7. /home/ added to host prefix check
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
# Put the repository root (three directory levels above this file) on
# sys.path so project modules can be imported by the tests below.
_repo_root = Path(__file__).resolve().parent.parent.parent
if not sys.path.count(str(_repo_root)):
    sys.path.insert(0, str(_repo_root))
|
|
|
|
|
|
2026-03-09 23:14:53 -05:00
|
|
|
# Import the terminal tool module once for the whole file; skip every test
# here when it (or one of its dependencies) cannot be imported.
try:
    import tools.terminal_tool  # noqa: F401
except ImportError:
    pytest.skip("hermes-agent tools not importable (missing deps)", allow_module_level=True)
else:
    # Fetch the module object from sys.modules rather than via attribute access
    # on the `tools` package.
    # NOTE(review): presumably the package may rebind `terminal_tool` to
    # something other than the submodule — confirm before simplifying.
    _tt_mod = sys.modules["tools.terminal_tool"]
|
2026-03-09 20:26:09 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# Test 1: Tool resolution includes terminal + file tools
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
class TestToolResolution:
    """Check which tool definitions the terminal + file toolsets resolve to."""

    @staticmethod
    def _resolved_tool_names():
        """Return the function names produced for the terminal and file toolsets."""
        from model_tools import get_tool_definitions

        definitions = get_tool_definitions(
            enabled_toolsets=["terminal", "file"],
            quiet_mode=True,
        )
        return [definition["function"]["name"] for definition in definitions]

    def test_terminal_and_file_toolsets_resolve_all_tools(self):
        """The two toolsets together must yield exactly the six expected tools."""
        expected = {"terminal", "process", "read_file", "write_file", "search_files", "patch"}
        names = set(self._resolved_tool_names())
        assert expected == names, f"Expected {expected}, got {names}"

    def test_terminal_tool_present(self):
        """The terminal tool must not be silently dropped from the result."""
        names = self._resolved_tool_names()
        assert "terminal" in names, f"terminal tool missing! Only got: {names}."
|
2026-03-09 20:26:09 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# Test 2-4: CWD handling for container backends
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
class TestCwdHandling:
    """Check how ``_get_env_config`` derives the working directory per backend."""

    @staticmethod
    def _resolve_config(overrides, unset=()):
        """Evaluate ``_get_env_config()`` under a temporary environment.

        *overrides* are layered on top of the current environment and every
        variable named in *unset* is removed. ``os.environ`` is restored when
        the call returns.
        """
        environment = {key: value for key, value in os.environ.items() if key not in unset}
        environment.update(overrides)
        with patch.dict(os.environ, environment, clear=True):
            return _tt_mod._get_env_config()

    def test_home_path_replaced_for_modal(self):
        """A /home/... TERMINAL_CWD must become /root on the modal backend."""
        cfg = self._resolve_config({
            "TERMINAL_ENV": "modal",
            "TERMINAL_CWD": "/home/dakota/github/hermes-agent",
        })
        assert cfg["cwd"] == "/root", (
            f"Expected /root, got {cfg['cwd']}. "
            "/home/ paths should be replaced for modal backend."
        )

    def test_users_path_replaced_for_docker_by_default(self):
        """Without the mount flag, docker must discard a /Users/... host path."""
        cfg = self._resolve_config({
            "TERMINAL_ENV": "docker",
            "TERMINAL_CWD": "/Users/someone/projects",
        })
        assert cfg["cwd"] == "/root", (
            f"Expected /root, got {cfg['cwd']}. "
            "Host paths should be discarded for docker backend by default."
        )
        assert cfg["host_cwd"] is None
        assert cfg["docker_mount_cwd_to_workspace"] is False

    def test_users_path_maps_to_workspace_for_docker_when_enabled(self):
        """With the mount flag set, docker maps the host cwd to /workspace."""
        cfg = self._resolve_config({
            "TERMINAL_ENV": "docker",
            "TERMINAL_CWD": "/Users/someone/projects",
            "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true",
        })
        assert cfg["cwd"] == "/workspace"
        assert cfg["host_cwd"] == "/Users/someone/projects"
        assert cfg["docker_mount_cwd_to_workspace"] is True

    def test_windows_path_replaced_for_modal(self):
        """A C:\\Users\\... TERMINAL_CWD must become /root on the modal backend."""
        cfg = self._resolve_config({
            "TERMINAL_ENV": "modal",
            "TERMINAL_CWD": "C:\\Users\\someone\\projects",
        })
        assert cfg["cwd"] == "/root"

    def test_default_cwd_is_root_for_container_backends(self):
        """With no TERMINAL_CWD set, every container backend defaults to /root."""
        cleared = ("TERMINAL_CWD", "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE")
        for backend in ("modal", "docker", "singularity", "daytona"):
            cfg = self._resolve_config({"TERMINAL_ENV": backend}, unset=cleared)
            assert cfg["cwd"] == "/root", (
                f"Backend {backend}: expected /root default, got {cfg['cwd']}"
            )

    def test_docker_default_cwd_maps_current_directory_when_enabled(self):
        """With the mount flag and no TERMINAL_CWD, docker uses the process cwd."""
        with patch("tools.terminal_tool.os.getcwd", return_value="/home/user/project"):
            cfg = self._resolve_config(
                {
                    "TERMINAL_ENV": "docker",
                    "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true",
                },
                unset=("TERMINAL_CWD",),
            )
        assert cfg["cwd"] == "/workspace"
        assert cfg["host_cwd"] == "/home/user/project"

    def test_local_backend_uses_getcwd(self):
        """The local backend keeps the real process cwd rather than /root."""
        cfg = self._resolve_config({"TERMINAL_ENV": "local"}, unset=("TERMINAL_CWD",))
        assert cfg["cwd"] == os.getcwd()

    def test_create_environment_passes_docker_host_cwd_and_flag(self, monkeypatch):
        """_create_environment must forward host_cwd and the mount flag to docker."""
        marker = object()
        received = {}

        def _recording_factory(**kwargs):
            # Stand-in for the Docker environment class: record what it was given.
            received.update(kwargs)
            return marker

        monkeypatch.setattr(_tt_mod, "_DockerEnvironment", _recording_factory)

        created = _tt_mod._create_environment(
            env_type="docker",
            image="python:3.11",
            cwd="/workspace",
            timeout=60,
            container_config={"docker_mount_cwd_to_workspace": True},
            host_cwd="/home/user/project",
        )

        assert created is marker
        assert received["cwd"] == "/workspace"
        assert received["host_cwd"] == "/home/user/project"
        assert received["auto_mount_cwd"] is True

    def test_ssh_preserves_home_paths(self):
        """The ssh backend must leave a /home/... TERMINAL_CWD untouched."""
        cfg = self._resolve_config({
            "TERMINAL_ENV": "ssh",
            "TERMINAL_CWD": "/home/remote-user/work",
            "TERMINAL_SSH_HOST": "example.com",
            "TERMINAL_SSH_USER": "user",
        })
        assert cfg["cwd"] == "/home/remote-user/work", (
            "SSH backend should preserve /home/ paths"
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# Test 5: ephemeral_disk version check
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
class TestEphemeralDiskCheck:
|
|
|
|
|
"""Verify ephemeral_disk is only passed when modal supports it."""
|
|
|
|
|
|
|
|
|
|
def test_ephemeral_disk_skipped_when_unsupported(self):
|
|
|
|
|
"""If modal.Sandbox.create doesn't have ephemeral_disk param, skip it."""
|
|
|
|
|
# Mock the modal import and Sandbox.create signature
|
|
|
|
|
mock_modal = MagicMock()
|
|
|
|
|
mock_sandbox_create = MagicMock()
|
|
|
|
|
# Simulate a signature WITHOUT ephemeral_disk
|
|
|
|
|
import inspect
|
|
|
|
|
mock_params = {
|
|
|
|
|
"args": inspect.Parameter("args", inspect.Parameter.VAR_POSITIONAL),
|
|
|
|
|
"image": inspect.Parameter("image", inspect.Parameter.KEYWORD_ONLY),
|
|
|
|
|
"timeout": inspect.Parameter("timeout", inspect.Parameter.KEYWORD_ONLY),
|
|
|
|
|
"cpu": inspect.Parameter("cpu", inspect.Parameter.KEYWORD_ONLY),
|
|
|
|
|
"memory": inspect.Parameter("memory", inspect.Parameter.KEYWORD_ONLY),
|
|
|
|
|
}
|
|
|
|
|
mock_sig = inspect.Signature(parameters=list(mock_params.values()))
|
|
|
|
|
|
|
|
|
|
with patch.dict(os.environ, {"TERMINAL_ENV": "modal"}):
|
|
|
|
|
config = _tt_mod._get_env_config()
|
|
|
|
|
# The config has container_disk default of 51200
|
|
|
|
|
disk = config.get("container_disk", 51200)
|
|
|
|
|
assert disk > 0, "disk should default to > 0"
|
|
|
|
|
|
|
|
|
|
# Simulate the version check logic from terminal_tool.py
|
|
|
|
|
sandbox_kwargs = {}
|
|
|
|
|
if disk > 0:
|
|
|
|
|
try:
|
|
|
|
|
if "ephemeral_disk" in mock_params:
|
|
|
|
|
sandbox_kwargs["ephemeral_disk"] = disk
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
assert "ephemeral_disk" not in sandbox_kwargs, (
|
|
|
|
|
"ephemeral_disk should not be set when Sandbox.create doesn't support it"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# Test 6: ModalEnvironment defaults
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
class TestModalEnvironmentDefaults:
    """Check the default argument values declared by ModalEnvironment."""

    def test_default_cwd_is_root(self):
        """The ``cwd`` parameter of ModalEnvironment.__init__ must default to /root."""
        import inspect

        from tools.environments.modal import ModalEnvironment

        init_parameters = inspect.signature(ModalEnvironment.__init__).parameters
        cwd_default = init_parameters["cwd"].default
        assert cwd_default == "/root", (
            f"ModalEnvironment cwd default should be /root, got {cwd_default!r}. "
            "Tilde ~ is not expanded by subprocess.run(cwd=...)."
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# Test 7: ensurepip fix in ModalEnvironment.__init__
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
class TestEnsurepipFix:
    """Check that ModalEnvironment.__init__ still carries the pip bootstrap fix."""

    @staticmethod
    def _modal_init_source():
        """Return the source text of ModalEnvironment.__init__.

        Skips the calling test when ``tools.environments.modal`` cannot be
        imported.
        """
        try:
            from tools.environments.modal import ModalEnvironment
        except ImportError:
            pytest.skip("tools.environments.modal not importable")

        import inspect

        return inspect.getsource(ModalEnvironment.__init__)

    def test_modal_environment_creates_image_with_setup_commands(self):
        """__init__ must mention both ensurepip and setup_dockerfile_commands."""
        source = self._modal_init_source()
        assert "ensurepip" in source, (
            "ModalEnvironment should include ensurepip fix "
            "for Modal's legacy image builder"
        )
        assert "setup_dockerfile_commands" in source, (
            "ModalEnvironment should use setup_dockerfile_commands "
            "to fix pip before Modal's bootstrap"
        )

    def test_modal_environment_uses_install_pipx(self):
        """__init__ must mention install_pipx."""
        source = self._modal_init_source()
        assert "install_pipx" in source, (
            "ModalEnvironment should pass install_pipx to ModalDeployment"
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# =========================================================================
|
|
|
|
|
# Test 8: Host prefix list completeness
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
class TestHostPrefixList:
    """Check that _get_env_config's source mentions the common host-only prefixes."""

    def test_all_common_host_prefixes_caught(self):
        """Source of _get_env_config must contain /Users/, /home/, C:\\ and C:/."""
        import inspect

        # Inspect the real implementation text rather than a copy of the list.
        config_source = inspect.getsource(_tt_mod._get_env_config)
        host_prefixes = ("/Users/", "/home/", 'C:\\\\"', "C:/")
        for prefix in host_prefixes:
            # Accept the prefix either with or without its trailing quote.
            candidates = (prefix.rstrip('"'), prefix)
            assert any(candidate in config_source for candidate in candidates), (
                f"Host prefix {prefix!r} not found in _get_env_config. "
                "Container backends need this to avoid using host paths."
            )
|