Files
hermes-agent/tests/tools/test_modal_sandbox_fixes.py
Siddharth Balyan f3006ebef9 refactor(tests): re-architect tests + fix CI failures (#5946)
* refactor: re-architect tests to mirror the codebase

* Update tests.yml

* fix: add missing tool_error imports after registry refactor

* fix(tests): replace patch.dict with monkeypatch to prevent env var leaks under xdist

patch.dict(os.environ) can leak TERMINAL_ENV across xdist workers,
causing test_code_execution tests to hit the Modal remote path.

* fix(tests): fix update_check and telegram xdist failures

- test_update_check: replace patch("hermes_cli.banner.os.getenv") with
  monkeypatch.setenv("HERMES_HOME") — banner.py no longer imports os
  directly, it uses get_hermes_home() from hermes_constants.

- test_telegram_conflict/approval_buttons: provide real exception classes
  for telegram.error mock (NetworkError, TimedOut, BadRequest) so the
  except clause in connect() doesn't fail with "catching classes that do
  not inherit from BaseException" when xdist pollutes sys.modules.

* fix(tests): accept unavailable_models kwarg in _prompt_model_selection mock
2026-04-07 17:19:07 -07:00

291 lines
12 KiB
Python

"""Tests for Modal sandbox infrastructure fixes (TBLite baseline).
Covers the bugs discovered while setting up TBLite evaluation:
1. Tool resolution — terminal + file tools load correctly
2. CWD fix — host paths get replaced with /root for container backends
3. ephemeral_disk version check
4. ensurepip fix in Modal image builder
5. No swe-rex dependency — uses native Modal SDK
6. /home/ added to host prefix check
"""
import os
import sys
from pathlib import Path
import pytest
# Ensure repo root is importable
_repo_root = Path(__file__).resolve().parent.parent.parent
if str(_repo_root) not in sys.path:
sys.path.insert(0, str(_repo_root))
try:
import tools.terminal_tool # noqa: F401
_tt_mod = sys.modules["tools.terminal_tool"]
except ImportError:
pytest.skip("hermes-agent tools not importable (missing deps)", allow_module_level=True)
# =========================================================================
# Test 1: Tool resolution includes terminal + file tools
# =========================================================================
class TestToolResolution:
"""Verify get_tool_definitions returns all expected tools for eval."""
def test_terminal_and_file_toolsets_resolve_all_tools(self):
"""enabled_toolsets=['terminal', 'file'] should produce 6 tools."""
from model_tools import get_tool_definitions
tools = get_tool_definitions(
enabled_toolsets=["terminal", "file"],
quiet_mode=True,
)
names = {t["function"]["name"] for t in tools}
expected = {"terminal", "process", "read_file", "write_file", "search_files", "patch"}
assert expected == names, f"Expected {expected}, got {names}"
def test_terminal_tool_present(self):
"""The terminal tool must be present (not silently dropped)."""
from model_tools import get_tool_definitions
tools = get_tool_definitions(
enabled_toolsets=["terminal", "file"],
quiet_mode=True,
)
names = [t["function"]["name"] for t in tools]
assert "terminal" in names, f"terminal tool missing! Only got: {names}."
# =========================================================================
# Test 2-4: CWD handling for container backends
# =========================================================================
class TestCwdHandling:
"""Verify host paths are sanitized for container backends."""
def test_home_path_replaced_for_modal(self, monkeypatch):
"""TERMINAL_CWD=/home/user/... should be replaced with /root for modal."""
monkeypatch.setenv("TERMINAL_ENV", "modal")
monkeypatch.setenv("TERMINAL_CWD", "/home/dakota/github/hermes-agent")
config = _tt_mod._get_env_config()
assert config["cwd"] == "/root", (
f"Expected /root, got {config['cwd']}. "
"/home/ paths should be replaced for modal backend."
)
def test_users_path_replaced_for_docker_by_default(self, monkeypatch):
"""Docker should keep host paths out of the sandbox unless explicitly enabled."""
monkeypatch.setenv("TERMINAL_ENV", "docker")
monkeypatch.setenv("TERMINAL_CWD", "/Users/someone/projects")
config = _tt_mod._get_env_config()
assert config["cwd"] == "/root", (
f"Expected /root, got {config['cwd']}. "
"Host paths should be discarded for docker backend by default."
)
assert config["host_cwd"] is None
assert config["docker_mount_cwd_to_workspace"] is False
def test_users_path_maps_to_workspace_for_docker_when_enabled(self, monkeypatch):
"""Docker should map the host cwd into /workspace only when explicitly enabled."""
monkeypatch.setenv("TERMINAL_ENV", "docker")
monkeypatch.setenv("TERMINAL_CWD", "/Users/someone/projects")
monkeypatch.setenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "true")
config = _tt_mod._get_env_config()
assert config["cwd"] == "/workspace"
assert config["host_cwd"] == "/Users/someone/projects"
assert config["docker_mount_cwd_to_workspace"] is True
def test_windows_path_replaced_for_modal(self, monkeypatch):
"""TERMINAL_CWD=C:\\Users\\... should be replaced for modal."""
monkeypatch.setenv("TERMINAL_ENV", "modal")
monkeypatch.setenv("TERMINAL_CWD", "C:\\Users\\someone\\projects")
config = _tt_mod._get_env_config()
assert config["cwd"] == "/root"
@pytest.mark.parametrize("backend", ["modal", "docker", "singularity", "daytona"])
def test_default_cwd_is_root_for_container_backends(self, backend, monkeypatch):
"""Container backends should default to /root, not ~."""
monkeypatch.setenv("TERMINAL_ENV", backend)
monkeypatch.delenv("TERMINAL_CWD", raising=False)
monkeypatch.delenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", raising=False)
config = _tt_mod._get_env_config()
assert config["cwd"] == "/root", (
f"Backend {backend}: expected /root default, got {config['cwd']}"
)
def test_docker_default_cwd_maps_current_directory_when_enabled(self, monkeypatch):
"""Docker should use /workspace when cwd mounting is explicitly enabled."""
monkeypatch.setattr("tools.terminal_tool.os.getcwd", lambda: "/home/user/project")
monkeypatch.setenv("TERMINAL_ENV", "docker")
monkeypatch.setenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "true")
monkeypatch.delenv("TERMINAL_CWD", raising=False)
config = _tt_mod._get_env_config()
assert config["cwd"] == "/workspace"
assert config["host_cwd"] == "/home/user/project"
def test_local_backend_uses_getcwd(self, monkeypatch):
"""Local backend should use os.getcwd(), not /root."""
monkeypatch.setenv("TERMINAL_ENV", "local")
monkeypatch.delenv("TERMINAL_CWD", raising=False)
config = _tt_mod._get_env_config()
assert config["cwd"] == os.getcwd()
def test_create_environment_passes_docker_host_cwd_and_flag(self, monkeypatch):
"""Docker host cwd and mount flag should reach DockerEnvironment."""
captured = {}
sentinel = object()
def _fake_docker_environment(**kwargs):
captured.update(kwargs)
return sentinel
monkeypatch.setattr(_tt_mod, "_DockerEnvironment", _fake_docker_environment)
env = _tt_mod._create_environment(
env_type="docker",
image="python:3.11",
cwd="/workspace",
timeout=60,
container_config={"docker_mount_cwd_to_workspace": True},
host_cwd="/home/user/project",
)
assert env is sentinel
assert captured["cwd"] == "/workspace"
assert captured["host_cwd"] == "/home/user/project"
assert captured["auto_mount_cwd"] is True
def test_ssh_preserves_home_paths(self, monkeypatch):
"""SSH backend should NOT replace /home/ paths (they're valid remotely)."""
monkeypatch.setenv("TERMINAL_ENV", "ssh")
monkeypatch.setenv("TERMINAL_CWD", "/home/remote-user/work")
monkeypatch.setenv("TERMINAL_SSH_HOST", "example.com")
monkeypatch.setenv("TERMINAL_SSH_USER", "user")
config = _tt_mod._get_env_config()
assert config["cwd"] == "/home/remote-user/work", (
"SSH backend should preserve /home/ paths"
)
# =========================================================================
# Test 5: ephemeral_disk version check
# =========================================================================
class TestEphemeralDiskCheck:
"""Verify ephemeral_disk is only passed when modal supports it."""
def test_ephemeral_disk_skipped_when_unsupported(self, monkeypatch):
"""If modal.Sandbox.create doesn't have ephemeral_disk param, skip it."""
import inspect
mock_params = {
"args": inspect.Parameter("args", inspect.Parameter.VAR_POSITIONAL),
"image": inspect.Parameter("image", inspect.Parameter.KEYWORD_ONLY),
"timeout": inspect.Parameter("timeout", inspect.Parameter.KEYWORD_ONLY),
"cpu": inspect.Parameter("cpu", inspect.Parameter.KEYWORD_ONLY),
"memory": inspect.Parameter("memory", inspect.Parameter.KEYWORD_ONLY),
}
monkeypatch.setenv("TERMINAL_ENV", "modal")
config = _tt_mod._get_env_config()
# The config has container_disk default of 51200
disk = config.get("container_disk", 51200)
assert disk > 0, "disk should default to > 0"
# Simulate the version check logic from terminal_tool.py
sandbox_kwargs = {}
if disk > 0:
try:
if "ephemeral_disk" in mock_params:
sandbox_kwargs["ephemeral_disk"] = disk
except Exception:
pass
assert "ephemeral_disk" not in sandbox_kwargs, (
"ephemeral_disk should not be set when Sandbox.create doesn't support it"
)
# =========================================================================
# Test 6: ModalEnvironment defaults
# =========================================================================
class TestModalEnvironmentDefaults:
"""Verify ModalEnvironment has correct defaults."""
def test_default_cwd_is_root(self):
"""ModalEnvironment default cwd should be /root, not ~."""
from tools.environments.modal import ModalEnvironment
import inspect
sig = inspect.signature(ModalEnvironment.__init__)
cwd_default = sig.parameters["cwd"].default
assert cwd_default == "/root", (
f"ModalEnvironment cwd default should be /root, got {cwd_default!r}. "
"Tilde ~ is not expanded by subprocess.run(cwd=...)."
)
# =========================================================================
# Test 7: ensurepip fix in ModalEnvironment
# =========================================================================
class TestEnsurepipFix:
"""Verify the pip fix is applied in the ModalEnvironment init."""
def test_modal_environment_creates_image_with_setup_commands(self):
"""ModalEnvironment.__init__ should create a modal.Image with pip fix."""
try:
from tools.environments.modal import ModalEnvironment
except ImportError:
pytest.skip("tools.environments.modal not importable")
import inspect
source = inspect.getsource(ModalEnvironment.__init__)
assert "ensurepip" in source, (
"ModalEnvironment should include ensurepip fix "
"for Modal's legacy image builder"
)
assert "setup_dockerfile_commands" in source, (
"ModalEnvironment should use setup_dockerfile_commands "
"to fix pip before Modal's bootstrap"
)
def test_modal_environment_uses_native_sdk(self):
"""ModalEnvironment should use Modal SDK directly, not swe-rex."""
try:
from tools.environments.modal import ModalEnvironment
except ImportError:
pytest.skip("tools.environments.modal not importable")
import inspect
source = inspect.getsource(ModalEnvironment)
assert "swerex" not in source.lower(), (
"ModalEnvironment should not depend on swe-rex; "
"use Modal SDK directly via Sandbox.create() + exec()"
)
assert "Sandbox.create.aio" in source, (
"ModalEnvironment should use async Modal Sandbox.create.aio()"
)
assert "exec.aio" in source, (
"ModalEnvironment should use Sandbox.exec.aio() for command execution"
)
# =========================================================================
# Test 8: Host prefix list completeness
# =========================================================================
class TestHostPrefixList:
"""Verify the host prefix list catches common host-only paths."""
def test_all_common_host_prefixes_caught(self):
"""The host prefix check should catch /Users/, /home/, C:\\, C:/."""
# Read the actual source to verify the prefixes
import inspect
source = inspect.getsource(_tt_mod._get_env_config)
for prefix in ["/Users/", "/home/", 'C:\\\\"', "C:/"]:
# Normalize for source comparison
check = prefix.rstrip('"')
assert check in source or prefix in source, (
f"Host prefix {prefix!r} not found in _get_env_config. "
"Container backends need this to avoid using host paths."
)