fix: Modal sandbox eval infra (5 fixes for TBLite baseline)

Fixes discovered while running TBLite baseline evaluation:

1. ephemeral_disk param is not supported in modal 1.3.5 - only pass it when
   modal.Sandbox.create's signature accepts it
2. Modal's legacy image builder requires a working pip - reinstall pip with
   ensurepip via setup_dockerfile_commands to handle task images with broken pip
3. Host cwd leaked into the Modal sandbox - add /home/ to the host prefix check
4. Tilde ~ is not expanded by subprocess.run(cwd=) in sandboxes - default the
   cwd to /root instead
5. install_pipx must stay True so that swerex-remote is available in the sandbox

Dependencies also needed (not in this commit):
- git submodule update --init mini-swe-agent
- uv pip install swe-rex boto3
This commit is contained in:
dmahan93
2026-03-09 18:36:28 -05:00
committed by teknium1
parent 2c97bf3936
commit d7f4db53f5
3 changed files with 26 additions and 4 deletions

View File

@@ -114,11 +114,27 @@ def _patch_swerex_modal():
self._worker = _AsyncWorker()
self._worker.start()
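# Pre-build a modal.Image with a pip fix for Modal's legacy image builder.
# Modal requires `python -m pip` to work during image build, but some
# task images (e.g., TBLite's broken-python) have intentionally broken pip.
# Fix: delete any existing pip install (package and dist-info) under
# /usr/local/lib/python*/site-packages and reinstall it via ensurepip before
# Modal tries to use it. The removal runs unconditionally, so images whose pip
# already works simply get pip reinstalled; errors from either step are
# ignored (`2>/dev/null`, `|| true`). Only applied when the image is given as
# a registry string.
# NOTE(review): the `pip*` glob also matches other packages whose names start
# with "pip" (e.g. pipx) - confirm that is acceptable for the task images.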
import modal as _modal
image_spec = self.config.image
if isinstance(image_spec, str):
image_spec = _modal.Image.from_registry(
image_spec,
setup_dockerfile_commands=[
"RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
"python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
],
)
# Create AND start the deployment entirely on the worker's loop/thread
# so all gRPC channels and async state are bound to that loop
async def _create_and_start():
deployment = ModalDeployment(
image=self.config.image,
image=image_spec,
startup_timeout=self.config.startup_timeout,
runtime_timeout=self.config.runtime_timeout,
deployment_timeout=self.config.deployment_timeout,

View File

@@ -50,7 +50,7 @@ class ModalEnvironment(BaseEnvironment):
def __init__(
self,
image: str,
cwd: str = "~",
cwd: str = "/root",
timeout: int = 60,
modal_sandbox_kwargs: Optional[Dict[str, Any]] = None,
persistent_filesystem: bool = True,
@@ -95,6 +95,7 @@ class ModalEnvironment(BaseEnvironment):
startup_timeout=180.0,
runtime_timeout=3600.0,
modal_sandbox_kwargs=sandbox_kwargs,
install_pipx=True, # Required: installs pipx + swe-rex runtime (swerex-remote)
)
def execute(self, command: str, cwd: str = "", *,

View File

@@ -463,7 +463,7 @@ def _get_env_config() -> Dict[str, Any]:
if env_type == "local":
default_cwd = os.getcwd()
else:
default_cwd = "~"
default_cwd = "/root"
# Read TERMINAL_CWD but sanity-check it for container backends.
# If the CWD looks like a host-local path that can't exist inside a
@@ -553,7 +553,12 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
if memory > 0:
sandbox_kwargs["memory"] = memory
if disk > 0:
sandbox_kwargs["ephemeral_disk"] = disk
try:
import inspect, modal
if "ephemeral_disk" in inspect.signature(modal.Sandbox.create).parameters:
sandbox_kwargs["ephemeral_disk"] = disk
except Exception:
pass
return _ModalEnvironment(
image=image, cwd=cwd, timeout=timeout,