From d7f4db53f585569c8d9f20f7fe622d6ecce39bdc Mon Sep 17 00:00:00 2001 From: dmahan93 Date: Mon, 9 Mar 2026 18:36:28 -0500 Subject: [PATCH] fix: Modal sandbox eval infra (5 fixes for TBLite baseline) Fixes discovered while running TBLite baseline evaluation: 1. ephemeral_disk param not supported in modal 1.3.5 - check before passing 2. Modal legacy image builder requires working pip - add ensurepip fix via setup_dockerfile_commands to handle task images with broken pip 3. Host cwd leaked into Modal sandbox - add /home/ to host prefix check 4. Tilde ~ not expanded by subprocess.run(cwd=) in sandboxes - use /root 5. install_pipx must stay True for swerex-remote to be available Dependencies also needed (not in this commit): - git submodule update --init mini-swe-agent - uv pip install swe-rex boto3 --- environments/patches.py | 18 +++++++++++++++++- tools/environments/modal.py | 3 ++- tools/terminal_tool.py | 9 +++++++-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/environments/patches.py b/environments/patches.py index f6cfaeb45..3c5ed2cd1 100644 --- a/environments/patches.py +++ b/environments/patches.py @@ -114,11 +114,27 @@ def _patch_swerex_modal(): self._worker = _AsyncWorker() self._worker.start() + # Pre-build a modal.Image with pip fix for Modal's legacy image builder. + # Modal requires `python -m pip` to work during image build, but some + # task images (e.g., TBLite's broken-python) have intentionally broken pip. + # Fix: remove stale pip dist-info and reinstall via ensurepip before Modal + # tries to use it. This is a no-op for images where pip already works. 
+ import modal as _modal + image_spec = self.config.image + if isinstance(image_spec, str): + image_spec = _modal.Image.from_registry( + image_spec, + setup_dockerfile_commands=[ + "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; " + "python -m ensurepip --upgrade --default-pip 2>/dev/null || true", + ], + ) + # Create AND start the deployment entirely on the worker's loop/thread # so all gRPC channels and async state are bound to that loop async def _create_and_start(): deployment = ModalDeployment( - image=self.config.image, + image=image_spec, startup_timeout=self.config.startup_timeout, runtime_timeout=self.config.runtime_timeout, deployment_timeout=self.config.deployment_timeout, diff --git a/tools/environments/modal.py b/tools/environments/modal.py index dbdd0a7c9..44ad51eba 100644 --- a/tools/environments/modal.py +++ b/tools/environments/modal.py @@ -50,7 +50,7 @@ class ModalEnvironment(BaseEnvironment): def __init__( self, image: str, - cwd: str = "~", + cwd: str = "/root", timeout: int = 60, modal_sandbox_kwargs: Optional[Dict[str, Any]] = None, persistent_filesystem: bool = True, @@ -95,6 +95,7 @@ class ModalEnvironment(BaseEnvironment): startup_timeout=180.0, runtime_timeout=3600.0, modal_sandbox_kwargs=sandbox_kwargs, + install_pipx=True, # Required: installs pipx + swe-rex runtime (swerex-remote) ) def execute(self, command: str, cwd: str = "", *, diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 18d1629e1..d124dba9d 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -463,7 +463,7 @@ def _get_env_config() -> Dict[str, Any]: if env_type == "local": default_cwd = os.getcwd() else: - default_cwd = "~" + default_cwd = "/root" # Read TERMINAL_CWD but sanity-check it for container backends. 
# If the CWD looks like a host-local path that can't exist inside a @@ -553,7 +553,12 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, if memory > 0: sandbox_kwargs["memory"] = memory if disk > 0: - sandbox_kwargs["ephemeral_disk"] = disk + try: + import inspect, modal + if "ephemeral_disk" in inspect.signature(modal.Sandbox.create).parameters: + sandbox_kwargs["ephemeral_disk"] = disk + except Exception: + pass return _ModalEnvironment( image=image, cwd=cwd, timeout=timeout,