docs: add Daytona to batch_runner, process_registry, agent_loop, tool_context

Add daytona_image to the per-prompt container image overrides in batch_runner so that batch processing works with the Daytona backend; this is the only functional change in this commit. Also update the inline comments in the RL environment files (agent_loop, tool_context) and the docstrings in process_registry so that their backend lists include Daytona.
2026-03-06 03:49:59 -08:00
parent 3982fcf095
commit 3670089a42
4 changed files with 8 additions and 7 deletions
--- a/batch_runner.py
+++ b/batch_runner.py
@@ -250,7 +250,7 @@ def _process_single_prompt(
    task_id = f"task_{prompt_index}"
    
    # Per-prompt container image override: if the dataset row has an 'image' field,
-    # register it for this task's sandbox. Works with Docker, Modal, and Singularity.
+    # register it for this task's sandbox. Works with Docker, Modal, Singularity, and Daytona.
    container_image = prompt_data.get("image") or prompt_data.get("docker_image")
    if container_image:
        # Verify the image is accessible before spending tokens on the agent loop.
@@ -292,6 +292,7 @@ def _process_single_prompt(
            "docker_image": container_image,
            "modal_image": container_image,
            "singularity_image": f"docker://{container_image}",
+            "daytona_image": container_image,
        }
        if prompt_data.get("cwd"):
            overrides["cwd"] = prompt_data["cwd"]
--- a/environments/agent_loop.py
+++ b/environments/agent_loop.py
@@ -23,7 +23,7 @@ from typing import Any, Dict, List, Optional, Set
 from model_tools import handle_function_call

 # Thread pool for running sync tool calls that internally use asyncio.run()
-# (e.g., mini-swe-agent's modal/docker backends). Running them in a separate
+# (e.g., mini-swe-agent's modal/docker/daytona backends). Running them in a separate
 # thread gives them a clean event loop so they don't deadlock inside Atropos's loop.
 # Size must be large enough for concurrent eval tasks (e.g., 89 TB2 tasks all
 # making tool calls). Too small = thread pool starvation, tasks queue for minutes.
@@ -336,7 +336,7 @@ class HermesAgentLoop:
                                tool_elapsed = _time.monotonic() - tool_submit_time
                            else:
                                # Run tool calls in a thread pool so backends that
-                                # use asyncio.run() internally (modal, docker) get
+                                # use asyncio.run() internally (modal, docker, daytona) get
                                # a clean event loop instead of deadlocking.
                                loop = asyncio.get_event_loop()
                                # Capture current tool_name/args for the lambda
--- a/environments/tool_context.py
+++ b/environments/tool_context.py
@@ -44,7 +44,7 @@ _tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
 def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str) -> str:
    """
    Run a tool call in a thread pool executor so backends that use asyncio.run()
-    internally (modal, docker) get a clean event loop.
+    internally (modal, docker, daytona) get a clean event loop.

    If we're already in an async context, executes handle_function_call() in a
    disposable worker thread and blocks for the result.
@@ -95,7 +95,7 @@ class ToolContext:
        backend = os.getenv("TERMINAL_ENV", "local")
        logger.debug("ToolContext.terminal [%s backend] task=%s: %s", backend, self.task_id[:8], command[:100])

-        # Run via thread helper so modal/docker backends' asyncio.run() doesn't deadlock
+        # Run via thread helper so modal/docker/daytona backends' asyncio.run() doesn't deadlock
        result = _run_tool_in_thread(
            "terminal",
            {"command": command, "timeout": timeout},
--- a/tools/process_registry.py
+++ b/tools/process_registry.py
@@ -11,7 +11,7 @@ Tracks processes spawned via terminal(background=true), providing:

 Background processes execute THROUGH the environment interface -- nothing
 runs on the host machine unless TERMINAL_ENV=local. For Docker, Singularity,
-Modal, and SSH backends, the command runs inside the sandbox.
+Modal, Daytona, and SSH backends, the command runs inside the sandbox.

 Usage:
    from tools.process_registry import process_registry
@@ -238,7 +238,7 @@ class ProcessRegistry:
        """
        Spawn a background process through a non-local environment backend.

-        For Docker/Singularity/Modal/SSH: runs the command inside the sandbox
+        For Docker/Singularity/Modal/Daytona/SSH: runs the command inside the sandbox
        using the environment's execute() interface. We wrap the command to
        capture the in-sandbox PID and redirect output to a log file inside
        the sandbox, then poll the log via subsequent execute() calls.