docs: add Daytona to batch_runner, process_registry, agent_loop, tool_context

Add daytona_image to batch_runner per-prompt container image overrides
so batch processing works with the Daytona backend. Update inline
comments in RL environment files (agent_loop, tool_context) and
process_registry docstrings to include Daytona in backend lists.
This commit is contained in:
teknium1
2026-03-06 03:49:59 -08:00
parent 3982fcf095
commit 3670089a42
4 changed files with 8 additions and 7 deletions

View File

@@ -250,7 +250,7 @@ def _process_single_prompt(
task_id = f"task_{prompt_index}"
# Per-prompt container image override: if the dataset row has an 'image' field,
-# register it for this task's sandbox. Works with Docker, Modal, and Singularity.
+# register it for this task's sandbox. Works with Docker, Modal, Singularity, and Daytona.
container_image = prompt_data.get("image") or prompt_data.get("docker_image")
if container_image:
# Verify the image is accessible before spending tokens on the agent loop.
@@ -292,6 +292,7 @@ def _process_single_prompt(
"docker_image": container_image,
"modal_image": container_image,
"singularity_image": f"docker://{container_image}",
"daytona_image": container_image,
}
if prompt_data.get("cwd"):
overrides["cwd"] = prompt_data["cwd"]

View File

@@ -23,7 +23,7 @@ from typing import Any, Dict, List, Optional, Set
from model_tools import handle_function_call
# Thread pool for running sync tool calls that internally use asyncio.run()
-# (e.g., mini-swe-agent's modal/docker backends). Running them in a separate
+# (e.g., mini-swe-agent's modal/docker/daytona backends). Running them in a separate
# thread gives them a clean event loop so they don't deadlock inside Atropos's loop.
# Size must be large enough for concurrent eval tasks (e.g., 89 TB2 tasks all
# making tool calls). Too small = thread pool starvation, tasks queue for minutes.
@@ -336,7 +336,7 @@ class HermesAgentLoop:
tool_elapsed = _time.monotonic() - tool_submit_time
else:
# Run tool calls in a thread pool so backends that
-# use asyncio.run() internally (modal, docker) get
+# use asyncio.run() internally (modal, docker, daytona) get
# a clean event loop instead of deadlocking.
loop = asyncio.get_event_loop()
# Capture current tool_name/args for the lambda

View File

@@ -44,7 +44,7 @@ _tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4)
def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str) -> str:
"""
Run a tool call in a thread pool executor so backends that use asyncio.run()
-internally (modal, docker) get a clean event loop.
+internally (modal, docker, daytona) get a clean event loop.
If we're already in an async context, executes handle_function_call() in a
disposable worker thread and blocks for the result.
@@ -95,7 +95,7 @@ class ToolContext:
backend = os.getenv("TERMINAL_ENV", "local")
logger.debug("ToolContext.terminal [%s backend] task=%s: %s", backend, self.task_id[:8], command[:100])
-# Run via thread helper so modal/docker backends' asyncio.run() doesn't deadlock
+# Run via thread helper so modal/docker/daytona backends' asyncio.run() doesn't deadlock
result = _run_tool_in_thread(
"terminal",
{"command": command, "timeout": timeout},

View File

@@ -11,7 +11,7 @@ Tracks processes spawned via terminal(background=true), providing:
Background processes execute THROUGH the environment interface -- nothing
runs on the host machine unless TERMINAL_ENV=local. For Docker, Singularity,
-Modal, and SSH backends, the command runs inside the sandbox.
+Modal, Daytona, and SSH backends, the command runs inside the sandbox.
Usage:
from tools.process_registry import process_registry
@@ -238,7 +238,7 @@ class ProcessRegistry:
"""
Spawn a background process through a non-local environment backend.
-For Docker/Singularity/Modal/SSH: runs the command inside the sandbox
+For Docker/Singularity/Modal/Daytona/SSH: runs the command inside the sandbox
using the environment's execute() interface. We wrap the command to
capture the in-sandbox PID and redirect output to a log file inside
the sandbox, then poll the log via subsequent execute() calls.