diff --git a/AGENTS.md b/AGENTS.md index 3cde8bb4d..250b22076 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -44,7 +44,8 @@ hermes-agent/ │ │ ├── docker.py # Docker container execution │ │ ├── ssh.py # SSH remote execution │ │ ├── singularity.py # Singularity/Apptainer + SIF management -│ │ └── modal.py # Modal cloud execution +│ │ ├── modal.py # Modal cloud execution +│ │ └── daytona.py # Daytona cloud sandboxes │ ├── terminal_tool.py # Terminal orchestration (sudo, lifecycle, factory) │ ├── todo_tool.py # Planning & task management │ ├── process_registry.py # Background process management @@ -428,11 +429,13 @@ API keys are loaded from `~/.hermes/.env`: - `NOUS_API_KEY` - Vision and Mixture-of-Agents tools Terminal tool configuration (in `~/.hermes/config.yaml`): -- `terminal.backend` - Backend: local, docker, singularity, modal, or ssh +- `terminal.backend` - Backend: local, docker, singularity, modal, daytona, or ssh - `terminal.cwd` - Working directory ("." = host CWD for local only; for remote backends set an absolute path inside the target, or omit to use the backend's default) - `terminal.docker_image` - Image for Docker backend - `terminal.singularity_image` - Image for Singularity backend - `terminal.modal_image` - Image for Modal backend +- `terminal.daytona_image` - Image for Daytona backend +- `DAYTONA_API_KEY` - API key for Daytona backend (in .env) - SSH: `TERMINAL_SSH_HOST`, `TERMINAL_SSH_USER`, `TERMINAL_SSH_KEY` in .env Agent behavior (in `~/.hermes/.env`): @@ -496,7 +499,7 @@ terminal(command="pytest -v tests/", background=true) - `process(action="submit", session_id="proc_abc123", data="yes")` -- send + Enter **Key behaviors:** -- Background processes execute through the configured terminal backend (local/Docker/Modal/SSH/Singularity) -- never directly on the host unless `TERMINAL_ENV=local` +- Background processes execute through the configured terminal backend (local/Docker/Modal/Daytona/SSH/Singularity) -- never directly on the host unless 
`TERMINAL_ENV=local` - The `wait` action blocks the tool call until the process finishes, times out, or is interrupted by a new user message - PTY mode (`pty=true` on terminal) enables interactive CLI tools (Codex, Claude Code) - In RL training, background processes are auto-killed when the episode ends (`tool_context.cleanup()`) diff --git a/environments/README.md b/environments/README.md index 6eaf81ed4..862667bb6 100644 --- a/environments/README.md +++ b/environments/README.md @@ -40,7 +40,7 @@ This directory contains the integration layer between **hermes-agent's** tool-ca - `evaluate_log()` for saving eval results to JSON + samples.jsonl **HermesAgentBaseEnv** (`hermes_base_env.py`) extends BaseEnv with hermes-agent specifics: -- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, modal, ssh, singularity) +- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, modal, daytona, ssh, singularity) - Resolves hermes-agent toolsets via `_resolve_tools_for_group()` (calls `get_tool_definitions()` which queries `tools/registry.py`) - Implements `collect_trajectory()` which runs the full agent loop and computes rewards - Supports two-phase operation (Phase 1: OpenAI server, Phase 2: VLLM ManagedServer) @@ -324,7 +324,7 @@ For eval benchmarks, follow the pattern in `terminalbench2_env.py`: | `distribution` | Probabilistic toolset distribution name | `None` | | `max_agent_turns` | Max LLM calls per rollout | `30` | | `agent_temperature` | Sampling temperature | `1.0` | -| `terminal_backend` | `local`, `docker`, `modal`, `ssh`, `singularity` | `local` | +| `terminal_backend` | `local`, `docker`, `modal`, `daytona`, `ssh`, `singularity` | `local` | | `system_prompt` | System message for the agent | `None` | | `tool_call_parser` | Parser name for Phase 2 | `hermes` | | `eval_handling` | `STOP_TRAIN`, `LIMIT_TRAIN`, `NONE` | `STOP_TRAIN` | diff --git a/environments/hermes_base_env.py 
b/environments/hermes_base_env.py index 8fbfd50a5..9025edd21 100644 --- a/environments/hermes_base_env.py +++ b/environments/hermes_base_env.py @@ -114,8 +114,8 @@ class HermesAgentEnvConfig(BaseEnvConfig): # --- Terminal backend --- terminal_backend: str = Field( default="local", - description="Terminal backend: 'local', 'docker', 'modal', 'ssh', 'singularity'. " - "Modal recommended for production RL (cloud isolation per rollout).", + description="Terminal backend: 'local', 'docker', 'modal', 'daytona', 'ssh', 'singularity'. " + "Modal or Daytona recommended for production RL (cloud isolation per rollout).", ) terminal_timeout: int = Field( default=120, diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index a312a20fd..44e5e7e74 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -394,7 +394,7 @@ def _print_setup_summary(config: dict, hermes_home): def _prompt_container_resources(config: dict): - """Prompt for container resource settings (Docker, Singularity, Modal).""" + """Prompt for container resource settings (Docker, Singularity, Modal, Daytona).""" terminal = config.setdefault('terminal', {}) print() diff --git a/tools/__init__.py b/tools/__init__.py index 210ea35f9..04eabd023 100644 --- a/tools/__init__.py +++ b/tools/__init__.py @@ -6,7 +6,7 @@ This package contains all the specific tool implementations for the Hermes Agent Each module provides specialized functionality for different capabilities: - web_tools: Web search, content extraction, and crawling -- terminal_tool: Command execution using mini-swe-agent (local/docker/modal backends) +- terminal_tool: Command execution using mini-swe-agent (local/docker/singularity/modal/daytona backends) - vision_tools: Image analysis and understanding - mixture_of_agents_tool: Multi-model collaborative reasoning - image_generation_tool: Text-to-image generation with upscaling @@ -23,7 +23,7 @@ from .web_tools import ( check_firecrawl_api_key ) -# Primary terminal tool (mini-swe-agent backend: 
local/docker/singularity/modal) +# Primary terminal tool (mini-swe-agent backend: local/docker/singularity/modal/daytona) from .terminal_tool import ( terminal_tool, check_terminal_requirements, diff --git a/tools/file_operations.py b/tools/file_operations.py index 53ebe6346..182d35f5f 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -3,7 +3,7 @@ File Operations Module Provides file manipulation capabilities (read, write, patch, search) that work -across all terminal backends (local, docker, singularity, ssh, modal). +across all terminal backends (local, docker, singularity, ssh, modal, daytona). The key insight is that all file operations can be expressed as shell commands, so we wrap the terminal backend's execute() interface to provide a unified file API. @@ -294,7 +294,7 @@ class ShellFileOperations(FileOperations): File operations implemented via shell commands. Works with ANY terminal backend that has execute(command, cwd) method. - This includes local, docker, singularity, ssh, and modal environments. + This includes local, docker, singularity, ssh, modal, and daytona environments. """ def __init__(self, terminal_env, cwd: str = None): diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 022380fa7..c35baeeaf 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -44,6 +44,7 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `HONCHO_API_KEY` | Cross-session user modeling ([honcho.dev](https://honcho.dev/)) | | `TINKER_API_KEY` | RL training ([tinker-console.thinkingmachines.ai](https://tinker-console.thinkingmachines.ai/)) | | `WANDB_API_KEY` | RL training metrics ([wandb.ai](https://wandb.ai/)) | +| `DAYTONA_API_KEY` | Daytona cloud sandboxes ([daytona.io](https://daytona.io/)) | ## Terminal Backend @@ -54,6 +55,7 @@ All variables go in `~/.hermes/.env`. 
You can also set them with `hermes config | `TERMINAL_DOCKER_VOLUMES` | Additional Docker volume mounts (comma-separated `host:container` pairs) | | `TERMINAL_SINGULARITY_IMAGE` | Singularity image or `.sif` path | | `TERMINAL_MODAL_IMAGE` | Modal container image | +| `TERMINAL_DAYTONA_IMAGE` | Daytona sandbox image | | `TERMINAL_TIMEOUT` | Command timeout in seconds | | `TERMINAL_LIFETIME_SECONDS` | Max lifetime for terminal sessions in seconds | | `TERMINAL_CWD` | Working directory for all terminal sessions | @@ -68,7 +70,7 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `TERMINAL_SSH_PORT` | SSH port (default: 22) | | `TERMINAL_SSH_KEY` | Path to private key | -## Container Resources (Docker, Singularity, Modal) +## Container Resources (Docker, Singularity, Modal, Daytona) | Variable | Description | |----------|-------------| diff --git a/website/docs/user-guide/features/tools.md b/website/docs/user-guide/features/tools.md index 1f1036fb5..daf982fea 100644 --- a/website/docs/user-guide/features/tools.md +++ b/website/docs/user-guide/features/tools.md @@ -13,7 +13,7 @@ Tools are functions that extend the agent's capabilities. They're organized into | Category | Tools | Description | |----------|-------|-------------| | **Web** | `web_search`, `web_extract` | Search the web, extract page content | -| **Terminal** | `terminal`, `process` | Execute commands (local/docker/singularity/modal/ssh backends), manage background processes | +| **Terminal** | `terminal`, `process` | Execute commands (local/docker/singularity/modal/daytona/ssh backends), manage background processes | | **File** | `read_file`, `write_file`, `patch`, `search_files` | Read, write, edit, and search files | | **Browser** | `browser_navigate`, `browser_click`, `browser_type`, etc. 
| Full browser automation via Browserbase | | **Vision** | `vision_analyze` | Image analysis via multimodal models | @@ -115,7 +115,7 @@ Configure CPU, memory, disk, and persistence for all container backends: ```yaml terminal: - backend: docker # or singularity, modal + backend: docker # or singularity, modal, daytona container_cpu: 1 # CPU cores (default: 1) container_memory: 5120 # Memory in MB (default: 5GB) container_disk: 51200 # Disk in MB (default: 50GB) diff --git a/website/docs/user-guide/security.md b/website/docs/user-guide/security.md index 03e02dd4e..9fcf527fd 100644 --- a/website/docs/user-guide/security.md +++ b/website/docs/user-guide/security.md @@ -45,7 +45,7 @@ The following patterns trigger approval prompts (defined in `tools/approval.py`) | Fork bomb patterns | Fork bombs | :::info -**Container bypass**: When running in `docker`, `singularity`, or `modal` backends, dangerous command checks are **skipped** because the container itself is the security boundary. Destructive commands inside a container can't harm the host. +**Container bypass**: When running in `docker`, `singularity`, `modal`, or `daytona` backends, dangerous command checks are **skipped** because the container itself is the security boundary. Destructive commands inside a container can't harm the host. ::: ### Approval Flow (CLI) @@ -224,7 +224,7 @@ terminal: - **Ephemeral mode** (`container_persistent: false`): Uses tmpfs for workspace — everything is lost on cleanup :::tip -For production gateway deployments, use `docker` or `modal` backend to isolate agent commands from your host system. This eliminates the need for dangerous command approval entirely. +For production gateway deployments, use `docker`, `modal`, or `daytona` backend to isolate agent commands from your host system. This eliminates the need for dangerous command approval entirely. 
::: ## Terminal Backend Security Comparison @@ -236,6 +236,7 @@ For production gateway deployments, use `docker` or `modal` backend to isolate a | **docker** | Container | ❌ Skipped (container is boundary) | Production gateway | | **singularity** | Container | ❌ Skipped | HPC environments | | **modal** | Cloud sandbox | ❌ Skipped | Scalable cloud isolation | +| **daytona** | Cloud sandbox | ❌ Skipped | Persistent cloud workspaces | ## MCP Credential Handling