Add terminal configuration options and enhance environment setup

- Introduced terminal_timeout and terminal_lifetime parameters to control command execution and sandbox inactivity.
- Updated environment variable handling to allow configuration overrides for terminal settings.
- Enhanced logging to provide detailed information about terminal settings during initialization.
- Added tool_pool_size parameter to dynamically resize the thread pool for tool execution, improving concurrency management.
2026-02-10 22:51:50 +00:00
parent 5ec75e38b9
commit 6b4a8d0b17
1 changed files with 35 additions and 2 deletions
--- a/environments/hermes_base_env.py
+++ b/environments/hermes_base_env.py
@@ -117,6 +117,18 @@ class HermesAgentEnvConfig(BaseEnvConfig):
        description="Terminal backend: 'local', 'docker', 'modal', 'ssh', 'singularity'. "
        "Modal recommended for production RL (cloud isolation per rollout).",
    )
+    terminal_timeout: int = Field(
+        default=120,
+        description="Per-command timeout in seconds for terminal tool calls. "
+        "Commands exceeding this are killed. Increase for tasks with long-running "
+        "commands (compilation, pip install, etc.).",
+    )
+    terminal_lifetime: int = Field(
+        default=3600,
+        description="Sandbox inactivity lifetime in seconds. The cleanup thread kills "
+        "sandboxes that have been idle longer than this. Must be longer than "
+        "the longest gap between tool calls (e.g., waiting for LLM response).",
+    )

    # --- Dataset ---
    dataset_name: Optional[str] = Field(
@@ -132,6 +144,14 @@ class HermesAgentEnvConfig(BaseEnvConfig):
        description="Which field in the dataset contains the prompt.",
    )

+    # --- Thread pool ---
+    tool_pool_size: int = Field(
+        default=128,
+        description="Thread pool size for tool execution. Each concurrent task needs a "
+        "thread for tool calls. Must be large enough for parallel evaluation. "
+        "Too small = thread pool starvation.",
+    )
+
    # --- Phase 2: Tool call parsing ---
    tool_call_parser: str = Field(
        default="hermes",
@@ -175,10 +195,23 @@ class HermesAgentBaseEnv(BaseEnv):
    ):
        super().__init__(config, server_configs, slurm, testing)

-        # Set terminal backend environment variable so hermes tools pick it up
+        # Set terminal environment variables so hermes tools pick them up.
+        # These can all be overridden per-environment via config fields instead
+        # of requiring users to set shell env vars.
        if config.terminal_backend:
            os.environ["TERMINAL_ENV"] = config.terminal_backend
-            print(f"🖥️  Terminal backend: {config.terminal_backend}")
+        os.environ["TERMINAL_TIMEOUT"] = str(config.terminal_timeout)
+        os.environ["TERMINAL_LIFETIME_SECONDS"] = str(config.terminal_lifetime)
+        print(
+            f"🖥️  Terminal: backend={config.terminal_backend}, "
+            f"timeout={config.terminal_timeout}s, lifetime={config.terminal_lifetime}s"
+        )
+
+        # Resize the agent loop's thread pool for tool execution.
+        # This must be large enough for the number of concurrent tasks
+        # (e.g., 89 parallel TB2 eval tasks each need a thread for tool calls).
+        from environments.agent_loop import resize_tool_pool
+        resize_tool_pool(config.tool_pool_size)

        # Current group's resolved tools (set in collect_trajectories)
        self._current_group_tools: Optional[Tuple[List[Dict], Set[str]]] = None