fix(terminal): cap foreground timeout to prevent session deadlocks

When the model calls terminal() in foreground mode without background=true (e.g. to start a server), the tool call blocks until the command exits or the timeout expires. Without an upper bound the model can request arbitrarily high timeouts (the schema had minimum=1 but no maximum), blocking the entire agent session for hours until the gateway idle watchdog kills it. Changes: - Add FOREGROUND_MAX_TIMEOUT (600s, configurable via TERMINAL_MAX_FOREGROUND_TIMEOUT env var) that caps foreground timeout - Clamp effective_timeout to the cap when background=false and timeout exceeds the limit - Include a timeout_note in the tool result when clamped, nudging the model to use background=true for long-running processes - Update schema description to show the max timeout value - Remove dead clamping code in the background branch that could never fire (max_timeout was set to effective_timeout, so timeout > max_timeout was always false) - Add 7 tests covering clamping, no-clamping, config-default-exceeds-cap edge case, background bypass, default timeout, constant value, and schema content Self-review fixes: - Fixed bug where timeout_note said 'Requested timeout Nones' when clamping fired from config default exceeding cap (timeout param is None). Now uses unclamped_timeout instead of the raw timeout param. - Removed unused pytest import from test file - Extracted test config dict into _make_env_config() helper - Fixed tautological test_default_value assertion - Added missing test for config default > cap with no model timeout
2026-04-09 22:36:11 +05:30
parent 51d826f889
commit 6c3565df57
2 changed files with 206 additions and 11 deletions
--- a/tests/tools/test_terminal_foreground_timeout_cap.py
+++ b/tests/tools/test_terminal_foreground_timeout_cap.py
@@ -0,0 +1,177 @@
+"""Tests for foreground timeout clamping in terminal_tool.
+
+Ensures that foreground commands have a hard timeout cap to prevent
+a single tool call from blocking the entire agent session.
+"""
+import json
+import os
+from unittest.mock import patch, MagicMock
+
+
+# ---------------------------------------------------------------------------
+# Shared test config dict — mirrors _get_env_config() return shape.
+# ---------------------------------------------------------------------------
+def _make_env_config(**overrides):
+    """Return a minimal _get_env_config()-shaped dict with optional overrides."""
+    config = {
+        "env_type": "local",
+        "timeout": 180,
+        "cwd": "/tmp",
+        "host_cwd": None,
+        "modal_mode": "auto",
+        "docker_image": "",
+        "singularity_image": "",
+        "modal_image": "",
+        "daytona_image": "",
+    }
+    config.update(overrides)
+    return config
+
+
+class TestForegroundTimeoutCap:
+    """FOREGROUND_MAX_TIMEOUT prevents foreground commands from blocking too long."""
+
+    def test_foreground_timeout_clamped_to_max(self):
+        """When model requests timeout > FOREGROUND_MAX_TIMEOUT, it's clamped."""
+        from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT
+
+        with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
+             patch("tools.terminal_tool._start_cleanup_thread"):
+
+            mock_env = MagicMock()
+            mock_env.execute.return_value = {"output": "done", "returncode": 0}
+
+            with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \
+                 patch("tools.terminal_tool._last_activity", {"default": 0}), \
+                 patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
+                result = json.loads(terminal_tool(
+                    command="echo hello",
+                    timeout=9999,  # Way above max
+                ))
+
+            # Verify the timeout was clamped
+            call_kwargs = mock_env.execute.call_args
+            assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT
+            assert result.get("timeout_note") is not None
+            assert "clamped" in result["timeout_note"]
+            assert "9999" in result["timeout_note"]
+            assert "background=true" in result["timeout_note"]
+
+    def test_foreground_timeout_within_max_not_clamped(self):
+        """When model requests timeout <= FOREGROUND_MAX_TIMEOUT, no clamping."""
+        from tools.terminal_tool import terminal_tool
+
+        with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
+             patch("tools.terminal_tool._start_cleanup_thread"):
+
+            mock_env = MagicMock()
+            mock_env.execute.return_value = {"output": "done", "returncode": 0}
+
+            with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \
+                 patch("tools.terminal_tool._last_activity", {"default": 0}), \
+                 patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
+                result = json.loads(terminal_tool(
+                    command="echo hello",
+                    timeout=300,  # Within max
+                ))
+
+            call_kwargs = mock_env.execute.call_args
+            assert call_kwargs[1]["timeout"] == 300
+            assert "timeout_note" not in result
+
+    def test_config_default_exceeds_cap_no_model_timeout(self):
+        """When config default timeout > cap and model passes no timeout, clamping fires."""
+        from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT
+
+        # User configured TERMINAL_TIMEOUT=900 in their env
+        with patch("tools.terminal_tool._get_env_config",
+                    return_value=_make_env_config(timeout=900)), \
+             patch("tools.terminal_tool._start_cleanup_thread"):
+
+            mock_env = MagicMock()
+            mock_env.execute.return_value = {"output": "done", "returncode": 0}
+
+            with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \
+                 patch("tools.terminal_tool._last_activity", {"default": 0}), \
+                 patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
+                result = json.loads(terminal_tool(command="make build"))
+
+            # Should be clamped
+            call_kwargs = mock_env.execute.call_args
+            assert call_kwargs[1]["timeout"] == FOREGROUND_MAX_TIMEOUT
+            # Note should reference the original 900s, NOT "None"
+            note = result.get("timeout_note", "")
+            assert "900" in note, f"Expected '900' in timeout_note but got: {note!r}"
+            assert "None" not in note, f"timeout_note contains 'None': {note!r}"
+            assert "clamped" in note
+
+    def test_background_not_clamped(self):
+        """Background commands should NOT be subject to foreground timeout cap."""
+        from tools.terminal_tool import terminal_tool
+
+        with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
+             patch("tools.terminal_tool._start_cleanup_thread"):
+
+            mock_env = MagicMock()
+            mock_env.env = {}
+            mock_proc_session = MagicMock()
+            mock_proc_session.id = "test-123"
+            mock_proc_session.pid = 1234
+
+            mock_registry = MagicMock()
+            mock_registry.spawn_local.return_value = mock_proc_session
+
+            with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \
+                 patch("tools.terminal_tool._last_activity", {"default": 0}), \
+                 patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}), \
+                 patch("tools.process_registry.process_registry", mock_registry), \
+                 patch("tools.approval.get_current_session_key", return_value=""):
+                result = json.loads(terminal_tool(
+                    command="python server.py",
+                    background=True,
+                    timeout=9999,
+                ))
+
+            # Background should NOT be clamped
+            assert result.get("timeout_note") is None
+
+    def test_default_timeout_not_clamped(self):
+        """Default timeout (180s) should not trigger clamping."""
+        from tools.terminal_tool import terminal_tool, FOREGROUND_MAX_TIMEOUT
+
+        # 180 < 600, so no clamping
+        assert 180 < FOREGROUND_MAX_TIMEOUT
+
+        with patch("tools.terminal_tool._get_env_config", return_value=_make_env_config()), \
+             patch("tools.terminal_tool._start_cleanup_thread"):
+
+            mock_env = MagicMock()
+            mock_env.execute.return_value = {"output": "done", "returncode": 0}
+
+            with patch("tools.terminal_tool._active_environments", {"default": mock_env}), \
+                 patch("tools.terminal_tool._last_activity", {"default": 0}), \
+                 patch("tools.terminal_tool._check_all_guards", return_value={"approved": True}):
+                result = json.loads(terminal_tool(command="echo hello"))
+
+            call_kwargs = mock_env.execute.call_args
+            assert call_kwargs[1]["timeout"] == 180
+            assert "timeout_note" not in result
+
+
+class TestForegroundMaxTimeoutConstant:
+    """Verify the FOREGROUND_MAX_TIMEOUT constant and schema."""
+
+    def test_default_value_is_600(self):
+        """Default FOREGROUND_MAX_TIMEOUT is 600 when env var is not set."""
+        from tools.terminal_tool import FOREGROUND_MAX_TIMEOUT
+        # Module-level constant should be 600 in a clean test environment.
+        # If TERMINAL_MAX_FOREGROUND_TIMEOUT is set, it may differ — but the
+        # conftest _isolate_hermes_home fixture ensures a clean env for tests.
+        assert FOREGROUND_MAX_TIMEOUT == 600
+
+    def test_schema_mentions_max(self):
+        """Tool schema description should mention the max timeout."""
+        from tools.terminal_tool import TERMINAL_SCHEMA, FOREGROUND_MAX_TIMEOUT
+        timeout_desc = TERMINAL_SCHEMA["parameters"]["properties"]["timeout"]["description"]
+        assert str(FOREGROUND_MAX_TIMEOUT) in timeout_desc
+        assert "max" in timeout_desc.lower()
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -75,6 +75,9 @@ from tools.tool_backend_helpers import (
 )


+# Hard cap on foreground timeout; override via TERMINAL_MAX_FOREGROUND_TIMEOUT env var.
+FOREGROUND_MAX_TIMEOUT = int(os.getenv("TERMINAL_MAX_FOREGROUND_TIMEOUT", "600"))
+
 # Disk usage warning threshold (in GB)
 DISK_USAGE_WARNING_THRESHOLD_GB = float(os.getenv("TERMINAL_DISK_WARNING_GB", "500"))

@@ -1207,6 +1210,16 @@ def terminal_tool(
        cwd = overrides.get("cwd") or config["cwd"]
        default_timeout = config["timeout"]
        effective_timeout = timeout or default_timeout
+        unclamped_timeout = effective_timeout
+
+        # Clamp foreground commands to FOREGROUND_MAX_TIMEOUT to prevent
+        # a single tool call from blocking the entire agent session.
+        if not background and effective_timeout > FOREGROUND_MAX_TIMEOUT:
+            logger.info(
+                "Clamping foreground timeout from %ds to %ds (max: TERMINAL_MAX_FOREGROUND_TIMEOUT=%d)",
+                effective_timeout, FOREGROUND_MAX_TIMEOUT, FOREGROUND_MAX_TIMEOUT,
+            )
+            effective_timeout = FOREGROUND_MAX_TIMEOUT

        # Start cleanup thread
        _start_cleanup_thread()
@@ -1398,14 +1411,6 @@ def terminal_tool(
                if pty_disabled_reason:
                    result_data["pty_note"] = pty_disabled_reason

-                # Transparent timeout clamping note
-                max_timeout = effective_timeout
-                if timeout and timeout > max_timeout:
-                    result_data["timeout_note"] = (
-                        f"Requested timeout {timeout}s was clamped to "
-                        f"configured limit of {max_timeout}s"
-                    )
-
                # Mark for agent notification on completion
                if notify_on_complete and background:
                    proc_session.notify_on_complete = True
@@ -1480,11 +1485,18 @@ def terminal_tool(
                except Exception as e:
                    error_str = str(e).lower()
                    if "timeout" in error_str:
-                        return json.dumps({
+                        timeout_result = {
                            "output": "",
                            "exit_code": 124,
                            "error": f"Command timed out after {effective_timeout} seconds"
-                        }, ensure_ascii=False)
+                        }
+                        if unclamped_timeout != effective_timeout:
+                            timeout_result["timeout_note"] = (
+                                f"Timeout of {unclamped_timeout}s was clamped to "
+                                f"the foreground maximum of {FOREGROUND_MAX_TIMEOUT}s. "
+                                f"Use background=true for long-running processes."
+                            )
+                        return json.dumps(timeout_result, ensure_ascii=False)
                    
                    # Retry on transient errors
                    if retry_count < max_retries:
@@ -1547,6 +1559,12 @@ def terminal_tool(
                result_dict["approval"] = approval_note
            if exit_note:
                result_dict["exit_code_meaning"] = exit_note
+            if unclamped_timeout != effective_timeout:
+                result_dict["timeout_note"] = (
+                    f"Timeout of {unclamped_timeout}s was clamped to "
+                    f"the foreground maximum of {FOREGROUND_MAX_TIMEOUT}s. "
+                    f"Use background=true for long-running processes."
+                )

            return json.dumps(result_dict, ensure_ascii=False)

@@ -1733,7 +1751,7 @@ TERMINAL_SCHEMA = {
            },
            "timeout": {
                "type": "integer",
-                "description": "Max seconds to wait (default: 180). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily.",
+                "description": f"Max seconds to wait (default: 180, max: {FOREGROUND_MAX_TIMEOUT}). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily.",
                "minimum": 1
            },
            "workdir": {