diff --git a/tests/tools/test_daytona_environment.py b/tests/tools/test_daytona_environment.py index 04e634795..7f5aa17ec 100644 --- a/tests/tools/test_daytona_environment.py +++ b/tests/tools/test_daytona_environment.py @@ -59,8 +59,8 @@ def daytona_sdk(monkeypatch): @pytest.fixture() def make_env(daytona_sdk, monkeypatch): """Factory that creates a DaytonaEnvironment with a mocked SDK.""" - # Prevent is_interrupted from interfering - monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False) + # Prevent is_interrupted from interfering — patch where it's used (base.py) + monkeypatch.setattr("tools.environments.base.is_interrupted", lambda: False) # Prevent skills/credential sync from consuming mock exec calls monkeypatch.setattr("tools.credential_files.get_credential_file_mounts", lambda: []) monkeypatch.setattr("tools.credential_files.get_skills_directory_mount", lambda **kw: None) @@ -221,41 +221,45 @@ class TestCleanup: class TestExecute: def test_basic_command(self, make_env): sb = _make_sandbox() - # First call: $HOME detection; subsequent calls: actual commands + # Calls: (1) $HOME detection, (2) init_session bootstrap, (3) actual command sb.process.exec.side_effect = [ _make_exec_response(result="/root"), # $HOME + _make_exec_response(result="", exit_code=0), # init_session _make_exec_response(result="hello", exit_code=0), # actual cmd ] sb.state = "started" env = make_env(sandbox=sb) result = env.execute("echo hello") - assert result["output"] == "hello" + assert "hello" in result["output"] assert result["returncode"] == 0 - def test_command_wrapped_with_shell_timeout(self, make_env): + def test_sdk_timeout_passed_to_exec(self, make_env): + """SDK native timeout is passed to sandbox.process.exec().""" sb = _make_sandbox() sb.process.exec.side_effect = [ _make_exec_response(result="/root"), + _make_exec_response(result="", exit_code=0), # init_session _make_exec_response(result="ok", exit_code=0), ] sb.state = "started" env = make_env(sandbox=sb, timeout=42) env.execute("echo hello") - # The command sent to exec should be wrapped with `timeout N sh -c '...'` + # The exec call should receive timeout= kwarg (SDK native timeout) call_args = sb.process.exec.call_args_list[-1] + assert call_args[1]["timeout"] == 42 + # The command should NOT have a shell `timeout` prefix cmd = call_args[0][0] - assert cmd.startswith("timeout 42 sh -c ") - # SDK timeout param should NOT be passed - assert "timeout" not in call_args[1] + assert not cmd.startswith("timeout ") def test_timeout_returns_exit_code_124(self, make_env): - """Shell timeout utility returns exit code 124.""" + """SDK-level timeout surfaces as exit code 124 via _wait_for_process.""" sb = _make_sandbox() sb.process.exec.side_effect = [ _make_exec_response(result="/root"), - _make_exec_response(result="", exit_code=124), + _make_exec_response(result="", exit_code=0), # init_session + _make_exec_response(result="", exit_code=124), # actual cmd ] sb.state = "started" env = make_env(sandbox=sb) @@ -267,6 +271,7 @@ class TestExecute: sb = _make_sandbox() sb.process.exec.side_effect = [ _make_exec_response(result="/root"), + _make_exec_response(result="", exit_code=0), # init_session _make_exec_response(result="not found", exit_code=127), ] sb.state = "started" @@ -279,6 +284,7 @@ class TestExecute: sb = _make_sandbox() sb.process.exec.side_effect = [ _make_exec_response(result="/root"), + _make_exec_response(result="", exit_code=0), # init_session _make_exec_response(result="ok", exit_code=0), ] sb.state = "started" @@ -286,39 +292,47 @@ class TestExecute: env.execute("python3", stdin_data="print('hi')") # Check that the command passed to exec contains heredoc markers - # (single quotes get shell-escaped by shlex.quote, so check components) + # Base class uses HERMES_STDIN_ prefix for heredoc delimiters call_args = sb.process.exec.call_args_list[-1] cmd = call_args[0][0] - assert "HERMES_EOF_" in cmd + assert "HERMES_STDIN_" in cmd assert "print" in cmd assert "hi" in cmd - def test_custom_cwd_passed_through(self, make_env): + def test_custom_cwd_in_command_wrapper(self, make_env): + """CWD is handled by _wrap_command() in the command string, not as a kwarg.""" sb = _make_sandbox() sb.process.exec.side_effect = [ _make_exec_response(result="/root"), + _make_exec_response(result="", exit_code=0), # init_session _make_exec_response(result="/tmp", exit_code=0), ] sb.state = "started" env = make_env(sandbox=sb) env.execute("pwd", cwd="/tmp") - call_kwargs = sb.process.exec.call_args_list[-1][1] - assert call_kwargs["cwd"] == "/tmp" + # CWD should be embedded in the command string via _wrap_command + call_args = sb.process.exec.call_args_list[-1] + cmd = call_args[0][0] + assert "cd /tmp" in cmd + # CWD should NOT be passed as a kwarg to exec + assert "cwd" not in call_args[1] def test_daytona_error_triggers_retry(self, make_env, daytona_sdk): sb = _make_sandbox() sb.state = "started" sb.process.exec.side_effect = [ _make_exec_response(result="/root"), # $HOME + _make_exec_response(result="", exit_code=0), # init_session daytona_sdk.DaytonaError("transient"), # first attempt fails _make_exec_response(result="ok", exit_code=0), # retry succeeds ] env = make_env(sandbox=sb) result = env.execute("echo retry") - assert result["output"] == "ok" - assert result["returncode"] == 0 + # DaytonaError now surfaces directly through _ThreadedProcessHandle + # (no retry logic) — the error becomes returncode=1 + assert result["returncode"] == 1 # --------------------------------------------------------------------------- @@ -359,14 +373,18 @@ class TestInterrupt: calls["n"] += 1 if calls["n"] == 1: return _make_exec_response(result="/root") # $HOME detection + if calls["n"] == 2: + return _make_exec_response(result="", exit_code=0) # init_session event.wait(timeout=5) # simulate long-running command return _make_exec_response(result="done", exit_code=0) sb.process.exec.side_effect = exec_side_effect env = make_env(sandbox=sb) + # is_interrupted is checked by base.py's _wait_for_process, + # patch where it's actually referenced (base.py's local binding) monkeypatch.setattr( - "tools.environments.daytona.is_interrupted", lambda: True + "tools.environments.base.is_interrupted", lambda: True ) try: result = env.execute("sleep 10") @@ -377,23 +395,24 @@ class TestInterrupt: # --------------------------------------------------------------------------- -# Retry exhaustion +# DaytonaError surfaces directly (no retry) # --------------------------------------------------------------------------- class TestRetryExhausted: def test_both_attempts_fail(self, make_env, daytona_sdk): + """DaytonaError surfaces directly as rc=1 (retry logic was removed).""" sb = _make_sandbox() sb.state = "started" sb.process.exec.side_effect = [ _make_exec_response(result="/root"), # $HOME - daytona_sdk.DaytonaError("fail1"), # first attempt - daytona_sdk.DaytonaError("fail2"), # retry + _make_exec_response(result="", exit_code=0), # init_session + daytona_sdk.DaytonaError("fail1"), # actual command fails ] env = make_env(sandbox=sb) result = env.execute("echo x") + # Error surfaces directly through _ThreadedProcessHandle (rc=1) assert result["returncode"] == 1 - assert "Daytona execution error" in result["output"] # --------------------------------------------------------------------------- diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py index ce98217cf..498ef9d50 100644 --- a/tests/tools/test_docker_environment.py +++ b/tests/tools/test_docker_environment.py @@ -245,43 +245,42 @@ def _make_execute_only_env(forward_env=None): env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124} env._container_id = "test-container" env._docker_exe = "/usr/bin/docker" + # Base class attributes needed by unified execute() + env._session_id = "test123" + env._snapshot_path = "/tmp/hermes-snap-test123.sh" + env._cwd_file = "/tmp/hermes-cwd-test123.txt" + env._cwd_marker = "__HERMES_CWD_test123__" + env._snapshot_ready = True + env._last_sync_time = None + env._init_env_args = [] return env -def test_execute_uses_hermes_dotenv_for_allowlisted_env(monkeypatch): +def test_init_env_args_uses_hermes_dotenv_for_allowlisted_env(monkeypatch): + """_build_init_env_args picks up forwarded env vars from .env file at init time.""" env = _make_execute_only_env(["GITHUB_TOKEN"]) - popen_calls = [] - - def _fake_popen(cmd, **kwargs): - popen_calls.append(cmd) - return _FakePopen(cmd, **kwargs) monkeypatch.delenv("GITHUB_TOKEN", raising=False) monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"GITHUB_TOKEN": "value_from_dotenv"}) - monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen) - result = env.execute("echo hi") + args = env._build_init_env_args() + args_str = " ".join(args) - assert result["returncode"] == 0 - assert "GITHUB_TOKEN=value_from_dotenv" in popen_calls[0] + assert "GITHUB_TOKEN=value_from_dotenv" in args_str -def test_execute_prefers_shell_env_over_hermes_dotenv(monkeypatch): +def test_init_env_args_prefers_shell_env_over_hermes_dotenv(monkeypatch): + """Shell env vars take priority over .env file values in init env args.""" env = _make_execute_only_env(["GITHUB_TOKEN"]) - popen_calls = [] - - def _fake_popen(cmd, **kwargs): - popen_calls.append(cmd) - return _FakePopen(cmd, **kwargs) monkeypatch.setenv("GITHUB_TOKEN", "value_from_shell") monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"GITHUB_TOKEN": "value_from_dotenv"}) - monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen) - env.execute("echo hi") + args = env._build_init_env_args() + args_str = " ".join(args) - assert "GITHUB_TOKEN=value_from_shell" in popen_calls[0] - assert "GITHUB_TOKEN=value_from_dotenv" not in popen_calls[0] + assert "GITHUB_TOKEN=value_from_shell" in args_str + assert "value_from_dotenv" not in args_str # ── docker_env tests ────────────────────────────────────────────── @@ -302,64 +301,46 @@ def test_docker_env_appears_in_run_command(monkeypatch): assert "GNUPGHOME=/root/.gnupg" in run_args_str -def test_docker_env_appears_in_exec_command(monkeypatch): - """Explicit docker_env values should also be passed via -e at docker exec time.""" +def test_docker_env_appears_in_init_env_args(monkeypatch): + """Explicit docker_env values should appear in _build_init_env_args.""" env = _make_execute_only_env() env._env = {"MY_VAR": "my_value"} - popen_calls = [] - def _fake_popen(cmd, **kwargs): - popen_calls.append(cmd) - return _FakePopen(cmd, **kwargs) + args = env._build_init_env_args() + args_str = " ".join(args) - monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen) - - env.execute("echo hi") - - assert popen_calls, "Popen should have been called" - assert "MY_VAR=my_value" in popen_calls[0] + assert "MY_VAR=my_value" in args_str -def test_forward_env_overrides_docker_env(monkeypatch): +def test_forward_env_overrides_docker_env_in_init_args(monkeypatch): """docker_forward_env should override docker_env for the same key.""" env = _make_execute_only_env(forward_env=["MY_KEY"]) env._env = {"MY_KEY": "static_value"} - popen_calls = [] - - def _fake_popen(cmd, **kwargs): - popen_calls.append(cmd) - return _FakePopen(cmd, **kwargs) monkeypatch.setenv("MY_KEY", "dynamic_value") monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {}) - monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen) - env.execute("echo hi") + args = env._build_init_env_args() + args_str = " ".join(args) - cmd_str = " ".join(popen_calls[0]) - assert "MY_KEY=dynamic_value" in cmd_str - assert "MY_KEY=static_value" not in cmd_str + assert "MY_KEY=dynamic_value" in args_str + assert "MY_KEY=static_value" not in args_str -def test_docker_env_and_forward_env_merge(monkeypatch): +def test_docker_env_and_forward_env_merge_in_init_args(monkeypatch): """docker_env and docker_forward_env with different keys should both appear.""" env = _make_execute_only_env(forward_env=["TOKEN"]) env._env = {"SSH_AUTH_SOCK": "/run/user/1000/agent.sock"} - popen_calls = [] - - def _fake_popen(cmd, **kwargs): - popen_calls.append(cmd) - return _FakePopen(cmd, **kwargs) monkeypatch.setenv("TOKEN", "secret123") monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {}) - monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen) - env.execute("echo hi") + args = env._build_init_env_args() + args_str = " ".join(args) + + assert "SSH_AUTH_SOCK=/run/user/1000/agent.sock" in args_str + assert "TOKEN=secret123" in args_str - cmd_str = " ".join(popen_calls[0]) - assert "SSH_AUTH_SOCK=/run/user/1000/agent.sock" in cmd_str - assert "TOKEN=secret123" in cmd_str def test_normalize_env_dict_filters_invalid_keys(): diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py index e1baf13d9..570ef5b21 100644 --- a/tests/tools/test_modal_sandbox_fixes.py +++ b/tests/tools/test_modal_sandbox_fixes.py @@ -231,20 +231,20 @@ class TestEnsurepipFix: """Verify the pip fix is applied in the ModalEnvironment init.""" def test_modal_environment_creates_image_with_setup_commands(self): - """ModalEnvironment.__init__ should create a modal.Image with pip fix.""" + """_resolve_modal_image should create a modal.Image with pip fix.""" try: - from tools.environments.modal import ModalEnvironment + from tools.environments.modal import _resolve_modal_image except ImportError: pytest.skip("tools.environments.modal not importable") import inspect - source = inspect.getsource(ModalEnvironment.__init__) + source = inspect.getsource(_resolve_modal_image) assert "ensurepip" in source, ( - "ModalEnvironment should include ensurepip fix " + "_resolve_modal_image should include ensurepip fix " "for Modal's legacy image builder" ) assert "setup_dockerfile_commands" in source, ( - "ModalEnvironment should use setup_dockerfile_commands " + "_resolve_modal_image should use setup_dockerfile_commands " "to fix pip before Modal's bootstrap" ) diff --git a/tests/tools/test_modal_snapshot_isolation.py b/tests/tools/test_modal_snapshot_isolation.py index a3d0eeacd..b58454cc0 100644 --- a/tests/tools/test_modal_snapshot_isolation.py +++ b/tests/tools/test_modal_snapshot_isolation.py @@ -85,11 +85,47 @@ def _install_modal_test_modules( def _prepare_command(self, command: str): return command, None - sys.modules["tools.environments.base"] = types.SimpleNamespace(BaseEnvironment=_DummyBaseEnvironment) + def init_session(self): + pass + + # Stub _ThreadedProcessHandle: modal.py imports it but only uses it at + # runtime inside _run_bash; the snapshot-isolation tests never call _run_bash, + # so a class placeholder is sufficient. + class _DummyThreadedProcessHandle: + def __init__(self, exec_fn, cancel_fn=None): + pass + + def _load_json_store(path): + if path.exists(): + try: + return json.loads(path.read_text()) + except Exception: + pass + return {} + + def _save_json_store(path, data): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(data, indent=2)) + + def _file_mtime_key(host_path): + try: + st = Path(host_path).stat() + return (st.st_mtime, st.st_size) + except OSError: + return None + + sys.modules["tools.environments.base"] = types.SimpleNamespace( + BaseEnvironment=_DummyBaseEnvironment, + _ThreadedProcessHandle=_DummyThreadedProcessHandle, + _load_json_store=_load_json_store, + _save_json_store=_save_json_store, + _file_mtime_key=_file_mtime_key, + ) sys.modules["tools.interrupt"] = types.SimpleNamespace(is_interrupted=lambda: False) sys.modules["tools.credential_files"] = types.SimpleNamespace( get_credential_file_mounts=lambda: [], iter_skills_files=lambda: [], + iter_cache_files=lambda: [], ) from_id_calls: list[str] = [] diff --git a/tests/tools/test_ssh_environment.py b/tests/tools/test_ssh_environment.py index 9f514e9a9..f6ee96717 100644 --- a/tests/tools/test_ssh_environment.py +++ b/tests/tools/test_ssh_environment.py @@ -43,7 +43,7 @@ class TestBuildSSHCommand: lambda *a, **k: MagicMock(stdout=iter([]), stderr=iter([]), stdin=MagicMock())) - monkeypatch.setattr("tools.environments.ssh.time.sleep", lambda _: None) + monkeypatch.setattr("tools.environments.base.time.sleep", lambda _: None) def test_base_flags(self): env = SSHEnvironment(host="h", user="u")