diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a54be8b17..1e45193b8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -19,6 +19,9 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Install system dependencies + run: sudo apt-get update && sudo apt-get install -y ripgrep + - name: Install uv uses: astral-sh/setup-uv@v5 diff --git a/tests/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py similarity index 100% rename from tests/test_anthropic_adapter.py rename to tests/agent/test_anthropic_adapter.py diff --git a/tests/test_auxiliary_config_bridge.py b/tests/agent/test_auxiliary_config_bridge.py similarity index 98% rename from tests/test_auxiliary_config_bridge.py rename to tests/agent/test_auxiliary_config_bridge.py index 0151daf2a..91dea15af 100644 --- a/tests/test_auxiliary_config_bridge.py +++ b/tests/agent/test_auxiliary_config_bridge.py @@ -13,7 +13,7 @@ from unittest.mock import patch, MagicMock import pytest import yaml -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) def _run_auxiliary_bridge(config_dict, monkeypatch): @@ -199,7 +199,7 @@ class TestGatewayBridgeCodeParity: def test_gateway_has_auxiliary_bridge(self): """The gateway config bridge must include auxiliary.* bridging.""" - gateway_path = Path(__file__).parent.parent / "gateway" / "run.py" + gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py" content = gateway_path.read_text() # Check for key patterns that indicate the bridge is present assert "AUXILIARY_VISION_PROVIDER" in content @@ -213,7 +213,7 @@ class TestGatewayBridgeCodeParity: def test_gateway_no_compression_env_bridge(self): """Gateway should NOT bridge compression config to env vars (config-only).""" - gateway_path = Path(__file__).parent.parent / "gateway" / "run.py" + gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py" content = gateway_path.read_text() assert "CONTEXT_COMPRESSION_PROVIDER" not in content assert "CONTEXT_COMPRESSION_MODEL" not in content diff --git a/tests/test_context_references.py b/tests/agent/test_context_references.py similarity index 100% rename from tests/test_context_references.py rename to tests/agent/test_context_references.py diff --git a/tests/test_credential_pool.py b/tests/agent/test_credential_pool.py similarity index 100% rename from tests/test_credential_pool.py rename to tests/agent/test_credential_pool.py diff --git a/tests/test_credential_pool_routing.py b/tests/agent/test_credential_pool_routing.py similarity index 100% rename from tests/test_credential_pool_routing.py rename to tests/agent/test_credential_pool_routing.py diff --git a/tests/test_crossloop_client_cache.py b/tests/agent/test_crossloop_client_cache.py similarity index 100% rename from tests/test_crossloop_client_cache.py rename to tests/agent/test_crossloop_client_cache.py diff --git a/tests/test_display.py b/tests/agent/test_display.py similarity index 100% rename from tests/test_display.py rename to tests/agent/test_display.py diff --git a/tests/test_insights.py b/tests/agent/test_insights.py similarity index 100% rename from tests/test_insights.py rename to tests/agent/test_insights.py diff --git a/tests/test_model_metadata_local_ctx.py b/tests/agent/test_model_metadata_local_ctx.py similarity index 100% rename from tests/test_model_metadata_local_ctx.py rename to tests/agent/test_model_metadata_local_ctx.py diff --git a/tests/cli/__init__.py b/tests/cli/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_branch_command.py b/tests/cli/test_branch_command.py similarity index 100% rename from tests/test_branch_command.py rename to tests/cli/test_branch_command.py diff --git a/tests/test_cli_approval_ui.py b/tests/cli/test_cli_approval_ui.py similarity index 100% rename from tests/test_cli_approval_ui.py rename to tests/cli/test_cli_approval_ui.py diff --git a/tests/test_cli_background_tui_refresh.py b/tests/cli/test_cli_background_tui_refresh.py similarity index 100% rename from tests/test_cli_background_tui_refresh.py rename to tests/cli/test_cli_background_tui_refresh.py diff --git a/tests/test_cli_browser_connect.py b/tests/cli/test_cli_browser_connect.py similarity index 100% rename from tests/test_cli_browser_connect.py rename to tests/cli/test_cli_browser_connect.py diff --git a/tests/test_cli_context_warning.py b/tests/cli/test_cli_context_warning.py similarity index 100% rename from tests/test_cli_context_warning.py rename to tests/cli/test_cli_context_warning.py diff --git a/tests/test_cli_extension_hooks.py b/tests/cli/test_cli_extension_hooks.py similarity index 100% rename from tests/test_cli_extension_hooks.py rename to tests/cli/test_cli_extension_hooks.py diff --git a/tests/test_cli_file_drop.py b/tests/cli/test_cli_file_drop.py similarity index 100% rename from tests/test_cli_file_drop.py rename to tests/cli/test_cli_file_drop.py diff --git a/tests/test_cli_init.py b/tests/cli/test_cli_init.py similarity index 100% rename from tests/test_cli_init.py rename to tests/cli/test_cli_init.py diff --git a/tests/test_cli_interrupt_subagent.py b/tests/cli/test_cli_interrupt_subagent.py similarity index 100% rename from tests/test_cli_interrupt_subagent.py rename to tests/cli/test_cli_interrupt_subagent.py diff --git a/tests/test_cli_loading_indicator.py b/tests/cli/test_cli_loading_indicator.py similarity index 100% rename from tests/test_cli_loading_indicator.py rename to tests/cli/test_cli_loading_indicator.py diff --git a/tests/test_cli_mcp_config_watch.py b/tests/cli/test_cli_mcp_config_watch.py similarity index 100% rename from tests/test_cli_mcp_config_watch.py rename to tests/cli/test_cli_mcp_config_watch.py diff --git a/tests/test_cli_new_session.py b/tests/cli/test_cli_new_session.py similarity index 100% rename from tests/test_cli_new_session.py rename to tests/cli/test_cli_new_session.py diff --git a/tests/test_cli_plan_command.py b/tests/cli/test_cli_plan_command.py similarity index 100% rename from tests/test_cli_plan_command.py rename to tests/cli/test_cli_plan_command.py diff --git a/tests/test_cli_prefix_matching.py b/tests/cli/test_cli_prefix_matching.py similarity index 100% rename from tests/test_cli_prefix_matching.py rename to tests/cli/test_cli_prefix_matching.py diff --git a/tests/test_cli_preloaded_skills.py b/tests/cli/test_cli_preloaded_skills.py similarity index 100% rename from tests/test_cli_preloaded_skills.py rename to tests/cli/test_cli_preloaded_skills.py diff --git a/tests/test_cli_provider_resolution.py b/tests/cli/test_cli_provider_resolution.py similarity index 99% rename from tests/test_cli_provider_resolution.py rename to tests/cli/test_cli_provider_resolution.py index bd78a98ea..353b3234e 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/cli/test_cli_provider_resolution.py @@ -330,7 +330,7 @@ def test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_ "hermes_cli.auth.fetch_nous_models", lambda *args, **kwargs: ["claude-opus-4-6"], ) - monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None: "claude-opus-4-6") + monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None, **kw: "claude-opus-4-6") monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None) monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None) monkeypatch.setattr( @@ -368,7 +368,7 @@ def test_model_flow_nous_applies_managed_tts_default_when_unconfigured(monkeypat "hermes_cli.auth.fetch_nous_models", lambda *args, **kwargs: ["claude-opus-4-6"], ) - monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None: "claude-opus-4-6") + monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None, **kw: "claude-opus-4-6") monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None) monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None) monkeypatch.setattr( diff --git a/tests/test_cli_retry.py b/tests/cli/test_cli_retry.py similarity index 96% rename from tests/test_cli_retry.py rename to tests/cli/test_cli_retry.py index 74e2512bf..b287b4575 100644 --- a/tests/test_cli_retry.py +++ b/tests/cli/test_cli_retry.py @@ -1,6 +1,6 @@ """Regression tests for CLI /retry history replacement semantics.""" -from tests.test_cli_init import _make_cli +from tests.cli.test_cli_init import _make_cli def test_retry_last_truncates_history_before_requeueing_message(): diff --git a/tests/test_cli_save_config_value.py b/tests/cli/test_cli_save_config_value.py similarity index 100% rename from tests/test_cli_save_config_value.py rename to tests/cli/test_cli_save_config_value.py diff --git a/tests/test_cli_secret_capture.py b/tests/cli/test_cli_secret_capture.py similarity index 100% rename from tests/test_cli_secret_capture.py rename to tests/cli/test_cli_secret_capture.py diff --git a/tests/test_cli_skin_integration.py b/tests/cli/test_cli_skin_integration.py similarity index 100% rename from tests/test_cli_skin_integration.py rename to tests/cli/test_cli_skin_integration.py diff --git a/tests/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py similarity index 100% rename from tests/test_cli_status_bar.py rename to tests/cli/test_cli_status_bar.py diff --git a/tests/test_cli_tools_command.py b/tests/cli/test_cli_tools_command.py similarity index 100% rename from tests/test_cli_tools_command.py rename to tests/cli/test_cli_tools_command.py diff --git a/tests/test_personality_none.py b/tests/cli/test_personality_none.py similarity index 100% rename from tests/test_personality_none.py rename to tests/cli/test_personality_none.py diff --git a/tests/test_quick_commands.py b/tests/cli/test_quick_commands.py similarity index 100% rename from tests/test_quick_commands.py rename to tests/cli/test_quick_commands.py diff --git a/tests/test_reasoning_command.py b/tests/cli/test_reasoning_command.py similarity index 100% rename from tests/test_reasoning_command.py rename to tests/cli/test_reasoning_command.py diff --git a/tests/test_resume_display.py b/tests/cli/test_resume_display.py similarity index 100% rename from tests/test_resume_display.py rename to tests/cli/test_resume_display.py diff --git a/tests/test_surrogate_sanitization.py b/tests/cli/test_surrogate_sanitization.py similarity index 100% rename from tests/test_surrogate_sanitization.py rename to tests/cli/test_surrogate_sanitization.py diff --git a/tests/test_worktree.py b/tests/cli/test_worktree.py similarity index 100% rename from tests/test_worktree.py rename to tests/cli/test_worktree.py diff --git a/tests/test_worktree_security.py b/tests/cli/test_worktree_security.py similarity index 100% rename from tests/test_worktree_security.py rename to tests/cli/test_worktree_security.py diff --git a/tests/test_codex_execution_paths.py b/tests/cron/test_codex_execution_paths.py similarity index 100% rename from tests/test_codex_execution_paths.py rename to tests/cron/test_codex_execution_paths.py diff --git a/tests/test_file_permissions.py b/tests/cron/test_file_permissions.py similarity index 100% rename from tests/test_file_permissions.py rename to tests/cron/test_file_permissions.py diff --git a/tests/gateway/test_telegram_approval_buttons.py b/tests/gateway/test_telegram_approval_buttons.py index 1b8249bc2..98d3cdc31 100644 --- a/tests/gateway/test_telegram_approval_buttons.py +++ b/tests/gateway/test_telegram_approval_buttons.py @@ -33,8 +33,15 @@ def _ensure_telegram_mock(): mod.constants.ChatType.GROUP = "group" mod.constants.ChatType.SUPERGROUP = "supergroup" mod.constants.ChatType.CHANNEL = "channel" - for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request", "telegram.error"): + # Provide real exception classes so ``except (NetworkError, ...)`` in + # connect() doesn't blow up under xdist when this mock leaks. + mod.error.NetworkError = type("NetworkError", (OSError,), {}) + mod.error.TimedOut = type("TimedOut", (OSError,), {}) + mod.error.BadRequest = type("BadRequest", (Exception,), {}) + + for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"): sys.modules.setdefault(name, mod) + sys.modules.setdefault("telegram.error", mod.error) _ensure_telegram_mock() diff --git a/tests/gateway/test_telegram_conflict.py b/tests/gateway/test_telegram_conflict.py index 7a480d9fc..47a67f229 100644 --- a/tests/gateway/test_telegram_conflict.py +++ b/tests/gateway/test_telegram_conflict.py @@ -20,8 +20,16 @@ def _ensure_telegram_mock(): telegram_mod.constants.ChatType.CHANNEL = "channel" telegram_mod.constants.ChatType.PRIVATE = "private" + # Provide real exception classes so ``except (NetworkError, ...)`` in + # connect() doesn't blow up with "catching classes that do not inherit + # from BaseException" when another xdist worker pollutes sys.modules. + telegram_mod.error.NetworkError = type("NetworkError", (OSError,), {}) + telegram_mod.error.TimedOut = type("TimedOut", (OSError,), {}) + telegram_mod.error.BadRequest = type("BadRequest", (Exception,), {}) + for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"): sys.modules.setdefault(name, telegram_mod) + sys.modules.setdefault("telegram.error", telegram_mod.error) _ensure_telegram_mock() diff --git a/tests/test_anthropic_oauth_flow.py b/tests/hermes_cli/test_anthropic_oauth_flow.py similarity index 100% rename from tests/test_anthropic_oauth_flow.py rename to tests/hermes_cli/test_anthropic_oauth_flow.py diff --git a/tests/test_anthropic_provider_persistence.py b/tests/hermes_cli/test_anthropic_provider_persistence.py similarity index 100% rename from tests/test_anthropic_provider_persistence.py rename to tests/hermes_cli/test_anthropic_provider_persistence.py diff --git a/tests/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py similarity index 100% rename from tests/test_api_key_providers.py rename to tests/hermes_cli/test_api_key_providers.py diff --git a/tests/test_atomic_json_write.py b/tests/hermes_cli/test_atomic_json_write.py similarity index 100% rename from tests/test_atomic_json_write.py rename to tests/hermes_cli/test_atomic_json_write.py diff --git a/tests/test_atomic_yaml_write.py b/tests/hermes_cli/test_atomic_yaml_write.py similarity index 100% rename from tests/test_atomic_yaml_write.py rename to tests/hermes_cli/test_atomic_yaml_write.py diff --git a/tests/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py similarity index 100% rename from tests/test_auth_codex_provider.py rename to tests/hermes_cli/test_auth_codex_provider.py diff --git a/tests/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py similarity index 100% rename from tests/test_auth_commands.py rename to tests/hermes_cli/test_auth_commands.py diff --git a/tests/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py similarity index 100% rename from tests/test_auth_nous_provider.py rename to tests/hermes_cli/test_auth_nous_provider.py diff --git a/tests/test_codex_models.py b/tests/hermes_cli/test_codex_models.py similarity index 100% rename from tests/test_codex_models.py rename to tests/hermes_cli/test_codex_models.py diff --git a/tests/test_config_env_expansion.py b/tests/hermes_cli/test_config_env_expansion.py similarity index 100% rename from tests/test_config_env_expansion.py rename to tests/hermes_cli/test_config_env_expansion.py diff --git a/tests/test_external_credential_detection.py b/tests/hermes_cli/test_external_credential_detection.py similarity index 100% rename from tests/test_external_credential_detection.py rename to tests/hermes_cli/test_external_credential_detection.py diff --git a/tests/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py similarity index 100% rename from tests/test_gemini_provider.py rename to tests/hermes_cli/test_gemini_provider.py diff --git a/tests/test_model_normalize.py b/tests/hermes_cli/test_model_normalize.py similarity index 100% rename from tests/test_model_normalize.py rename to tests/hermes_cli/test_model_normalize.py diff --git a/tests/test_model_provider_persistence.py b/tests/hermes_cli/test_model_provider_persistence.py similarity index 100% rename from tests/test_model_provider_persistence.py rename to tests/hermes_cli/test_model_provider_persistence.py diff --git a/tests/test_ollama_cloud_auth.py b/tests/hermes_cli/test_ollama_cloud_auth.py similarity index 100% rename from tests/test_ollama_cloud_auth.py rename to tests/hermes_cli/test_ollama_cloud_auth.py diff --git a/tests/test_plugin_cli_registration.py b/tests/hermes_cli/test_plugin_cli_registration.py similarity index 100% rename from tests/test_plugin_cli_registration.py rename to tests/hermes_cli/test_plugin_cli_registration.py diff --git a/tests/test_plugins.py b/tests/hermes_cli/test_plugins.py similarity index 100% rename from tests/test_plugins.py rename to tests/hermes_cli/test_plugins.py diff --git a/tests/test_plugins_cmd.py b/tests/hermes_cli/test_plugins_cmd.py similarity index 100% rename from tests/test_plugins_cmd.py rename to tests/hermes_cli/test_plugins_cmd.py diff --git a/tests/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py similarity index 100% rename from tests/test_runtime_provider_resolution.py rename to tests/hermes_cli/test_runtime_provider_resolution.py diff --git a/tests/test_setup_model_selection.py b/tests/hermes_cli/test_setup_model_selection.py similarity index 100% rename from tests/test_setup_model_selection.py rename to tests/hermes_cli/test_setup_model_selection.py diff --git a/tests/hermes_cli/test_update_check.py b/tests/hermes_cli/test_update_check.py index b7d6de6ff..368bb1b07 100644 --- a/tests/hermes_cli/test_update_check.py +++ b/tests/hermes_cli/test_update_check.py @@ -15,7 +15,7 @@ def test_version_string_no_v_prefix(): assert not __version__.startswith("v"), f"__version__ should not start with 'v', got {__version__!r}" -def test_check_for_updates_uses_cache(tmp_path): +def test_check_for_updates_uses_cache(tmp_path, monkeypatch): """When cache is fresh, check_for_updates should return cached value without calling git.""" from hermes_cli.banner import check_for_updates @@ -27,15 +27,15 @@ def test_check_for_updates_uses_cache(tmp_path): cache_file = tmp_path / ".update_check" cache_file.write_text(json.dumps({"ts": time.time(), "behind": 3})) - with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)): - with patch("hermes_cli.banner.subprocess.run") as mock_run: - result = check_for_updates() + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + with patch("hermes_cli.banner.subprocess.run") as mock_run: + result = check_for_updates() assert result == 3 mock_run.assert_not_called() -def test_check_for_updates_expired_cache(tmp_path): +def test_check_for_updates_expired_cache(tmp_path, monkeypatch): """When cache is expired, check_for_updates should call git fetch.""" from hermes_cli.banner import check_for_updates @@ -49,15 +49,15 @@ def test_check_for_updates_expired_cache(tmp_path): mock_result = MagicMock(returncode=0, stdout="5\n") - with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)): - with patch("hermes_cli.banner.subprocess.run", return_value=mock_result) as mock_run: - result = check_for_updates() + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + with patch("hermes_cli.banner.subprocess.run", return_value=mock_result) as mock_run: + result = check_for_updates() assert result == 5 assert mock_run.call_count == 2 # git fetch + git rev-list -def test_check_for_updates_no_git_dir(tmp_path): +def test_check_for_updates_no_git_dir(tmp_path, monkeypatch): """Returns None when .git directory doesn't exist anywhere.""" import hermes_cli.banner as banner @@ -66,19 +66,15 @@ def test_check_for_updates_no_git_dir(tmp_path): fake_banner.parent.mkdir(parents=True, exist_ok=True) fake_banner.touch() - original = banner.__file__ - try: - banner.__file__ = str(fake_banner) - with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)): - with patch("hermes_cli.banner.subprocess.run") as mock_run: - result = banner.check_for_updates() - assert result is None - mock_run.assert_not_called() - finally: - banner.__file__ = original + monkeypatch.setattr(banner, "__file__", str(fake_banner)) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + with patch("hermes_cli.banner.subprocess.run") as mock_run: + result = banner.check_for_updates() + assert result is None + mock_run.assert_not_called() -def test_check_for_updates_fallback_to_project_root(): +def test_check_for_updates_fallback_to_project_root(tmp_path, monkeypatch): """Dev install: falls back to Path(__file__).parent.parent when HERMES_HOME has no git repo.""" import hermes_cli.banner as banner @@ -87,14 +83,12 @@ def test_check_for_updates_fallback_to_project_root(): pytest.skip("Not running from a git checkout") # Point HERMES_HOME at a temp dir with no hermes-agent/.git - import tempfile - with tempfile.TemporaryDirectory() as td: - with patch("hermes_cli.banner.os.getenv", return_value=td): - with patch("hermes_cli.banner.subprocess.run") as mock_run: - mock_run.return_value = MagicMock(returncode=0, stdout="0\n") - result = banner.check_for_updates() - # Should have fallen back to project root and run git commands - assert mock_run.call_count >= 1 + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + with patch("hermes_cli.banner.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(returncode=0, stdout="0\n") + result = banner.check_for_updates() + # Should have fallen back to project root and run git commands + assert mock_run.call_count >= 1 def test_prefetch_non_blocking(): diff --git a/tests/run_agent/__init__.py b/tests/run_agent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_1630_context_overflow_loop.py b/tests/run_agent/test_1630_context_overflow_loop.py similarity index 100% rename from tests/test_1630_context_overflow_loop.py rename to tests/run_agent/test_1630_context_overflow_loop.py diff --git a/tests/test_413_compression.py b/tests/run_agent/test_413_compression.py similarity index 100% rename from tests/test_413_compression.py rename to tests/run_agent/test_413_compression.py diff --git a/tests/test_860_dedup.py b/tests/run_agent/test_860_dedup.py similarity index 100% rename from tests/test_860_dedup.py rename to tests/run_agent/test_860_dedup.py diff --git a/tests/test_agent_guardrails.py b/tests/run_agent/test_agent_guardrails.py similarity index 100% rename from tests/test_agent_guardrails.py rename to tests/run_agent/test_agent_guardrails.py diff --git a/tests/test_agent_loop.py b/tests/run_agent/test_agent_loop.py similarity index 99% rename from tests/test_agent_loop.py rename to tests/run_agent/test_agent_loop.py index b95ff7808..bd9e41b91 100644 --- a/tests/test_agent_loop.py +++ b/tests/run_agent/test_agent_loop.py @@ -16,7 +16,7 @@ from unittest.mock import MagicMock import pytest # Ensure repo root is importable -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) try: from environments.agent_loop import ( diff --git a/tests/test_agent_loop_tool_calling.py b/tests/run_agent/test_agent_loop_tool_calling.py similarity index 99% rename from tests/test_agent_loop_tool_calling.py rename to tests/run_agent/test_agent_loop_tool_calling.py index 74e67c0be..3b8d6ac59 100644 --- a/tests/test_agent_loop_tool_calling.py +++ b/tests/run_agent/test_agent_loop_tool_calling.py @@ -31,7 +31,7 @@ import pytest # pytestmark removed — tests skip gracefully via OPENROUTER_API_KEY check on line 59 # Ensure repo root is importable -_repo_root = Path(__file__).resolve().parent.parent +_repo_root = Path(__file__).resolve().parent.parent.parent if str(_repo_root) not in sys.path: sys.path.insert(0, str(_repo_root)) diff --git a/tests/test_agent_loop_vllm.py b/tests/run_agent/test_agent_loop_vllm.py similarity index 99% rename from tests/test_agent_loop_vllm.py rename to tests/run_agent/test_agent_loop_vllm.py index d47478ecb..d42849094 100644 --- a/tests/test_agent_loop_vllm.py +++ b/tests/run_agent/test_agent_loop_vllm.py @@ -30,7 +30,7 @@ import pytest import requests # Ensure repo root is importable -_repo_root = Path(__file__).resolve().parent.parent +_repo_root = Path(__file__).resolve().parent.parent.parent if str(_repo_root) not in sys.path: sys.path.insert(0, str(_repo_root)) diff --git a/tests/test_anthropic_error_handling.py b/tests/run_agent/test_anthropic_error_handling.py similarity index 100% rename from tests/test_anthropic_error_handling.py rename to tests/run_agent/test_anthropic_error_handling.py diff --git a/tests/test_async_httpx_del_neuter.py b/tests/run_agent/test_async_httpx_del_neuter.py similarity index 100% rename from tests/test_async_httpx_del_neuter.py rename to tests/run_agent/test_async_httpx_del_neuter.py diff --git a/tests/test_compression_boundary.py b/tests/run_agent/test_compression_boundary.py similarity index 100% rename from tests/test_compression_boundary.py rename to tests/run_agent/test_compression_boundary.py diff --git a/tests/test_compression_persistence.py b/tests/run_agent/test_compression_persistence.py similarity index 100% rename from tests/test_compression_persistence.py rename to tests/run_agent/test_compression_persistence.py diff --git a/tests/test_compressor_fallback_update.py b/tests/run_agent/test_compressor_fallback_update.py similarity index 100% rename from tests/test_compressor_fallback_update.py rename to tests/run_agent/test_compressor_fallback_update.py diff --git a/tests/test_context_pressure.py b/tests/run_agent/test_context_pressure.py similarity index 100% rename from tests/test_context_pressure.py rename to tests/run_agent/test_context_pressure.py diff --git a/tests/test_context_token_tracking.py b/tests/run_agent/test_context_token_tracking.py similarity index 100% rename from tests/test_context_token_tracking.py rename to tests/run_agent/test_context_token_tracking.py diff --git a/tests/test_dict_tool_call_args.py b/tests/run_agent/test_dict_tool_call_args.py similarity index 100% rename from tests/test_dict_tool_call_args.py rename to tests/run_agent/test_dict_tool_call_args.py diff --git a/tests/test_exit_cleanup_interrupt.py b/tests/run_agent/test_exit_cleanup_interrupt.py similarity index 100% rename from tests/test_exit_cleanup_interrupt.py rename to tests/run_agent/test_exit_cleanup_interrupt.py diff --git a/tests/test_fallback_model.py b/tests/run_agent/test_fallback_model.py similarity index 100% rename from tests/test_fallback_model.py rename to tests/run_agent/test_fallback_model.py diff --git a/tests/test_flush_memories_codex.py b/tests/run_agent/test_flush_memories_codex.py similarity index 100% rename from tests/test_flush_memories_codex.py rename to tests/run_agent/test_flush_memories_codex.py diff --git a/tests/test_interactive_interrupt.py b/tests/run_agent/test_interactive_interrupt.py similarity index 98% rename from tests/test_interactive_interrupt.py rename to tests/run_agent/test_interactive_interrupt.py index 8c0d328c2..762621f22 100644 --- a/tests/test_interactive_interrupt.py +++ b/tests/run_agent/test_interactive_interrupt.py @@ -23,7 +23,7 @@ logging.basicConfig(level=logging.DEBUG, stream=sys.stderr, format="%(asctime)s [%(threadName)s] %(message)s") log = logging.getLogger("interrupt_test") -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) from unittest.mock import MagicMock, patch from run_agent import AIAgent, IterationBudget diff --git a/tests/test_interrupt_propagation.py b/tests/run_agent/test_interrupt_propagation.py similarity index 100% rename from tests/test_interrupt_propagation.py rename to tests/run_agent/test_interrupt_propagation.py diff --git a/tests/test_large_tool_result.py b/tests/run_agent/test_large_tool_result.py similarity index 100% rename from tests/test_large_tool_result.py rename to tests/run_agent/test_large_tool_result.py diff --git a/tests/test_long_context_tier_429.py b/tests/run_agent/test_long_context_tier_429.py similarity index 100% rename from tests/test_long_context_tier_429.py rename to tests/run_agent/test_long_context_tier_429.py diff --git a/tests/test_openai_client_lifecycle.py b/tests/run_agent/test_openai_client_lifecycle.py similarity index 100% rename from tests/test_openai_client_lifecycle.py rename to tests/run_agent/test_openai_client_lifecycle.py diff --git a/tests/test_percentage_clamp.py b/tests/run_agent/test_percentage_clamp.py similarity index 98% rename from tests/test_percentage_clamp.py rename to tests/run_agent/test_percentage_clamp.py index 67d119149..fcf1e39e5 100644 --- a/tests/test_percentage_clamp.py +++ b/tests/run_agent/test_percentage_clamp.py @@ -122,7 +122,7 @@ class TestSourceLinesAreClamped: @staticmethod def _read_file(rel_path: str) -> str: import os - base = os.path.dirname(os.path.dirname(__file__)) + base = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) with open(os.path.join(base, rel_path)) as f: return f.read() diff --git a/tests/test_primary_runtime_restore.py b/tests/run_agent/test_primary_runtime_restore.py similarity index 100% rename from tests/test_primary_runtime_restore.py rename to tests/run_agent/test_primary_runtime_restore.py diff --git a/tests/test_provider_fallback.py b/tests/run_agent/test_provider_fallback.py similarity index 100% rename from tests/test_provider_fallback.py rename to tests/run_agent/test_provider_fallback.py diff --git a/tests/test_provider_parity.py b/tests/run_agent/test_provider_parity.py similarity index 100% rename from tests/test_provider_parity.py rename to tests/run_agent/test_provider_parity.py diff --git a/tests/test_real_interrupt_subagent.py b/tests/run_agent/test_real_interrupt_subagent.py similarity index 100% rename from tests/test_real_interrupt_subagent.py rename to tests/run_agent/test_real_interrupt_subagent.py diff --git a/tests/test_redirect_stdout_issue.py b/tests/run_agent/test_redirect_stdout_issue.py similarity index 100% rename from tests/test_redirect_stdout_issue.py rename to tests/run_agent/test_redirect_stdout_issue.py diff --git a/tests/test_run_agent.py b/tests/run_agent/test_run_agent.py similarity index 100% rename from tests/test_run_agent.py rename to tests/run_agent/test_run_agent.py diff --git a/tests/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py similarity index 100% rename from tests/test_run_agent_codex_responses.py rename to tests/run_agent/test_run_agent_codex_responses.py diff --git a/tests/test_session_meta_filtering.py b/tests/run_agent/test_session_meta_filtering.py similarity index 100% rename from tests/test_session_meta_filtering.py rename to tests/run_agent/test_session_meta_filtering.py diff --git a/tests/test_session_reset_fix.py b/tests/run_agent/test_session_reset_fix.py similarity index 98% rename from tests/test_session_reset_fix.py rename to tests/run_agent/test_session_reset_fix.py index ee65ed90d..1fd1223ce 100644 --- a/tests/test_session_reset_fix.py +++ b/tests/run_agent/test_session_reset_fix.py @@ -13,7 +13,7 @@ from pathlib import Path import pytest # Ensure repo root is importable -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) # Stub out optional heavy dependencies not installed in the test environment sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) diff --git a/tests/test_streaming.py b/tests/run_agent/test_streaming.py similarity index 100% rename from tests/test_streaming.py rename to tests/run_agent/test_streaming.py diff --git a/tests/test_strict_api_validation.py b/tests/run_agent/test_strict_api_validation.py similarity index 100% rename from tests/test_strict_api_validation.py rename to tests/run_agent/test_strict_api_validation.py diff --git a/tests/test_token_persistence_non_cli.py b/tests/run_agent/test_token_persistence_non_cli.py similarity index 100% rename from tests/test_token_persistence_non_cli.py rename to tests/run_agent/test_token_persistence_non_cli.py diff --git a/tests/test_tool_arg_coercion.py b/tests/run_agent/test_tool_arg_coercion.py similarity index 100% rename from tests/test_tool_arg_coercion.py rename to tests/run_agent/test_tool_arg_coercion.py diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py index 085ffad29..5ac3fd872 100644 --- a/tests/tools/test_code_execution.py +++ b/tests/tools/test_code_execution.py @@ -18,10 +18,18 @@ import pytest import json import os -# Force local terminal backend for ALL tests in this file. -# Under xdist, another test may leak TERMINAL_ENV=modal/docker, sending -# execute_code down the remote path → modal.exception.AuthError. os.environ["TERMINAL_ENV"] = "local" + + +@pytest.fixture(autouse=True) +def _force_local_terminal(monkeypatch): + """Re-set TERMINAL_ENV=local before every test. + + The module-level assignment above covers import time, but under xdist + another worker can overwrite os.environ between tests. monkeypatch + ensures each test starts (and ends) with the correct value. + """ + monkeypatch.setenv("TERMINAL_ENV", "local") import sys import time import threading diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py index d07dcb367..5ae24f01a 100644 --- a/tests/tools/test_managed_browserbase_and_modal.py +++ b/tests/tools/test_managed_browserbase_and_modal.py @@ -91,7 +91,11 @@ def _install_fake_tools_package(): def register(self, **kwargs): return None - sys.modules["tools.registry"] = types.SimpleNamespace(registry=_Registry()) + from tools.registry import tool_error + + sys.modules["tools.registry"] = types.SimpleNamespace( + registry=_Registry(), tool_error=tool_error, + ) class _DummyEnvironment: def __init__(self, *args, **kwargs): diff --git a/tests/test_managed_server_tool_support.py b/tests/tools/test_managed_server_tool_support.py similarity index 100% rename from tests/test_managed_server_tool_support.py rename to tests/tools/test_managed_server_tool_support.py diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py index 7e3feb5cf..e1baf13d9 100644 --- a/tests/tools/test_modal_sandbox_fixes.py +++ b/tests/tools/test_modal_sandbox_fixes.py @@ -12,8 +12,6 @@ Covers the bugs discovered while setting up TBLite evaluation: import os import sys from pathlib import Path -from unittest.mock import patch, MagicMock - import pytest # Ensure repo root is importable @@ -64,89 +62,72 @@ class TestToolResolution: class TestCwdHandling: """Verify host paths are sanitized for container backends.""" - def test_home_path_replaced_for_modal(self): + def test_home_path_replaced_for_modal(self, monkeypatch): """TERMINAL_CWD=/home/user/... should be replaced with /root for modal.""" - with patch.dict(os.environ, { - "TERMINAL_ENV": "modal", - "TERMINAL_CWD": "/home/dakota/github/hermes-agent", - }): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/root", ( - f"Expected /root, got {config['cwd']}. " - "/home/ paths should be replaced for modal backend." - ) + monkeypatch.setenv("TERMINAL_ENV", "modal") + monkeypatch.setenv("TERMINAL_CWD", "/home/dakota/github/hermes-agent") + config = _tt_mod._get_env_config() + assert config["cwd"] == "/root", ( + f"Expected /root, got {config['cwd']}. " + "/home/ paths should be replaced for modal backend." + ) - def test_users_path_replaced_for_docker_by_default(self): + def test_users_path_replaced_for_docker_by_default(self, monkeypatch): """Docker should keep host paths out of the sandbox unless explicitly enabled.""" - with patch.dict(os.environ, { - "TERMINAL_ENV": "docker", - "TERMINAL_CWD": "/Users/someone/projects", - }): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/root", ( - f"Expected /root, got {config['cwd']}. " - "Host paths should be discarded for docker backend by default." - ) - assert config["host_cwd"] is None - assert config["docker_mount_cwd_to_workspace"] is False + monkeypatch.setenv("TERMINAL_ENV", "docker") + monkeypatch.setenv("TERMINAL_CWD", "/Users/someone/projects") + config = _tt_mod._get_env_config() + assert config["cwd"] == "/root", ( + f"Expected /root, got {config['cwd']}. " + "Host paths should be discarded for docker backend by default." + ) + assert config["host_cwd"] is None + assert config["docker_mount_cwd_to_workspace"] is False - def test_users_path_maps_to_workspace_for_docker_when_enabled(self): + def test_users_path_maps_to_workspace_for_docker_when_enabled(self, monkeypatch): """Docker should map the host cwd into /workspace only when explicitly enabled.""" - with patch.dict(os.environ, { - "TERMINAL_ENV": "docker", - "TERMINAL_CWD": "/Users/someone/projects", - "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true", - }): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/workspace" - assert config["host_cwd"] == "/Users/someone/projects" - assert config["docker_mount_cwd_to_workspace"] is True + monkeypatch.setenv("TERMINAL_ENV", "docker") + monkeypatch.setenv("TERMINAL_CWD", "/Users/someone/projects") + monkeypatch.setenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "true") + config = _tt_mod._get_env_config() + assert config["cwd"] == "/workspace" + assert config["host_cwd"] == "/Users/someone/projects" + assert config["docker_mount_cwd_to_workspace"] is True - def test_windows_path_replaced_for_modal(self): + def test_windows_path_replaced_for_modal(self, monkeypatch): """TERMINAL_CWD=C:\\Users\\... should be replaced for modal.""" - with patch.dict(os.environ, { - "TERMINAL_ENV": "modal", - "TERMINAL_CWD": "C:\\Users\\someone\\projects", - }): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/root" + monkeypatch.setenv("TERMINAL_ENV", "modal") + monkeypatch.setenv("TERMINAL_CWD", "C:\\Users\\someone\\projects") + config = _tt_mod._get_env_config() + assert config["cwd"] == "/root" - def test_default_cwd_is_root_for_container_backends(self): + @pytest.mark.parametrize("backend", ["modal", "docker", "singularity", "daytona"]) + def test_default_cwd_is_root_for_container_backends(self, backend, monkeypatch): """Container backends should default to /root, not ~.""" - for backend in ("modal", "docker", "singularity", "daytona"): - with patch.dict(os.environ, {"TERMINAL_ENV": backend}, clear=False): - # Remove TERMINAL_CWD so it uses default - env = os.environ.copy() - env.pop("TERMINAL_CWD", None) - env.pop("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", None) - with patch.dict(os.environ, env, clear=True): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/root", ( - f"Backend {backend}: expected /root default, got {config['cwd']}" - ) + monkeypatch.setenv("TERMINAL_ENV", backend) + monkeypatch.delenv("TERMINAL_CWD", raising=False) + monkeypatch.delenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", raising=False) + config = _tt_mod._get_env_config() + assert config["cwd"] == "/root", ( + f"Backend {backend}: expected /root default, got {config['cwd']}" + ) - def test_docker_default_cwd_maps_current_directory_when_enabled(self): + def test_docker_default_cwd_maps_current_directory_when_enabled(self, monkeypatch): """Docker should use /workspace when cwd mounting is explicitly enabled.""" - with patch("tools.terminal_tool.os.getcwd", return_value="/home/user/project"): - with patch.dict(os.environ, { - "TERMINAL_ENV": "docker", - "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true", - }, clear=False): - env = os.environ.copy() - env.pop("TERMINAL_CWD", None) - with patch.dict(os.environ, env, clear=True): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/workspace" - assert config["host_cwd"] == "/home/user/project" + monkeypatch.setattr("tools.terminal_tool.os.getcwd", lambda: "/home/user/project") + monkeypatch.setenv("TERMINAL_ENV", "docker") + monkeypatch.setenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "true") + monkeypatch.delenv("TERMINAL_CWD", raising=False) + config = _tt_mod._get_env_config() + assert config["cwd"] == "/workspace" + assert config["host_cwd"] == "/home/user/project" - def test_local_backend_uses_getcwd(self): + def test_local_backend_uses_getcwd(self, monkeypatch): """Local backend should use os.getcwd(), not /root.""" - with patch.dict(os.environ, {"TERMINAL_ENV": "local"}, clear=False): - env = os.environ.copy() - env.pop("TERMINAL_CWD", None) - with patch.dict(os.environ, env, clear=True): - config = _tt_mod._get_env_config() - assert config["cwd"] == os.getcwd() + monkeypatch.setenv("TERMINAL_ENV", "local") + monkeypatch.delenv("TERMINAL_CWD", raising=False) + config = _tt_mod._get_env_config() + assert config["cwd"] == os.getcwd() def test_create_environment_passes_docker_host_cwd_and_flag(self, monkeypatch): """Docker host cwd and mount flag should reach DockerEnvironment.""" @@ -173,18 +154,16 @@ class TestCwdHandling: assert captured["host_cwd"] == "/home/user/project" assert captured["auto_mount_cwd"] is True - def test_ssh_preserves_home_paths(self): + def test_ssh_preserves_home_paths(self, monkeypatch): """SSH backend should NOT replace /home/ paths (they're valid remotely).""" - with patch.dict(os.environ, { - "TERMINAL_ENV": "ssh", - "TERMINAL_CWD": "/home/remote-user/work", - "TERMINAL_SSH_HOST": "example.com", - "TERMINAL_SSH_USER": "user", - }): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/home/remote-user/work", ( - "SSH backend should preserve /home/ paths" - ) + monkeypatch.setenv("TERMINAL_ENV", "ssh") + monkeypatch.setenv("TERMINAL_CWD", "/home/remote-user/work") + monkeypatch.setenv("TERMINAL_SSH_HOST", "example.com") + monkeypatch.setenv("TERMINAL_SSH_USER", "user") + config = _tt_mod._get_env_config() + assert config["cwd"] == "/home/remote-user/work", ( + "SSH backend should preserve /home/ paths" + ) # ========================================================================= @@ -194,12 +173,8 @@ class TestCwdHandling: class TestEphemeralDiskCheck: """Verify ephemeral_disk is only passed when modal supports it.""" - def test_ephemeral_disk_skipped_when_unsupported(self): + def test_ephemeral_disk_skipped_when_unsupported(self, monkeypatch): """If modal.Sandbox.create doesn't have ephemeral_disk param, skip it.""" - # Mock the modal import and Sandbox.create signature - mock_modal = MagicMock() - mock_sandbox_create = MagicMock() - # Simulate a signature WITHOUT ephemeral_disk import inspect mock_params = { "args": inspect.Parameter("args", inspect.Parameter.VAR_POSITIONAL), @@ -208,26 +183,25 @@ class TestEphemeralDiskCheck: "cpu": inspect.Parameter("cpu", inspect.Parameter.KEYWORD_ONLY), "memory": inspect.Parameter("memory", inspect.Parameter.KEYWORD_ONLY), } - mock_sig = inspect.Signature(parameters=list(mock_params.values())) - with patch.dict(os.environ, {"TERMINAL_ENV": "modal"}): - config = _tt_mod._get_env_config() - # The config has container_disk default of 51200 - disk = config.get("container_disk", 51200) - assert disk > 0, "disk should default to > 0" + monkeypatch.setenv("TERMINAL_ENV", "modal") + config = _tt_mod._get_env_config() + # The config has container_disk default of 51200 + disk = config.get("container_disk", 51200) + assert disk > 0, "disk should default to > 0" - # Simulate the version check logic from terminal_tool.py - sandbox_kwargs = {} - if disk > 0: - try: - if "ephemeral_disk" in mock_params: - sandbox_kwargs["ephemeral_disk"] = disk - except Exception: - pass + # Simulate the version check logic from terminal_tool.py + sandbox_kwargs = {} + if disk > 0: + try: + if "ephemeral_disk" in mock_params: + sandbox_kwargs["ephemeral_disk"] = disk + except Exception: + pass - assert "ephemeral_disk" not in sandbox_kwargs, ( - "ephemeral_disk should not be set when Sandbox.create doesn't support it" - ) + assert "ephemeral_disk" not in sandbox_kwargs, ( + "ephemeral_disk should not be set when Sandbox.create doesn't support it" + ) # ========================================================================= diff --git a/tests/test_tool_call_parsers.py b/tests/tools/test_tool_call_parsers.py similarity index 100% rename from tests/test_tool_call_parsers.py rename to tests/tools/test_tool_call_parsers.py diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py index 08f26f509..226e99b56 100644 --- a/tools/browser_camofox.py +++ b/tools/browser_camofox.py @@ -34,6 +34,7 @@ import requests from hermes_cli.config import load_config from tools.browser_camofox_state import get_camofox_identity +from tools.registry import tool_error logger = logging.getLogger(__name__) diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 2ed8ba210..1ff42e77b 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1317,6 +1317,8 @@ def _make_read_resource_handler(server_name: str, tool_timeout: float): """Return a sync handler that reads a resource by URI from an MCP server.""" def _handler(args: dict, **kwargs) -> str: + from tools.registry import tool_error + with _lock: server = _servers.get(server_name) if not server or not server.session: @@ -1406,6 +1408,8 @@ def _make_get_prompt_handler(server_name: str, tool_timeout: float): """Return a sync handler that gets a prompt by name from an MCP server.""" def _handler(args: dict, **kwargs) -> str: + from tools.registry import tool_error + with _lock: server = _servers.get(server_name) if not server or not server.session: