From f3006ebef9759d6d1002310e40d3b79a8293f074 Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Tue, 7 Apr 2026 17:19:07 -0700
Subject: [PATCH] refactor(tests): re-architect tests + fix CI failures (#5946)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: re-architect tests to mirror the codebase

* ci: install ripgrep as a system dependency in tests.yml

* fix: add missing tool_error imports after registry refactor

* fix(tests): replace patch.dict with monkeypatch to prevent env var leaks under xdist

patch.dict(os.environ) can leak TERMINAL_ENV across xdist workers,
causing test_code_execution tests to hit the Modal remote path.

* fix(tests): fix update_check and telegram xdist failures

- test_update_check: replace patch("hermes_cli.banner.os.getenv") with
  monkeypatch.setenv("HERMES_HOME") — banner.py no longer imports os
  directly, it uses get_hermes_home() from hermes_constants.

- test_telegram_conflict/approval_buttons: provide real exception classes
  for telegram.error mock (NetworkError, TimedOut, BadRequest) so the
  except clause in connect() doesn't fail with "catching classes that do
  not inherit from BaseException" when xdist pollutes sys.modules.

* fix(tests): accept unavailable_models kwarg in _prompt_model_selection mock
---
 .github/workflows/tests.yml                   |   3 +
 tests/{ => agent}/test_anthropic_adapter.py   |   0
 .../test_auxiliary_config_bridge.py           |   6 +-
 tests/{ => agent}/test_context_references.py  |   0
 tests/{ => agent}/test_credential_pool.py     |   0
 .../test_credential_pool_routing.py           |   0
 .../test_crossloop_client_cache.py            |   0
 tests/{ => agent}/test_display.py             |   0
 tests/{ => agent}/test_insights.py            |   0
 .../test_model_metadata_local_ctx.py          |   0
 tests/cli/__init__.py                         |   0
 tests/{ => cli}/test_branch_command.py        |   0
 tests/{ => cli}/test_cli_approval_ui.py       |   0
 .../test_cli_background_tui_refresh.py        |   0
 tests/{ => cli}/test_cli_browser_connect.py   |   0
 tests/{ => cli}/test_cli_context_warning.py   |   0
 tests/{ => cli}/test_cli_extension_hooks.py   |   0
 tests/{ => cli}/test_cli_file_drop.py         |   0
 tests/{ => cli}/test_cli_init.py              |   0
 .../{ => cli}/test_cli_interrupt_subagent.py  |   0
 tests/{ => cli}/test_cli_loading_indicator.py |   0
 tests/{ => cli}/test_cli_mcp_config_watch.py  |   0
 tests/{ => cli}/test_cli_new_session.py       |   0
 tests/{ => cli}/test_cli_plan_command.py      |   0
 tests/{ => cli}/test_cli_prefix_matching.py   |   0
 tests/{ => cli}/test_cli_preloaded_skills.py  |   0
 .../{ => cli}/test_cli_provider_resolution.py |   4 +-
 tests/{ => cli}/test_cli_retry.py             |   2 +-
 tests/{ => cli}/test_cli_save_config_value.py |   0
 tests/{ => cli}/test_cli_secret_capture.py    |   0
 tests/{ => cli}/test_cli_skin_integration.py  |   0
 tests/{ => cli}/test_cli_status_bar.py        |   0
 tests/{ => cli}/test_cli_tools_command.py     |   0
 tests/{ => cli}/test_personality_none.py      |   0
 tests/{ => cli}/test_quick_commands.py        |   0
 tests/{ => cli}/test_reasoning_command.py     |   0
 tests/{ => cli}/test_resume_display.py        |   0
 .../{ => cli}/test_surrogate_sanitization.py  |   0
 tests/{ => cli}/test_worktree.py              |   0
 tests/{ => cli}/test_worktree_security.py     |   0
 .../{ => cron}/test_codex_execution_paths.py  |   0
 tests/{ => cron}/test_file_permissions.py     |   0
 .../gateway/test_telegram_approval_buttons.py |   9 +-
 tests/gateway/test_telegram_conflict.py       |   8 +
 .../test_anthropic_oauth_flow.py              |   0
 .../test_anthropic_provider_persistence.py    |   0
 .../test_api_key_providers.py                 |   0
 .../test_atomic_json_write.py                 |   0
 .../test_atomic_yaml_write.py                 |   0
 .../test_auth_codex_provider.py               |   0
 tests/{ => hermes_cli}/test_auth_commands.py  |   0
 .../test_auth_nous_provider.py                |   0
 tests/{ => hermes_cli}/test_codex_models.py   |   0
 .../test_config_env_expansion.py              |   0
 .../test_external_credential_detection.py     |   0
 .../{ => hermes_cli}/test_gemini_provider.py  |   0
 .../{ => hermes_cli}/test_model_normalize.py  |   0
 .../test_model_provider_persistence.py        |   0
 .../test_ollama_cloud_auth.py                 |   0
 .../test_plugin_cli_registration.py           |   0
 tests/{ => hermes_cli}/test_plugins.py        |   0
 tests/{ => hermes_cli}/test_plugins_cmd.py    |   0
 .../test_runtime_provider_resolution.py       |   0
 .../test_setup_model_selection.py             |   0
 tests/hermes_cli/test_update_check.py         |  50 +++--
 tests/run_agent/__init__.py                   |   0
 .../test_1630_context_overflow_loop.py        |   0
 tests/{ => run_agent}/test_413_compression.py |   0
 tests/{ => run_agent}/test_860_dedup.py       |   0
 .../{ => run_agent}/test_agent_guardrails.py  |   0
 tests/{ => run_agent}/test_agent_loop.py      |   2 +-
 .../test_agent_loop_tool_calling.py           |   2 +-
 tests/{ => run_agent}/test_agent_loop_vllm.py |   2 +-
 .../test_anthropic_error_handling.py          |   0
 .../test_async_httpx_del_neuter.py            |   0
 .../test_compression_boundary.py              |   0
 .../test_compression_persistence.py           |   0
 .../test_compressor_fallback_update.py        |   0
 .../{ => run_agent}/test_context_pressure.py  |   0
 .../test_context_token_tracking.py            |   0
 .../test_dict_tool_call_args.py               |   0
 .../test_exit_cleanup_interrupt.py            |   0
 tests/{ => run_agent}/test_fallback_model.py  |   0
 .../test_flush_memories_codex.py              |   0
 .../test_interactive_interrupt.py             |   2 +-
 .../test_interrupt_propagation.py             |   0
 .../{ => run_agent}/test_large_tool_result.py |   0
 .../test_long_context_tier_429.py             |   0
 .../test_openai_client_lifecycle.py           |   0
 .../{ => run_agent}/test_percentage_clamp.py  |   2 +-
 .../test_primary_runtime_restore.py           |   0
 .../{ => run_agent}/test_provider_fallback.py |   0
 tests/{ => run_agent}/test_provider_parity.py |   0
 .../test_real_interrupt_subagent.py           |   0
 .../test_redirect_stdout_issue.py             |   0
 tests/{ => run_agent}/test_run_agent.py       |   0
 .../test_run_agent_codex_responses.py         |   0
 .../test_session_meta_filtering.py            |   0
 .../{ => run_agent}/test_session_reset_fix.py |   2 +-
 tests/{ => run_agent}/test_streaming.py       |   0
 .../test_strict_api_validation.py             |   0
 .../test_token_persistence_non_cli.py         |   0
 .../{ => run_agent}/test_tool_arg_coercion.py |   0
 tests/tools/test_code_execution.py            |  14 +-
 .../test_managed_browserbase_and_modal.py     |   6 +-
 .../test_managed_server_tool_support.py       |   0
 tests/tools/test_modal_sandbox_fixes.py       | 184 ++++++++----------
 tests/{ => tools}/test_tool_call_parsers.py   |   0
 tools/browser_camofox.py                      |   1 +
 tools/mcp_tool.py                             |   4 +
 110 files changed, 153 insertions(+), 150 deletions(-)
 rename tests/{ => agent}/test_anthropic_adapter.py (100%)
 rename tests/{ => agent}/test_auxiliary_config_bridge.py (98%)
 rename tests/{ => agent}/test_context_references.py (100%)
 rename tests/{ => agent}/test_credential_pool.py (100%)
 rename tests/{ => agent}/test_credential_pool_routing.py (100%)
 rename tests/{ => agent}/test_crossloop_client_cache.py (100%)
 rename tests/{ => agent}/test_display.py (100%)
 rename tests/{ => agent}/test_insights.py (100%)
 rename tests/{ => agent}/test_model_metadata_local_ctx.py (100%)
 create mode 100644 tests/cli/__init__.py
 rename tests/{ => cli}/test_branch_command.py (100%)
 rename tests/{ => cli}/test_cli_approval_ui.py (100%)
 rename tests/{ => cli}/test_cli_background_tui_refresh.py (100%)
 rename tests/{ => cli}/test_cli_browser_connect.py (100%)
 rename tests/{ => cli}/test_cli_context_warning.py (100%)
 rename tests/{ => cli}/test_cli_extension_hooks.py (100%)
 rename tests/{ => cli}/test_cli_file_drop.py (100%)
 rename tests/{ => cli}/test_cli_init.py (100%)
 rename tests/{ => cli}/test_cli_interrupt_subagent.py (100%)
 rename tests/{ => cli}/test_cli_loading_indicator.py (100%)
 rename tests/{ => cli}/test_cli_mcp_config_watch.py (100%)
 rename tests/{ => cli}/test_cli_new_session.py (100%)
 rename tests/{ => cli}/test_cli_plan_command.py (100%)
 rename tests/{ => cli}/test_cli_prefix_matching.py (100%)
 rename tests/{ => cli}/test_cli_preloaded_skills.py (100%)
 rename tests/{ => cli}/test_cli_provider_resolution.py (99%)
 rename tests/{ => cli}/test_cli_retry.py (96%)
 rename tests/{ => cli}/test_cli_save_config_value.py (100%)
 rename tests/{ => cli}/test_cli_secret_capture.py (100%)
 rename tests/{ => cli}/test_cli_skin_integration.py (100%)
 rename tests/{ => cli}/test_cli_status_bar.py (100%)
 rename tests/{ => cli}/test_cli_tools_command.py (100%)
 rename tests/{ => cli}/test_personality_none.py (100%)
 rename tests/{ => cli}/test_quick_commands.py (100%)
 rename tests/{ => cli}/test_reasoning_command.py (100%)
 rename tests/{ => cli}/test_resume_display.py (100%)
 rename tests/{ => cli}/test_surrogate_sanitization.py (100%)
 rename tests/{ => cli}/test_worktree.py (100%)
 rename tests/{ => cli}/test_worktree_security.py (100%)
 rename tests/{ => cron}/test_codex_execution_paths.py (100%)
 rename tests/{ => cron}/test_file_permissions.py (100%)
 rename tests/{ => hermes_cli}/test_anthropic_oauth_flow.py (100%)
 rename tests/{ => hermes_cli}/test_anthropic_provider_persistence.py (100%)
 rename tests/{ => hermes_cli}/test_api_key_providers.py (100%)
 rename tests/{ => hermes_cli}/test_atomic_json_write.py (100%)
 rename tests/{ => hermes_cli}/test_atomic_yaml_write.py (100%)
 rename tests/{ => hermes_cli}/test_auth_codex_provider.py (100%)
 rename tests/{ => hermes_cli}/test_auth_commands.py (100%)
 rename tests/{ => hermes_cli}/test_auth_nous_provider.py (100%)
 rename tests/{ => hermes_cli}/test_codex_models.py (100%)
 rename tests/{ => hermes_cli}/test_config_env_expansion.py (100%)
 rename tests/{ => hermes_cli}/test_external_credential_detection.py (100%)
 rename tests/{ => hermes_cli}/test_gemini_provider.py (100%)
 rename tests/{ => hermes_cli}/test_model_normalize.py (100%)
 rename tests/{ => hermes_cli}/test_model_provider_persistence.py (100%)
 rename tests/{ => hermes_cli}/test_ollama_cloud_auth.py (100%)
 rename tests/{ => hermes_cli}/test_plugin_cli_registration.py (100%)
 rename tests/{ => hermes_cli}/test_plugins.py (100%)
 rename tests/{ => hermes_cli}/test_plugins_cmd.py (100%)
 rename tests/{ => hermes_cli}/test_runtime_provider_resolution.py (100%)
 rename tests/{ => hermes_cli}/test_setup_model_selection.py (100%)
 create mode 100644 tests/run_agent/__init__.py
 rename tests/{ => run_agent}/test_1630_context_overflow_loop.py (100%)
 rename tests/{ => run_agent}/test_413_compression.py (100%)
 rename tests/{ => run_agent}/test_860_dedup.py (100%)
 rename tests/{ => run_agent}/test_agent_guardrails.py (100%)
 rename tests/{ => run_agent}/test_agent_loop.py (99%)
 rename tests/{ => run_agent}/test_agent_loop_tool_calling.py (99%)
 rename tests/{ => run_agent}/test_agent_loop_vllm.py (99%)
 rename tests/{ => run_agent}/test_anthropic_error_handling.py (100%)
 rename tests/{ => run_agent}/test_async_httpx_del_neuter.py (100%)
 rename tests/{ => run_agent}/test_compression_boundary.py (100%)
 rename tests/{ => run_agent}/test_compression_persistence.py (100%)
 rename tests/{ => run_agent}/test_compressor_fallback_update.py (100%)
 rename tests/{ => run_agent}/test_context_pressure.py (100%)
 rename tests/{ => run_agent}/test_context_token_tracking.py (100%)
 rename tests/{ => run_agent}/test_dict_tool_call_args.py (100%)
 rename tests/{ => run_agent}/test_exit_cleanup_interrupt.py (100%)
 rename tests/{ => run_agent}/test_fallback_model.py (100%)
 rename tests/{ => run_agent}/test_flush_memories_codex.py (100%)
 rename tests/{ => run_agent}/test_interactive_interrupt.py (98%)
 rename tests/{ => run_agent}/test_interrupt_propagation.py (100%)
 rename tests/{ => run_agent}/test_large_tool_result.py (100%)
 rename tests/{ => run_agent}/test_long_context_tier_429.py (100%)
 rename tests/{ => run_agent}/test_openai_client_lifecycle.py (100%)
 rename tests/{ => run_agent}/test_percentage_clamp.py (98%)
 rename tests/{ => run_agent}/test_primary_runtime_restore.py (100%)
 rename tests/{ => run_agent}/test_provider_fallback.py (100%)
 rename tests/{ => run_agent}/test_provider_parity.py (100%)
 rename tests/{ => run_agent}/test_real_interrupt_subagent.py (100%)
 rename tests/{ => run_agent}/test_redirect_stdout_issue.py (100%)
 rename tests/{ => run_agent}/test_run_agent.py (100%)
 rename tests/{ => run_agent}/test_run_agent_codex_responses.py (100%)
 rename tests/{ => run_agent}/test_session_meta_filtering.py (100%)
 rename tests/{ => run_agent}/test_session_reset_fix.py (98%)
 rename tests/{ => run_agent}/test_streaming.py (100%)
 rename tests/{ => run_agent}/test_strict_api_validation.py (100%)
 rename tests/{ => run_agent}/test_token_persistence_non_cli.py (100%)
 rename tests/{ => run_agent}/test_tool_arg_coercion.py (100%)
 rename tests/{ => tools}/test_managed_server_tool_support.py (100%)
 rename tests/{ => tools}/test_tool_call_parsers.py (100%)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index a54be8b17..1e45193b8 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -19,6 +19,9 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v4
 
+      - name: Install system dependencies
+        run: sudo apt-get update && sudo apt-get install -y ripgrep
+
       - name: Install uv
         uses: astral-sh/setup-uv@v5
 
diff --git a/tests/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py
similarity index 100%
rename from tests/test_anthropic_adapter.py
rename to tests/agent/test_anthropic_adapter.py
diff --git a/tests/test_auxiliary_config_bridge.py b/tests/agent/test_auxiliary_config_bridge.py
similarity index 98%
rename from tests/test_auxiliary_config_bridge.py
rename to tests/agent/test_auxiliary_config_bridge.py
index 0151daf2a..91dea15af 100644
--- a/tests/test_auxiliary_config_bridge.py
+++ b/tests/agent/test_auxiliary_config_bridge.py
@@ -13,7 +13,7 @@ from unittest.mock import patch, MagicMock
 import pytest
 import yaml
 
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
 
 
 def _run_auxiliary_bridge(config_dict, monkeypatch):
@@ -199,7 +199,7 @@ class TestGatewayBridgeCodeParity:
 
     def test_gateway_has_auxiliary_bridge(self):
         """The gateway config bridge must include auxiliary.* bridging."""
-        gateway_path = Path(__file__).parent.parent / "gateway" / "run.py"
+        gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py"
         content = gateway_path.read_text()
         # Check for key patterns that indicate the bridge is present
         assert "AUXILIARY_VISION_PROVIDER" in content
@@ -213,7 +213,7 @@ class TestGatewayBridgeCodeParity:
 
     def test_gateway_no_compression_env_bridge(self):
         """Gateway should NOT bridge compression config to env vars (config-only)."""
-        gateway_path = Path(__file__).parent.parent / "gateway" / "run.py"
+        gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py"
         content = gateway_path.read_text()
         assert "CONTEXT_COMPRESSION_PROVIDER" not in content
         assert "CONTEXT_COMPRESSION_MODEL" not in content
diff --git a/tests/test_context_references.py b/tests/agent/test_context_references.py
similarity index 100%
rename from tests/test_context_references.py
rename to tests/agent/test_context_references.py
diff --git a/tests/test_credential_pool.py b/tests/agent/test_credential_pool.py
similarity index 100%
rename from tests/test_credential_pool.py
rename to tests/agent/test_credential_pool.py
diff --git a/tests/test_credential_pool_routing.py b/tests/agent/test_credential_pool_routing.py
similarity index 100%
rename from tests/test_credential_pool_routing.py
rename to tests/agent/test_credential_pool_routing.py
diff --git a/tests/test_crossloop_client_cache.py b/tests/agent/test_crossloop_client_cache.py
similarity index 100%
rename from tests/test_crossloop_client_cache.py
rename to tests/agent/test_crossloop_client_cache.py
diff --git a/tests/test_display.py b/tests/agent/test_display.py
similarity index 100%
rename from tests/test_display.py
rename to tests/agent/test_display.py
diff --git a/tests/test_insights.py b/tests/agent/test_insights.py
similarity index 100%
rename from tests/test_insights.py
rename to tests/agent/test_insights.py
diff --git a/tests/test_model_metadata_local_ctx.py b/tests/agent/test_model_metadata_local_ctx.py
similarity index 100%
rename from tests/test_model_metadata_local_ctx.py
rename to tests/agent/test_model_metadata_local_ctx.py
diff --git a/tests/cli/__init__.py b/tests/cli/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_branch_command.py b/tests/cli/test_branch_command.py
similarity index 100%
rename from tests/test_branch_command.py
rename to tests/cli/test_branch_command.py
diff --git a/tests/test_cli_approval_ui.py b/tests/cli/test_cli_approval_ui.py
similarity index 100%
rename from tests/test_cli_approval_ui.py
rename to tests/cli/test_cli_approval_ui.py
diff --git a/tests/test_cli_background_tui_refresh.py b/tests/cli/test_cli_background_tui_refresh.py
similarity index 100%
rename from tests/test_cli_background_tui_refresh.py
rename to tests/cli/test_cli_background_tui_refresh.py
diff --git a/tests/test_cli_browser_connect.py b/tests/cli/test_cli_browser_connect.py
similarity index 100%
rename from tests/test_cli_browser_connect.py
rename to tests/cli/test_cli_browser_connect.py
diff --git a/tests/test_cli_context_warning.py b/tests/cli/test_cli_context_warning.py
similarity index 100%
rename from tests/test_cli_context_warning.py
rename to tests/cli/test_cli_context_warning.py
diff --git a/tests/test_cli_extension_hooks.py b/tests/cli/test_cli_extension_hooks.py
similarity index 100%
rename from tests/test_cli_extension_hooks.py
rename to tests/cli/test_cli_extension_hooks.py
diff --git a/tests/test_cli_file_drop.py b/tests/cli/test_cli_file_drop.py
similarity index 100%
rename from tests/test_cli_file_drop.py
rename to tests/cli/test_cli_file_drop.py
diff --git a/tests/test_cli_init.py b/tests/cli/test_cli_init.py
similarity index 100%
rename from tests/test_cli_init.py
rename to tests/cli/test_cli_init.py
diff --git a/tests/test_cli_interrupt_subagent.py b/tests/cli/test_cli_interrupt_subagent.py
similarity index 100%
rename from tests/test_cli_interrupt_subagent.py
rename to tests/cli/test_cli_interrupt_subagent.py
diff --git a/tests/test_cli_loading_indicator.py b/tests/cli/test_cli_loading_indicator.py
similarity index 100%
rename from tests/test_cli_loading_indicator.py
rename to tests/cli/test_cli_loading_indicator.py
diff --git a/tests/test_cli_mcp_config_watch.py b/tests/cli/test_cli_mcp_config_watch.py
similarity index 100%
rename from tests/test_cli_mcp_config_watch.py
rename to tests/cli/test_cli_mcp_config_watch.py
diff --git a/tests/test_cli_new_session.py b/tests/cli/test_cli_new_session.py
similarity index 100%
rename from tests/test_cli_new_session.py
rename to tests/cli/test_cli_new_session.py
diff --git a/tests/test_cli_plan_command.py b/tests/cli/test_cli_plan_command.py
similarity index 100%
rename from tests/test_cli_plan_command.py
rename to tests/cli/test_cli_plan_command.py
diff --git a/tests/test_cli_prefix_matching.py b/tests/cli/test_cli_prefix_matching.py
similarity index 100%
rename from tests/test_cli_prefix_matching.py
rename to tests/cli/test_cli_prefix_matching.py
diff --git a/tests/test_cli_preloaded_skills.py b/tests/cli/test_cli_preloaded_skills.py
similarity index 100%
rename from tests/test_cli_preloaded_skills.py
rename to tests/cli/test_cli_preloaded_skills.py
diff --git a/tests/test_cli_provider_resolution.py b/tests/cli/test_cli_provider_resolution.py
similarity index 99%
rename from tests/test_cli_provider_resolution.py
rename to tests/cli/test_cli_provider_resolution.py
index bd78a98ea..353b3234e 100644
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/cli/test_cli_provider_resolution.py
@@ -330,7 +330,7 @@ def test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_
         "hermes_cli.auth.fetch_nous_models",
         lambda *args, **kwargs: ["claude-opus-4-6"],
     )
-    monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None: "claude-opus-4-6")
+    monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None, **kw: "claude-opus-4-6")
     monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None)
     monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None)
     monkeypatch.setattr(
@@ -368,7 +368,7 @@ def test_model_flow_nous_applies_managed_tts_default_when_unconfigured(monkeypat
         "hermes_cli.auth.fetch_nous_models",
         lambda *args, **kwargs: ["claude-opus-4-6"],
     )
-    monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None: "claude-opus-4-6")
+    monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None, **kw: "claude-opus-4-6")
     monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None)
     monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None)
     monkeypatch.setattr(
diff --git a/tests/test_cli_retry.py b/tests/cli/test_cli_retry.py
similarity index 96%
rename from tests/test_cli_retry.py
rename to tests/cli/test_cli_retry.py
index 74e2512bf..b287b4575 100644
--- a/tests/test_cli_retry.py
+++ b/tests/cli/test_cli_retry.py
@@ -1,6 +1,6 @@
 """Regression tests for CLI /retry history replacement semantics."""
 
-from tests.test_cli_init import _make_cli
+from tests.cli.test_cli_init import _make_cli
 
 
 def test_retry_last_truncates_history_before_requeueing_message():
diff --git a/tests/test_cli_save_config_value.py b/tests/cli/test_cli_save_config_value.py
similarity index 100%
rename from tests/test_cli_save_config_value.py
rename to tests/cli/test_cli_save_config_value.py
diff --git a/tests/test_cli_secret_capture.py b/tests/cli/test_cli_secret_capture.py
similarity index 100%
rename from tests/test_cli_secret_capture.py
rename to tests/cli/test_cli_secret_capture.py
diff --git a/tests/test_cli_skin_integration.py b/tests/cli/test_cli_skin_integration.py
similarity index 100%
rename from tests/test_cli_skin_integration.py
rename to tests/cli/test_cli_skin_integration.py
diff --git a/tests/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py
similarity index 100%
rename from tests/test_cli_status_bar.py
rename to tests/cli/test_cli_status_bar.py
diff --git a/tests/test_cli_tools_command.py b/tests/cli/test_cli_tools_command.py
similarity index 100%
rename from tests/test_cli_tools_command.py
rename to tests/cli/test_cli_tools_command.py
diff --git a/tests/test_personality_none.py b/tests/cli/test_personality_none.py
similarity index 100%
rename from tests/test_personality_none.py
rename to tests/cli/test_personality_none.py
diff --git a/tests/test_quick_commands.py b/tests/cli/test_quick_commands.py
similarity index 100%
rename from tests/test_quick_commands.py
rename to tests/cli/test_quick_commands.py
diff --git a/tests/test_reasoning_command.py b/tests/cli/test_reasoning_command.py
similarity index 100%
rename from tests/test_reasoning_command.py
rename to tests/cli/test_reasoning_command.py
diff --git a/tests/test_resume_display.py b/tests/cli/test_resume_display.py
similarity index 100%
rename from tests/test_resume_display.py
rename to tests/cli/test_resume_display.py
diff --git a/tests/test_surrogate_sanitization.py b/tests/cli/test_surrogate_sanitization.py
similarity index 100%
rename from tests/test_surrogate_sanitization.py
rename to tests/cli/test_surrogate_sanitization.py
diff --git a/tests/test_worktree.py b/tests/cli/test_worktree.py
similarity index 100%
rename from tests/test_worktree.py
rename to tests/cli/test_worktree.py
diff --git a/tests/test_worktree_security.py b/tests/cli/test_worktree_security.py
similarity index 100%
rename from tests/test_worktree_security.py
rename to tests/cli/test_worktree_security.py
diff --git a/tests/test_codex_execution_paths.py b/tests/cron/test_codex_execution_paths.py
similarity index 100%
rename from tests/test_codex_execution_paths.py
rename to tests/cron/test_codex_execution_paths.py
diff --git a/tests/test_file_permissions.py b/tests/cron/test_file_permissions.py
similarity index 100%
rename from tests/test_file_permissions.py
rename to tests/cron/test_file_permissions.py
diff --git a/tests/gateway/test_telegram_approval_buttons.py b/tests/gateway/test_telegram_approval_buttons.py
index 1b8249bc2..98d3cdc31 100644
--- a/tests/gateway/test_telegram_approval_buttons.py
+++ b/tests/gateway/test_telegram_approval_buttons.py
@@ -33,8 +33,15 @@ def _ensure_telegram_mock():
     mod.constants.ChatType.GROUP = "group"
     mod.constants.ChatType.SUPERGROUP = "supergroup"
     mod.constants.ChatType.CHANNEL = "channel"
-    for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request", "telegram.error"):
+    # Provide real exception classes so ``except (NetworkError, ...)`` in
+    # connect() doesn't blow up under xdist when this mock leaks.
+    mod.error.NetworkError = type("NetworkError", (OSError,), {})
+    mod.error.TimedOut = type("TimedOut", (OSError,), {})
+    mod.error.BadRequest = type("BadRequest", (Exception,), {})
+
+    for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
         sys.modules.setdefault(name, mod)
+    sys.modules.setdefault("telegram.error", mod.error)
 
 
 _ensure_telegram_mock()
diff --git a/tests/gateway/test_telegram_conflict.py b/tests/gateway/test_telegram_conflict.py
index 7a480d9fc..47a67f229 100644
--- a/tests/gateway/test_telegram_conflict.py
+++ b/tests/gateway/test_telegram_conflict.py
@@ -20,8 +20,16 @@ def _ensure_telegram_mock():
     telegram_mod.constants.ChatType.CHANNEL = "channel"
     telegram_mod.constants.ChatType.PRIVATE = "private"
 
+    # Provide real exception classes so ``except (NetworkError, ...)`` in
+    # connect() doesn't blow up with "catching classes that do not inherit
+    # from BaseException" when another xdist worker pollutes sys.modules.
+    telegram_mod.error.NetworkError = type("NetworkError", (OSError,), {})
+    telegram_mod.error.TimedOut = type("TimedOut", (OSError,), {})
+    telegram_mod.error.BadRequest = type("BadRequest", (Exception,), {})
+
     for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
         sys.modules.setdefault(name, telegram_mod)
+    sys.modules.setdefault("telegram.error", telegram_mod.error)
 
 
 _ensure_telegram_mock()
diff --git a/tests/test_anthropic_oauth_flow.py b/tests/hermes_cli/test_anthropic_oauth_flow.py
similarity index 100%
rename from tests/test_anthropic_oauth_flow.py
rename to tests/hermes_cli/test_anthropic_oauth_flow.py
diff --git a/tests/test_anthropic_provider_persistence.py b/tests/hermes_cli/test_anthropic_provider_persistence.py
similarity index 100%
rename from tests/test_anthropic_provider_persistence.py
rename to tests/hermes_cli/test_anthropic_provider_persistence.py
diff --git a/tests/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py
similarity index 100%
rename from tests/test_api_key_providers.py
rename to tests/hermes_cli/test_api_key_providers.py
diff --git a/tests/test_atomic_json_write.py b/tests/hermes_cli/test_atomic_json_write.py
similarity index 100%
rename from tests/test_atomic_json_write.py
rename to tests/hermes_cli/test_atomic_json_write.py
diff --git a/tests/test_atomic_yaml_write.py b/tests/hermes_cli/test_atomic_yaml_write.py
similarity index 100%
rename from tests/test_atomic_yaml_write.py
rename to tests/hermes_cli/test_atomic_yaml_write.py
diff --git a/tests/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py
similarity index 100%
rename from tests/test_auth_codex_provider.py
rename to tests/hermes_cli/test_auth_codex_provider.py
diff --git a/tests/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py
similarity index 100%
rename from tests/test_auth_commands.py
rename to tests/hermes_cli/test_auth_commands.py
diff --git a/tests/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py
similarity index 100%
rename from tests/test_auth_nous_provider.py
rename to tests/hermes_cli/test_auth_nous_provider.py
diff --git a/tests/test_codex_models.py b/tests/hermes_cli/test_codex_models.py
similarity index 100%
rename from tests/test_codex_models.py
rename to tests/hermes_cli/test_codex_models.py
diff --git a/tests/test_config_env_expansion.py b/tests/hermes_cli/test_config_env_expansion.py
similarity index 100%
rename from tests/test_config_env_expansion.py
rename to tests/hermes_cli/test_config_env_expansion.py
diff --git a/tests/test_external_credential_detection.py b/tests/hermes_cli/test_external_credential_detection.py
similarity index 100%
rename from tests/test_external_credential_detection.py
rename to tests/hermes_cli/test_external_credential_detection.py
diff --git a/tests/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py
similarity index 100%
rename from tests/test_gemini_provider.py
rename to tests/hermes_cli/test_gemini_provider.py
diff --git a/tests/test_model_normalize.py b/tests/hermes_cli/test_model_normalize.py
similarity index 100%
rename from tests/test_model_normalize.py
rename to tests/hermes_cli/test_model_normalize.py
diff --git a/tests/test_model_provider_persistence.py b/tests/hermes_cli/test_model_provider_persistence.py
similarity index 100%
rename from tests/test_model_provider_persistence.py
rename to tests/hermes_cli/test_model_provider_persistence.py
diff --git a/tests/test_ollama_cloud_auth.py b/tests/hermes_cli/test_ollama_cloud_auth.py
similarity index 100%
rename from tests/test_ollama_cloud_auth.py
rename to tests/hermes_cli/test_ollama_cloud_auth.py
diff --git a/tests/test_plugin_cli_registration.py b/tests/hermes_cli/test_plugin_cli_registration.py
similarity index 100%
rename from tests/test_plugin_cli_registration.py
rename to tests/hermes_cli/test_plugin_cli_registration.py
diff --git a/tests/test_plugins.py b/tests/hermes_cli/test_plugins.py
similarity index 100%
rename from tests/test_plugins.py
rename to tests/hermes_cli/test_plugins.py
diff --git a/tests/test_plugins_cmd.py b/tests/hermes_cli/test_plugins_cmd.py
similarity index 100%
rename from tests/test_plugins_cmd.py
rename to tests/hermes_cli/test_plugins_cmd.py
diff --git a/tests/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py
similarity index 100%
rename from tests/test_runtime_provider_resolution.py
rename to tests/hermes_cli/test_runtime_provider_resolution.py
diff --git a/tests/test_setup_model_selection.py b/tests/hermes_cli/test_setup_model_selection.py
similarity index 100%
rename from tests/test_setup_model_selection.py
rename to tests/hermes_cli/test_setup_model_selection.py
diff --git a/tests/hermes_cli/test_update_check.py b/tests/hermes_cli/test_update_check.py
index b7d6de6ff..368bb1b07 100644
--- a/tests/hermes_cli/test_update_check.py
+++ b/tests/hermes_cli/test_update_check.py
@@ -15,7 +15,7 @@ def test_version_string_no_v_prefix():
     assert not __version__.startswith("v"), f"__version__ should not start with 'v', got {__version__!r}"
 
 
-def test_check_for_updates_uses_cache(tmp_path):
+def test_check_for_updates_uses_cache(tmp_path, monkeypatch):
     """When cache is fresh, check_for_updates should return cached value without calling git."""
     from hermes_cli.banner import check_for_updates
 
@@ -27,15 +27,15 @@ def test_check_for_updates_uses_cache(tmp_path):
     cache_file = tmp_path / ".update_check"
     cache_file.write_text(json.dumps({"ts": time.time(), "behind": 3}))
 
-    with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)):
-        with patch("hermes_cli.banner.subprocess.run") as mock_run:
-            result = check_for_updates()
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    with patch("hermes_cli.banner.subprocess.run") as mock_run:
+        result = check_for_updates()
 
     assert result == 3
     mock_run.assert_not_called()
 
 
-def test_check_for_updates_expired_cache(tmp_path):
+def test_check_for_updates_expired_cache(tmp_path, monkeypatch):
     """When cache is expired, check_for_updates should call git fetch."""
     from hermes_cli.banner import check_for_updates
 
@@ -49,15 +49,15 @@ def test_check_for_updates_expired_cache(tmp_path):
 
     mock_result = MagicMock(returncode=0, stdout="5\n")
 
-    with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)):
-        with patch("hermes_cli.banner.subprocess.run", return_value=mock_result) as mock_run:
-            result = check_for_updates()
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    with patch("hermes_cli.banner.subprocess.run", return_value=mock_result) as mock_run:
+        result = check_for_updates()
 
     assert result == 5
     assert mock_run.call_count == 2  # git fetch + git rev-list
 
 
-def test_check_for_updates_no_git_dir(tmp_path):
+def test_check_for_updates_no_git_dir(tmp_path, monkeypatch):
     """Returns None when .git directory doesn't exist anywhere."""
     import hermes_cli.banner as banner
 
@@ -66,19 +66,15 @@ def test_check_for_updates_no_git_dir(tmp_path):
     fake_banner.parent.mkdir(parents=True, exist_ok=True)
     fake_banner.touch()
 
-    original = banner.__file__
-    try:
-        banner.__file__ = str(fake_banner)
-        with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)):
-            with patch("hermes_cli.banner.subprocess.run") as mock_run:
-                result = banner.check_for_updates()
-        assert result is None
-        mock_run.assert_not_called()
-    finally:
-        banner.__file__ = original
+    monkeypatch.setattr(banner, "__file__", str(fake_banner))
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    with patch("hermes_cli.banner.subprocess.run") as mock_run:
+        result = banner.check_for_updates()
+    assert result is None
+    mock_run.assert_not_called()
 
 
-def test_check_for_updates_fallback_to_project_root():
+def test_check_for_updates_fallback_to_project_root(tmp_path, monkeypatch):
     """Dev install: falls back to Path(__file__).parent.parent when HERMES_HOME has no git repo."""
     import hermes_cli.banner as banner
 
@@ -87,14 +83,12 @@ def test_check_for_updates_fallback_to_project_root():
         pytest.skip("Not running from a git checkout")
 
     # Point HERMES_HOME at a temp dir with no hermes-agent/.git
-    import tempfile
-    with tempfile.TemporaryDirectory() as td:
-        with patch("hermes_cli.banner.os.getenv", return_value=td):
-            with patch("hermes_cli.banner.subprocess.run") as mock_run:
-                mock_run.return_value = MagicMock(returncode=0, stdout="0\n")
-                result = banner.check_for_updates()
-        # Should have fallen back to project root and run git commands
-        assert mock_run.call_count >= 1
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    with patch("hermes_cli.banner.subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="0\n")
+        result = banner.check_for_updates()
+    # Should have fallen back to project root and run git commands
+    assert mock_run.call_count >= 1
 
 
 def test_prefetch_non_blocking():
diff --git a/tests/run_agent/__init__.py b/tests/run_agent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_1630_context_overflow_loop.py b/tests/run_agent/test_1630_context_overflow_loop.py
similarity index 100%
rename from tests/test_1630_context_overflow_loop.py
rename to tests/run_agent/test_1630_context_overflow_loop.py
diff --git a/tests/test_413_compression.py b/tests/run_agent/test_413_compression.py
similarity index 100%
rename from tests/test_413_compression.py
rename to tests/run_agent/test_413_compression.py
diff --git a/tests/test_860_dedup.py b/tests/run_agent/test_860_dedup.py
similarity index 100%
rename from tests/test_860_dedup.py
rename to tests/run_agent/test_860_dedup.py
diff --git a/tests/test_agent_guardrails.py b/tests/run_agent/test_agent_guardrails.py
similarity index 100%
rename from tests/test_agent_guardrails.py
rename to tests/run_agent/test_agent_guardrails.py
diff --git a/tests/test_agent_loop.py b/tests/run_agent/test_agent_loop.py
similarity index 99%
rename from tests/test_agent_loop.py
rename to tests/run_agent/test_agent_loop.py
index b95ff7808..bd9e41b91 100644
--- a/tests/test_agent_loop.py
+++ b/tests/run_agent/test_agent_loop.py
@@ -16,7 +16,7 @@ from unittest.mock import MagicMock
 import pytest
 
 # Ensure repo root is importable
-sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
 
 try:
     from environments.agent_loop import (
diff --git a/tests/test_agent_loop_tool_calling.py b/tests/run_agent/test_agent_loop_tool_calling.py
similarity index 99%
rename from tests/test_agent_loop_tool_calling.py
rename to tests/run_agent/test_agent_loop_tool_calling.py
index 74e67c0be..3b8d6ac59 100644
--- a/tests/test_agent_loop_tool_calling.py
+++ b/tests/run_agent/test_agent_loop_tool_calling.py
@@ -31,7 +31,7 @@ import pytest
 # pytestmark removed — tests skip gracefully via OPENROUTER_API_KEY check on line 59
 
 # Ensure repo root is importable
-_repo_root = Path(__file__).resolve().parent.parent
+_repo_root = Path(__file__).resolve().parent.parent.parent
 if str(_repo_root) not in sys.path:
     sys.path.insert(0, str(_repo_root))
 
diff --git a/tests/test_agent_loop_vllm.py b/tests/run_agent/test_agent_loop_vllm.py
similarity index 99%
rename from tests/test_agent_loop_vllm.py
rename to tests/run_agent/test_agent_loop_vllm.py
index d47478ecb..d42849094 100644
--- a/tests/test_agent_loop_vllm.py
+++ b/tests/run_agent/test_agent_loop_vllm.py
@@ -30,7 +30,7 @@ import pytest
 import requests
 
 # Ensure repo root is importable
-_repo_root = Path(__file__).resolve().parent.parent
+_repo_root = Path(__file__).resolve().parent.parent.parent
 if str(_repo_root) not in sys.path:
     sys.path.insert(0, str(_repo_root))
 
diff --git a/tests/test_anthropic_error_handling.py b/tests/run_agent/test_anthropic_error_handling.py
similarity index 100%
rename from tests/test_anthropic_error_handling.py
rename to tests/run_agent/test_anthropic_error_handling.py
diff --git a/tests/test_async_httpx_del_neuter.py b/tests/run_agent/test_async_httpx_del_neuter.py
similarity index 100%
rename from tests/test_async_httpx_del_neuter.py
rename to tests/run_agent/test_async_httpx_del_neuter.py
diff --git a/tests/test_compression_boundary.py b/tests/run_agent/test_compression_boundary.py
similarity index 100%
rename from tests/test_compression_boundary.py
rename to tests/run_agent/test_compression_boundary.py
diff --git a/tests/test_compression_persistence.py b/tests/run_agent/test_compression_persistence.py
similarity index 100%
rename from tests/test_compression_persistence.py
rename to tests/run_agent/test_compression_persistence.py
diff --git a/tests/test_compressor_fallback_update.py b/tests/run_agent/test_compressor_fallback_update.py
similarity index 100%
rename from tests/test_compressor_fallback_update.py
rename to tests/run_agent/test_compressor_fallback_update.py
diff --git a/tests/test_context_pressure.py b/tests/run_agent/test_context_pressure.py
similarity index 100%
rename from tests/test_context_pressure.py
rename to tests/run_agent/test_context_pressure.py
diff --git a/tests/test_context_token_tracking.py b/tests/run_agent/test_context_token_tracking.py
similarity index 100%
rename from tests/test_context_token_tracking.py
rename to tests/run_agent/test_context_token_tracking.py
diff --git a/tests/test_dict_tool_call_args.py b/tests/run_agent/test_dict_tool_call_args.py
similarity index 100%
rename from tests/test_dict_tool_call_args.py
rename to tests/run_agent/test_dict_tool_call_args.py
diff --git a/tests/test_exit_cleanup_interrupt.py b/tests/run_agent/test_exit_cleanup_interrupt.py
similarity index 100%
rename from tests/test_exit_cleanup_interrupt.py
rename to tests/run_agent/test_exit_cleanup_interrupt.py
diff --git a/tests/test_fallback_model.py b/tests/run_agent/test_fallback_model.py
similarity index 100%
rename from tests/test_fallback_model.py
rename to tests/run_agent/test_fallback_model.py
diff --git a/tests/test_flush_memories_codex.py b/tests/run_agent/test_flush_memories_codex.py
similarity index 100%
rename from tests/test_flush_memories_codex.py
rename to tests/run_agent/test_flush_memories_codex.py
diff --git a/tests/test_interactive_interrupt.py b/tests/run_agent/test_interactive_interrupt.py
similarity index 98%
rename from tests/test_interactive_interrupt.py
rename to tests/run_agent/test_interactive_interrupt.py
index 8c0d328c2..762621f22 100644
--- a/tests/test_interactive_interrupt.py
+++ b/tests/run_agent/test_interactive_interrupt.py
@@ -23,7 +23,7 @@ logging.basicConfig(level=logging.DEBUG, stream=sys.stderr,
                     format="%(asctime)s [%(threadName)s] %(message)s")
 log = logging.getLogger("interrupt_test")
 
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 
 from unittest.mock import MagicMock, patch
 from run_agent import AIAgent, IterationBudget
diff --git a/tests/test_interrupt_propagation.py b/tests/run_agent/test_interrupt_propagation.py
similarity index 100%
rename from tests/test_interrupt_propagation.py
rename to tests/run_agent/test_interrupt_propagation.py
diff --git a/tests/test_large_tool_result.py b/tests/run_agent/test_large_tool_result.py
similarity index 100%
rename from tests/test_large_tool_result.py
rename to tests/run_agent/test_large_tool_result.py
diff --git a/tests/test_long_context_tier_429.py b/tests/run_agent/test_long_context_tier_429.py
similarity index 100%
rename from tests/test_long_context_tier_429.py
rename to tests/run_agent/test_long_context_tier_429.py
diff --git a/tests/test_openai_client_lifecycle.py b/tests/run_agent/test_openai_client_lifecycle.py
similarity index 100%
rename from tests/test_openai_client_lifecycle.py
rename to tests/run_agent/test_openai_client_lifecycle.py
diff --git a/tests/test_percentage_clamp.py b/tests/run_agent/test_percentage_clamp.py
similarity index 98%
rename from tests/test_percentage_clamp.py
rename to tests/run_agent/test_percentage_clamp.py
index 67d119149..fcf1e39e5 100644
--- a/tests/test_percentage_clamp.py
+++ b/tests/run_agent/test_percentage_clamp.py
@@ -122,7 +122,7 @@ class TestSourceLinesAreClamped:
     @staticmethod
     def _read_file(rel_path: str) -> str:
         import os
-        base = os.path.dirname(os.path.dirname(__file__))
+        base = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
         with open(os.path.join(base, rel_path)) as f:
             return f.read()
 
diff --git a/tests/test_primary_runtime_restore.py b/tests/run_agent/test_primary_runtime_restore.py
similarity index 100%
rename from tests/test_primary_runtime_restore.py
rename to tests/run_agent/test_primary_runtime_restore.py
diff --git a/tests/test_provider_fallback.py b/tests/run_agent/test_provider_fallback.py
similarity index 100%
rename from tests/test_provider_fallback.py
rename to tests/run_agent/test_provider_fallback.py
diff --git a/tests/test_provider_parity.py b/tests/run_agent/test_provider_parity.py
similarity index 100%
rename from tests/test_provider_parity.py
rename to tests/run_agent/test_provider_parity.py
diff --git a/tests/test_real_interrupt_subagent.py b/tests/run_agent/test_real_interrupt_subagent.py
similarity index 100%
rename from tests/test_real_interrupt_subagent.py
rename to tests/run_agent/test_real_interrupt_subagent.py
diff --git a/tests/test_redirect_stdout_issue.py b/tests/run_agent/test_redirect_stdout_issue.py
similarity index 100%
rename from tests/test_redirect_stdout_issue.py
rename to tests/run_agent/test_redirect_stdout_issue.py
diff --git a/tests/test_run_agent.py b/tests/run_agent/test_run_agent.py
similarity index 100%
rename from tests/test_run_agent.py
rename to tests/run_agent/test_run_agent.py
diff --git a/tests/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
similarity index 100%
rename from tests/test_run_agent_codex_responses.py
rename to tests/run_agent/test_run_agent_codex_responses.py
diff --git a/tests/test_session_meta_filtering.py b/tests/run_agent/test_session_meta_filtering.py
similarity index 100%
rename from tests/test_session_meta_filtering.py
rename to tests/run_agent/test_session_meta_filtering.py
diff --git a/tests/test_session_reset_fix.py b/tests/run_agent/test_session_reset_fix.py
similarity index 98%
rename from tests/test_session_reset_fix.py
rename to tests/run_agent/test_session_reset_fix.py
index ee65ed90d..1fd1223ce 100644
--- a/tests/test_session_reset_fix.py
+++ b/tests/run_agent/test_session_reset_fix.py
@@ -13,7 +13,7 @@ from pathlib import Path
 import pytest
 
 # Ensure repo root is importable
-sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
 
 # Stub out optional heavy dependencies not installed in the test environment
 sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
diff --git a/tests/test_streaming.py b/tests/run_agent/test_streaming.py
similarity index 100%
rename from tests/test_streaming.py
rename to tests/run_agent/test_streaming.py
diff --git a/tests/test_strict_api_validation.py b/tests/run_agent/test_strict_api_validation.py
similarity index 100%
rename from tests/test_strict_api_validation.py
rename to tests/run_agent/test_strict_api_validation.py
diff --git a/tests/test_token_persistence_non_cli.py b/tests/run_agent/test_token_persistence_non_cli.py
similarity index 100%
rename from tests/test_token_persistence_non_cli.py
rename to tests/run_agent/test_token_persistence_non_cli.py
diff --git a/tests/test_tool_arg_coercion.py b/tests/run_agent/test_tool_arg_coercion.py
similarity index 100%
rename from tests/test_tool_arg_coercion.py
rename to tests/run_agent/test_tool_arg_coercion.py
diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py
index 085ffad29..5ac3fd872 100644
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -18,10 +18,18 @@ import pytest
 import json
 import os
 
-# Force local terminal backend for ALL tests in this file.
-# Under xdist, another test may leak TERMINAL_ENV=modal/docker, sending
-# execute_code down the remote path → modal.exception.AuthError.
 os.environ["TERMINAL_ENV"] = "local"
+
+
+@pytest.fixture(autouse=True)
+def _force_local_terminal(monkeypatch):
+    """Re-set TERMINAL_ENV=local before every test.
+
+    The module-level assignment above covers import time, but an earlier
+    test in the same process (e.g. the same xdist worker) can leave
+    os.environ modified.  monkeypatch re-pins the value for every test.
+    """
+    monkeypatch.setenv("TERMINAL_ENV", "local")
 import sys
 import time
 import threading
diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py
index d07dcb367..5ae24f01a 100644
--- a/tests/tools/test_managed_browserbase_and_modal.py
+++ b/tests/tools/test_managed_browserbase_and_modal.py
@@ -91,7 +91,11 @@ def _install_fake_tools_package():
         def register(self, **kwargs):
             return None
 
-    sys.modules["tools.registry"] = types.SimpleNamespace(registry=_Registry())
+    from tools.registry import tool_error
+
+    sys.modules["tools.registry"] = types.SimpleNamespace(
+        registry=_Registry(), tool_error=tool_error,
+    )
 
     class _DummyEnvironment:
         def __init__(self, *args, **kwargs):
diff --git a/tests/test_managed_server_tool_support.py b/tests/tools/test_managed_server_tool_support.py
similarity index 100%
rename from tests/test_managed_server_tool_support.py
rename to tests/tools/test_managed_server_tool_support.py
diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py
index 7e3feb5cf..e1baf13d9 100644
--- a/tests/tools/test_modal_sandbox_fixes.py
+++ b/tests/tools/test_modal_sandbox_fixes.py
@@ -12,8 +12,6 @@ Covers the bugs discovered while setting up TBLite evaluation:
 import os
 import sys
 from pathlib import Path
-from unittest.mock import patch, MagicMock
-
 import pytest
 
 # Ensure repo root is importable
@@ -64,89 +62,72 @@ class TestToolResolution:
 class TestCwdHandling:
     """Verify host paths are sanitized for container backends."""
 
-    def test_home_path_replaced_for_modal(self):
+    def test_home_path_replaced_for_modal(self, monkeypatch):
         """TERMINAL_CWD=/home/user/... should be replaced with /root for modal."""
-        with patch.dict(os.environ, {
-            "TERMINAL_ENV": "modal",
-            "TERMINAL_CWD": "/home/dakota/github/hermes-agent",
-        }):
-            config = _tt_mod._get_env_config()
-            assert config["cwd"] == "/root", (
-                f"Expected /root, got {config['cwd']}. "
-                "/home/ paths should be replaced for modal backend."
-            )
+        monkeypatch.setenv("TERMINAL_ENV", "modal")
+        monkeypatch.setenv("TERMINAL_CWD", "/home/dakota/github/hermes-agent")
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/root", (
+            f"Expected /root, got {config['cwd']}. "
+            "/home/ paths should be replaced for modal backend."
+        )
 
-    def test_users_path_replaced_for_docker_by_default(self):
+    def test_users_path_replaced_for_docker_by_default(self, monkeypatch):
         """Docker should keep host paths out of the sandbox unless explicitly enabled."""
-        with patch.dict(os.environ, {
-            "TERMINAL_ENV": "docker",
-            "TERMINAL_CWD": "/Users/someone/projects",
-        }):
-            config = _tt_mod._get_env_config()
-            assert config["cwd"] == "/root", (
-                f"Expected /root, got {config['cwd']}. "
-                "Host paths should be discarded for docker backend by default."
-            )
-            assert config["host_cwd"] is None
-            assert config["docker_mount_cwd_to_workspace"] is False
+        monkeypatch.setenv("TERMINAL_ENV", "docker")
+        monkeypatch.setenv("TERMINAL_CWD", "/Users/someone/projects")
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/root", (
+            f"Expected /root, got {config['cwd']}. "
+            "Host paths should be discarded for docker backend by default."
+        )
+        assert config["host_cwd"] is None
+        assert config["docker_mount_cwd_to_workspace"] is False
 
-    def test_users_path_maps_to_workspace_for_docker_when_enabled(self):
+    def test_users_path_maps_to_workspace_for_docker_when_enabled(self, monkeypatch):
         """Docker should map the host cwd into /workspace only when explicitly enabled."""
-        with patch.dict(os.environ, {
-            "TERMINAL_ENV": "docker",
-            "TERMINAL_CWD": "/Users/someone/projects",
-            "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true",
-        }):
-            config = _tt_mod._get_env_config()
-            assert config["cwd"] == "/workspace"
-            assert config["host_cwd"] == "/Users/someone/projects"
-            assert config["docker_mount_cwd_to_workspace"] is True
+        monkeypatch.setenv("TERMINAL_ENV", "docker")
+        monkeypatch.setenv("TERMINAL_CWD", "/Users/someone/projects")
+        monkeypatch.setenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "true")
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/workspace"
+        assert config["host_cwd"] == "/Users/someone/projects"
+        assert config["docker_mount_cwd_to_workspace"] is True
 
-    def test_windows_path_replaced_for_modal(self):
+    def test_windows_path_replaced_for_modal(self, monkeypatch):
         """TERMINAL_CWD=C:\\Users\\... should be replaced for modal."""
-        with patch.dict(os.environ, {
-            "TERMINAL_ENV": "modal",
-            "TERMINAL_CWD": "C:\\Users\\someone\\projects",
-        }):
-            config = _tt_mod._get_env_config()
-            assert config["cwd"] == "/root"
+        monkeypatch.setenv("TERMINAL_ENV", "modal")
+        monkeypatch.setenv("TERMINAL_CWD", "C:\\Users\\someone\\projects")
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/root"
 
-    def test_default_cwd_is_root_for_container_backends(self):
+    @pytest.mark.parametrize("backend", ["modal", "docker", "singularity", "daytona"])
+    def test_default_cwd_is_root_for_container_backends(self, backend, monkeypatch):
         """Container backends should default to /root, not ~."""
-        for backend in ("modal", "docker", "singularity", "daytona"):
-            with patch.dict(os.environ, {"TERMINAL_ENV": backend}, clear=False):
-                # Remove TERMINAL_CWD so it uses default
-                env = os.environ.copy()
-                env.pop("TERMINAL_CWD", None)
-                env.pop("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", None)
-                with patch.dict(os.environ, env, clear=True):
-                    config = _tt_mod._get_env_config()
-                    assert config["cwd"] == "/root", (
-                        f"Backend {backend}: expected /root default, got {config['cwd']}"
-                    )
+        monkeypatch.setenv("TERMINAL_ENV", backend)
+        monkeypatch.delenv("TERMINAL_CWD", raising=False)
+        monkeypatch.delenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", raising=False)
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/root", (
+            f"Backend {backend}: expected /root default, got {config['cwd']}"
+        )
 
-    def test_docker_default_cwd_maps_current_directory_when_enabled(self):
+    def test_docker_default_cwd_maps_current_directory_when_enabled(self, monkeypatch):
         """Docker should use /workspace when cwd mounting is explicitly enabled."""
-        with patch("tools.terminal_tool.os.getcwd", return_value="/home/user/project"):
-            with patch.dict(os.environ, {
-                "TERMINAL_ENV": "docker",
-                "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true",
-            }, clear=False):
-                env = os.environ.copy()
-                env.pop("TERMINAL_CWD", None)
-                with patch.dict(os.environ, env, clear=True):
-                    config = _tt_mod._get_env_config()
-                    assert config["cwd"] == "/workspace"
-                    assert config["host_cwd"] == "/home/user/project"
+        monkeypatch.setattr("tools.terminal_tool.os.getcwd", lambda: "/home/user/project")
+        monkeypatch.setenv("TERMINAL_ENV", "docker")
+        monkeypatch.setenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "true")
+        monkeypatch.delenv("TERMINAL_CWD", raising=False)
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/workspace"
+        assert config["host_cwd"] == "/home/user/project"
 
-    def test_local_backend_uses_getcwd(self):
+    def test_local_backend_uses_getcwd(self, monkeypatch):
         """Local backend should use os.getcwd(), not /root."""
-        with patch.dict(os.environ, {"TERMINAL_ENV": "local"}, clear=False):
-            env = os.environ.copy()
-            env.pop("TERMINAL_CWD", None)
-            with patch.dict(os.environ, env, clear=True):
-                config = _tt_mod._get_env_config()
-                assert config["cwd"] == os.getcwd()
+        monkeypatch.setenv("TERMINAL_ENV", "local")
+        monkeypatch.delenv("TERMINAL_CWD", raising=False)
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == os.getcwd()
 
     def test_create_environment_passes_docker_host_cwd_and_flag(self, monkeypatch):
         """Docker host cwd and mount flag should reach DockerEnvironment."""
@@ -173,18 +154,16 @@ class TestCwdHandling:
         assert captured["host_cwd"] == "/home/user/project"
         assert captured["auto_mount_cwd"] is True
 
-    def test_ssh_preserves_home_paths(self):
+    def test_ssh_preserves_home_paths(self, monkeypatch):
         """SSH backend should NOT replace /home/ paths (they're valid remotely)."""
-        with patch.dict(os.environ, {
-            "TERMINAL_ENV": "ssh",
-            "TERMINAL_CWD": "/home/remote-user/work",
-            "TERMINAL_SSH_HOST": "example.com",
-            "TERMINAL_SSH_USER": "user",
-        }):
-            config = _tt_mod._get_env_config()
-            assert config["cwd"] == "/home/remote-user/work", (
-                "SSH backend should preserve /home/ paths"
-            )
+        monkeypatch.setenv("TERMINAL_ENV", "ssh")
+        monkeypatch.setenv("TERMINAL_CWD", "/home/remote-user/work")
+        monkeypatch.setenv("TERMINAL_SSH_HOST", "example.com")
+        monkeypatch.setenv("TERMINAL_SSH_USER", "user")
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/home/remote-user/work", (
+            "SSH backend should preserve /home/ paths"
+        )
 
 
 # =========================================================================
@@ -194,12 +173,8 @@ class TestCwdHandling:
 class TestEphemeralDiskCheck:
     """Verify ephemeral_disk is only passed when modal supports it."""
 
-    def test_ephemeral_disk_skipped_when_unsupported(self):
+    def test_ephemeral_disk_skipped_when_unsupported(self, monkeypatch):
         """If modal.Sandbox.create doesn't have ephemeral_disk param, skip it."""
-        # Mock the modal import and Sandbox.create signature
-        mock_modal = MagicMock()
-        mock_sandbox_create = MagicMock()
-        # Simulate a signature WITHOUT ephemeral_disk
         import inspect
         mock_params = {
             "args": inspect.Parameter("args", inspect.Parameter.VAR_POSITIONAL),
@@ -208,26 +183,25 @@ class TestEphemeralDiskCheck:
             "cpu": inspect.Parameter("cpu", inspect.Parameter.KEYWORD_ONLY),
             "memory": inspect.Parameter("memory", inspect.Parameter.KEYWORD_ONLY),
         }
-        mock_sig = inspect.Signature(parameters=list(mock_params.values()))
 
-        with patch.dict(os.environ, {"TERMINAL_ENV": "modal"}):
-            config = _tt_mod._get_env_config()
-            # The config has container_disk default of 51200
-            disk = config.get("container_disk", 51200)
-            assert disk > 0, "disk should default to > 0"
+        monkeypatch.setenv("TERMINAL_ENV", "modal")
+        config = _tt_mod._get_env_config()
+        # The config has container_disk default of 51200
+        disk = config.get("container_disk", 51200)
+        assert disk > 0, "disk should default to > 0"
 
-            # Simulate the version check logic from terminal_tool.py
-            sandbox_kwargs = {}
-            if disk > 0:
-                try:
-                    if "ephemeral_disk" in mock_params:
-                        sandbox_kwargs["ephemeral_disk"] = disk
-                except Exception:
-                    pass
+        # Simulate the version check logic from terminal_tool.py
+        sandbox_kwargs = {}
+        if disk > 0:
+            try:
+                if "ephemeral_disk" in mock_params:
+                    sandbox_kwargs["ephemeral_disk"] = disk
+            except Exception:
+                pass
 
-            assert "ephemeral_disk" not in sandbox_kwargs, (
-                "ephemeral_disk should not be set when Sandbox.create doesn't support it"
-            )
+        assert "ephemeral_disk" not in sandbox_kwargs, (
+            "ephemeral_disk should not be set when Sandbox.create doesn't support it"
+        )
 
 
 # =========================================================================
diff --git a/tests/test_tool_call_parsers.py b/tests/tools/test_tool_call_parsers.py
similarity index 100%
rename from tests/test_tool_call_parsers.py
rename to tests/tools/test_tool_call_parsers.py
diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py
index 08f26f509..226e99b56 100644
--- a/tools/browser_camofox.py
+++ b/tools/browser_camofox.py
@@ -34,6 +34,7 @@ import requests
 
 from hermes_cli.config import load_config
 from tools.browser_camofox_state import get_camofox_identity
+from tools.registry import tool_error
 
 logger = logging.getLogger(__name__)
 
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index 2ed8ba210..1ff42e77b 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -1317,6 +1317,8 @@ def _make_read_resource_handler(server_name: str, tool_timeout: float):
     """Return a sync handler that reads a resource by URI from an MCP server."""
 
     def _handler(args: dict, **kwargs) -> str:
+        from tools.registry import tool_error
+
         with _lock:
             server = _servers.get(server_name)
         if not server or not server.session:
@@ -1406,6 +1408,8 @@ def _make_get_prompt_handler(server_name: str, tool_timeout: float):
     """Return a sync handler that gets a prompt by name from an MCP server."""
 
     def _handler(args: dict, **kwargs) -> str:
+        from tools.registry import tool_error
+
         with _lock:
             server = _servers.get(server_name)
         if not server or not server.session: