diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index a54be8b17..1e45193b8 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -19,6 +19,9 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v4
 
+      - name: Install system dependencies
+        run: sudo apt-get update && sudo apt-get install -y ripgrep
+
       - name: Install uv
         uses: astral-sh/setup-uv@v5
 
diff --git a/tests/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py
similarity index 100%
rename from tests/test_anthropic_adapter.py
rename to tests/agent/test_anthropic_adapter.py
diff --git a/tests/test_auxiliary_config_bridge.py b/tests/agent/test_auxiliary_config_bridge.py
similarity index 98%
rename from tests/test_auxiliary_config_bridge.py
rename to tests/agent/test_auxiliary_config_bridge.py
index 0151daf2a..91dea15af 100644
--- a/tests/test_auxiliary_config_bridge.py
+++ b/tests/agent/test_auxiliary_config_bridge.py
@@ -13,7 +13,7 @@ from unittest.mock import patch, MagicMock
 import pytest
 import yaml
 
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
 
 
 def _run_auxiliary_bridge(config_dict, monkeypatch):
@@ -199,7 +199,7 @@ class TestGatewayBridgeCodeParity:
 
     def test_gateway_has_auxiliary_bridge(self):
         """The gateway config bridge must include auxiliary.* bridging."""
-        gateway_path = Path(__file__).parent.parent / "gateway" / "run.py"
+        gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py"
         content = gateway_path.read_text()
         # Check for key patterns that indicate the bridge is present
         assert "AUXILIARY_VISION_PROVIDER" in content
@@ -213,7 +213,7 @@ class TestGatewayBridgeCodeParity:
 
     def test_gateway_no_compression_env_bridge(self):
         """Gateway should NOT bridge compression config to env vars (config-only)."""
-        gateway_path = Path(__file__).parent.parent / "gateway" / "run.py"
+        gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py"
         content = gateway_path.read_text()
         assert "CONTEXT_COMPRESSION_PROVIDER" not in content
         assert "CONTEXT_COMPRESSION_MODEL" not in content
diff --git a/tests/test_context_references.py b/tests/agent/test_context_references.py
similarity index 100%
rename from tests/test_context_references.py
rename to tests/agent/test_context_references.py
diff --git a/tests/test_credential_pool.py b/tests/agent/test_credential_pool.py
similarity index 100%
rename from tests/test_credential_pool.py
rename to tests/agent/test_credential_pool.py
diff --git a/tests/test_credential_pool_routing.py b/tests/agent/test_credential_pool_routing.py
similarity index 100%
rename from tests/test_credential_pool_routing.py
rename to tests/agent/test_credential_pool_routing.py
diff --git a/tests/test_crossloop_client_cache.py b/tests/agent/test_crossloop_client_cache.py
similarity index 100%
rename from tests/test_crossloop_client_cache.py
rename to tests/agent/test_crossloop_client_cache.py
diff --git a/tests/test_display.py b/tests/agent/test_display.py
similarity index 100%
rename from tests/test_display.py
rename to tests/agent/test_display.py
diff --git a/tests/test_insights.py b/tests/agent/test_insights.py
similarity index 100%
rename from tests/test_insights.py
rename to tests/agent/test_insights.py
diff --git a/tests/test_model_metadata_local_ctx.py b/tests/agent/test_model_metadata_local_ctx.py
similarity index 100%
rename from tests/test_model_metadata_local_ctx.py
rename to tests/agent/test_model_metadata_local_ctx.py
diff --git a/tests/cli/__init__.py b/tests/cli/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_branch_command.py b/tests/cli/test_branch_command.py
similarity index 100%
rename from tests/test_branch_command.py
rename to tests/cli/test_branch_command.py
diff --git a/tests/test_cli_approval_ui.py b/tests/cli/test_cli_approval_ui.py
similarity index 100%
rename from tests/test_cli_approval_ui.py
rename to tests/cli/test_cli_approval_ui.py
diff --git a/tests/test_cli_background_tui_refresh.py b/tests/cli/test_cli_background_tui_refresh.py
similarity index 100%
rename from tests/test_cli_background_tui_refresh.py
rename to tests/cli/test_cli_background_tui_refresh.py
diff --git a/tests/test_cli_browser_connect.py b/tests/cli/test_cli_browser_connect.py
similarity index 100%
rename from tests/test_cli_browser_connect.py
rename to tests/cli/test_cli_browser_connect.py
diff --git a/tests/test_cli_context_warning.py b/tests/cli/test_cli_context_warning.py
similarity index 100%
rename from tests/test_cli_context_warning.py
rename to tests/cli/test_cli_context_warning.py
diff --git a/tests/test_cli_extension_hooks.py b/tests/cli/test_cli_extension_hooks.py
similarity index 100%
rename from tests/test_cli_extension_hooks.py
rename to tests/cli/test_cli_extension_hooks.py
diff --git a/tests/test_cli_file_drop.py b/tests/cli/test_cli_file_drop.py
similarity index 100%
rename from tests/test_cli_file_drop.py
rename to tests/cli/test_cli_file_drop.py
diff --git a/tests/test_cli_init.py b/tests/cli/test_cli_init.py
similarity index 100%
rename from tests/test_cli_init.py
rename to tests/cli/test_cli_init.py
diff --git a/tests/test_cli_interrupt_subagent.py b/tests/cli/test_cli_interrupt_subagent.py
similarity index 100%
rename from tests/test_cli_interrupt_subagent.py
rename to tests/cli/test_cli_interrupt_subagent.py
diff --git a/tests/test_cli_loading_indicator.py b/tests/cli/test_cli_loading_indicator.py
similarity index 100%
rename from tests/test_cli_loading_indicator.py
rename to tests/cli/test_cli_loading_indicator.py
diff --git a/tests/test_cli_mcp_config_watch.py b/tests/cli/test_cli_mcp_config_watch.py
similarity index 100%
rename from tests/test_cli_mcp_config_watch.py
rename to tests/cli/test_cli_mcp_config_watch.py
diff --git a/tests/test_cli_new_session.py b/tests/cli/test_cli_new_session.py
similarity index 100%
rename from tests/test_cli_new_session.py
rename to tests/cli/test_cli_new_session.py
diff --git a/tests/test_cli_plan_command.py b/tests/cli/test_cli_plan_command.py
similarity index 100%
rename from tests/test_cli_plan_command.py
rename to tests/cli/test_cli_plan_command.py
diff --git a/tests/test_cli_prefix_matching.py b/tests/cli/test_cli_prefix_matching.py
similarity index 100%
rename from tests/test_cli_prefix_matching.py
rename to tests/cli/test_cli_prefix_matching.py
diff --git a/tests/test_cli_preloaded_skills.py b/tests/cli/test_cli_preloaded_skills.py
similarity index 100%
rename from tests/test_cli_preloaded_skills.py
rename to tests/cli/test_cli_preloaded_skills.py
diff --git a/tests/test_cli_provider_resolution.py b/tests/cli/test_cli_provider_resolution.py
similarity index 99%
rename from tests/test_cli_provider_resolution.py
rename to tests/cli/test_cli_provider_resolution.py
index bd78a98ea..353b3234e 100644
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/cli/test_cli_provider_resolution.py
@@ -330,7 +330,7 @@ def test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_
         "hermes_cli.auth.fetch_nous_models",
         lambda *args, **kwargs: ["claude-opus-4-6"],
     )
-    monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None: "claude-opus-4-6")
+    monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None, **kw: "claude-opus-4-6")
     monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None)
     monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None)
     monkeypatch.setattr(
@@ -368,7 +368,7 @@ def test_model_flow_nous_applies_managed_tts_default_when_unconfigured(monkeypat
         "hermes_cli.auth.fetch_nous_models",
         lambda *args, **kwargs: ["claude-opus-4-6"],
     )
-    monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None: "claude-opus-4-6")
+    monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None, **kw: "claude-opus-4-6")
     monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None)
     monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None)
     monkeypatch.setattr(
diff --git a/tests/test_cli_retry.py b/tests/cli/test_cli_retry.py
similarity index 96%
rename from tests/test_cli_retry.py
rename to tests/cli/test_cli_retry.py
index 74e2512bf..b287b4575 100644
--- a/tests/test_cli_retry.py
+++ b/tests/cli/test_cli_retry.py
@@ -1,6 +1,6 @@
 """Regression tests for CLI /retry history replacement semantics."""
 
-from tests.test_cli_init import _make_cli
+from tests.cli.test_cli_init import _make_cli
 
 
 def test_retry_last_truncates_history_before_requeueing_message():
diff --git a/tests/test_cli_save_config_value.py b/tests/cli/test_cli_save_config_value.py
similarity index 100%
rename from tests/test_cli_save_config_value.py
rename to tests/cli/test_cli_save_config_value.py
diff --git a/tests/test_cli_secret_capture.py b/tests/cli/test_cli_secret_capture.py
similarity index 100%
rename from tests/test_cli_secret_capture.py
rename to tests/cli/test_cli_secret_capture.py
diff --git a/tests/test_cli_skin_integration.py b/tests/cli/test_cli_skin_integration.py
similarity index 100%
rename from tests/test_cli_skin_integration.py
rename to tests/cli/test_cli_skin_integration.py
diff --git a/tests/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py
similarity index 100%
rename from tests/test_cli_status_bar.py
rename to tests/cli/test_cli_status_bar.py
diff --git a/tests/test_cli_tools_command.py b/tests/cli/test_cli_tools_command.py
similarity index 100%
rename from tests/test_cli_tools_command.py
rename to tests/cli/test_cli_tools_command.py
diff --git a/tests/test_personality_none.py b/tests/cli/test_personality_none.py
similarity index 100%
rename from tests/test_personality_none.py
rename to tests/cli/test_personality_none.py
diff --git a/tests/test_quick_commands.py b/tests/cli/test_quick_commands.py
similarity index 100%
rename from tests/test_quick_commands.py
rename to tests/cli/test_quick_commands.py
diff --git a/tests/test_reasoning_command.py b/tests/cli/test_reasoning_command.py
similarity index 100%
rename from tests/test_reasoning_command.py
rename to tests/cli/test_reasoning_command.py
diff --git a/tests/test_resume_display.py b/tests/cli/test_resume_display.py
similarity index 100%
rename from tests/test_resume_display.py
rename to tests/cli/test_resume_display.py
diff --git a/tests/test_surrogate_sanitization.py b/tests/cli/test_surrogate_sanitization.py
similarity index 100%
rename from tests/test_surrogate_sanitization.py
rename to tests/cli/test_surrogate_sanitization.py
diff --git a/tests/test_worktree.py b/tests/cli/test_worktree.py
similarity index 100%
rename from tests/test_worktree.py
rename to tests/cli/test_worktree.py
diff --git a/tests/test_worktree_security.py b/tests/cli/test_worktree_security.py
similarity index 100%
rename from tests/test_worktree_security.py
rename to tests/cli/test_worktree_security.py
diff --git a/tests/test_codex_execution_paths.py b/tests/cron/test_codex_execution_paths.py
similarity index 100%
rename from tests/test_codex_execution_paths.py
rename to tests/cron/test_codex_execution_paths.py
diff --git a/tests/test_file_permissions.py b/tests/cron/test_file_permissions.py
similarity index 100%
rename from tests/test_file_permissions.py
rename to tests/cron/test_file_permissions.py
diff --git a/tests/gateway/test_telegram_approval_buttons.py b/tests/gateway/test_telegram_approval_buttons.py
index 1b8249bc2..98d3cdc31 100644
--- a/tests/gateway/test_telegram_approval_buttons.py
+++ b/tests/gateway/test_telegram_approval_buttons.py
@@ -33,8 +33,15 @@ def _ensure_telegram_mock():
     mod.constants.ChatType.GROUP = "group"
     mod.constants.ChatType.SUPERGROUP = "supergroup"
     mod.constants.ChatType.CHANNEL = "channel"
-    for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request", "telegram.error"):
+    # Provide real exception classes so ``except (NetworkError, ...)`` in
+    # connect() doesn't raise TypeError when this mock leaks under xdist.
+    mod.error.NetworkError = type("NetworkError", (OSError,), {})
+    mod.error.TimedOut = type("TimedOut", (OSError,), {})
+    mod.error.BadRequest = type("BadRequest", (Exception,), {})
+
+    for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
         sys.modules.setdefault(name, mod)
+    sys.modules.setdefault("telegram.error", mod.error)
 
 
 _ensure_telegram_mock()
diff --git a/tests/gateway/test_telegram_conflict.py b/tests/gateway/test_telegram_conflict.py
index 7a480d9fc..47a67f229 100644
--- a/tests/gateway/test_telegram_conflict.py
+++ b/tests/gateway/test_telegram_conflict.py
@@ -20,8 +20,16 @@ def _ensure_telegram_mock():
     telegram_mod.constants.ChatType.CHANNEL = "channel"
     telegram_mod.constants.ChatType.PRIVATE = "private"
 
+    # Provide real exception classes so ``except (NetworkError, ...)`` in
+    # connect() doesn't blow up with "catching classes that do not inherit
+    # from BaseException" when another xdist worker pollutes sys.modules.
+    telegram_mod.error.NetworkError = type("NetworkError", (OSError,), {})
+    telegram_mod.error.TimedOut = type("TimedOut", (OSError,), {})
+    telegram_mod.error.BadRequest = type("BadRequest", (Exception,), {})
+
     for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
         sys.modules.setdefault(name, telegram_mod)
+    sys.modules.setdefault("telegram.error", telegram_mod.error)
 
 
 _ensure_telegram_mock()
diff --git a/tests/test_anthropic_oauth_flow.py b/tests/hermes_cli/test_anthropic_oauth_flow.py
similarity index 100%
rename from tests/test_anthropic_oauth_flow.py
rename to tests/hermes_cli/test_anthropic_oauth_flow.py
diff --git a/tests/test_anthropic_provider_persistence.py b/tests/hermes_cli/test_anthropic_provider_persistence.py
similarity index 100%
rename from tests/test_anthropic_provider_persistence.py
rename to tests/hermes_cli/test_anthropic_provider_persistence.py
diff --git a/tests/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py
similarity index 100%
rename from tests/test_api_key_providers.py
rename to tests/hermes_cli/test_api_key_providers.py
diff --git a/tests/test_atomic_json_write.py b/tests/hermes_cli/test_atomic_json_write.py
similarity index 100%
rename from tests/test_atomic_json_write.py
rename to tests/hermes_cli/test_atomic_json_write.py
diff --git a/tests/test_atomic_yaml_write.py b/tests/hermes_cli/test_atomic_yaml_write.py
similarity index 100%
rename from tests/test_atomic_yaml_write.py
rename to tests/hermes_cli/test_atomic_yaml_write.py
diff --git a/tests/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py
similarity index 100%
rename from tests/test_auth_codex_provider.py
rename to tests/hermes_cli/test_auth_codex_provider.py
diff --git a/tests/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py
similarity index 100%
rename from tests/test_auth_commands.py
rename to tests/hermes_cli/test_auth_commands.py
diff --git a/tests/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py
similarity index 100%
rename from tests/test_auth_nous_provider.py
rename to tests/hermes_cli/test_auth_nous_provider.py
diff --git a/tests/test_codex_models.py b/tests/hermes_cli/test_codex_models.py
similarity index 100%
rename from tests/test_codex_models.py
rename to tests/hermes_cli/test_codex_models.py
diff --git a/tests/test_config_env_expansion.py b/tests/hermes_cli/test_config_env_expansion.py
similarity index 100%
rename from tests/test_config_env_expansion.py
rename to tests/hermes_cli/test_config_env_expansion.py
diff --git a/tests/test_external_credential_detection.py b/tests/hermes_cli/test_external_credential_detection.py
similarity index 100%
rename from tests/test_external_credential_detection.py
rename to tests/hermes_cli/test_external_credential_detection.py
diff --git a/tests/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py
similarity index 100%
rename from tests/test_gemini_provider.py
rename to tests/hermes_cli/test_gemini_provider.py
diff --git a/tests/test_model_normalize.py b/tests/hermes_cli/test_model_normalize.py
similarity index 100%
rename from tests/test_model_normalize.py
rename to tests/hermes_cli/test_model_normalize.py
diff --git a/tests/test_model_provider_persistence.py b/tests/hermes_cli/test_model_provider_persistence.py
similarity index 100%
rename from tests/test_model_provider_persistence.py
rename to tests/hermes_cli/test_model_provider_persistence.py
diff --git a/tests/test_ollama_cloud_auth.py b/tests/hermes_cli/test_ollama_cloud_auth.py
similarity index 100%
rename from tests/test_ollama_cloud_auth.py
rename to tests/hermes_cli/test_ollama_cloud_auth.py
diff --git a/tests/test_plugin_cli_registration.py b/tests/hermes_cli/test_plugin_cli_registration.py
similarity index 100%
rename from tests/test_plugin_cli_registration.py
rename to tests/hermes_cli/test_plugin_cli_registration.py
diff --git a/tests/test_plugins.py b/tests/hermes_cli/test_plugins.py
similarity index 100%
rename from tests/test_plugins.py
rename to tests/hermes_cli/test_plugins.py
diff --git a/tests/test_plugins_cmd.py b/tests/hermes_cli/test_plugins_cmd.py
similarity index 100%
rename from tests/test_plugins_cmd.py
rename to tests/hermes_cli/test_plugins_cmd.py
diff --git a/tests/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py
similarity index 100%
rename from tests/test_runtime_provider_resolution.py
rename to tests/hermes_cli/test_runtime_provider_resolution.py
diff --git a/tests/test_setup_model_selection.py b/tests/hermes_cli/test_setup_model_selection.py
similarity index 100%
rename from tests/test_setup_model_selection.py
rename to tests/hermes_cli/test_setup_model_selection.py
diff --git a/tests/hermes_cli/test_update_check.py b/tests/hermes_cli/test_update_check.py
index b7d6de6ff..368bb1b07 100644
--- a/tests/hermes_cli/test_update_check.py
+++ b/tests/hermes_cli/test_update_check.py
@@ -15,7 +15,7 @@ def test_version_string_no_v_prefix():
     assert not __version__.startswith("v"), f"__version__ should not start with 'v', got {__version__!r}"
 
 
-def test_check_for_updates_uses_cache(tmp_path):
+def test_check_for_updates_uses_cache(tmp_path, monkeypatch):
     """When cache is fresh, check_for_updates should return cached value without calling git."""
     from hermes_cli.banner import check_for_updates
 
@@ -27,15 +27,15 @@ def test_check_for_updates_uses_cache(tmp_path):
     cache_file = tmp_path / ".update_check"
     cache_file.write_text(json.dumps({"ts": time.time(), "behind": 3}))
 
-    with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)):
-        with patch("hermes_cli.banner.subprocess.run") as mock_run:
-            result = check_for_updates()
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    with patch("hermes_cli.banner.subprocess.run") as mock_run:
+        result = check_for_updates()
 
     assert result == 3
     mock_run.assert_not_called()
 
 
-def test_check_for_updates_expired_cache(tmp_path):
+def test_check_for_updates_expired_cache(tmp_path, monkeypatch):
     """When cache is expired, check_for_updates should call git fetch."""
     from hermes_cli.banner import check_for_updates
 
@@ -49,15 +49,15 @@ def test_check_for_updates_expired_cache(tmp_path):
 
     mock_result = MagicMock(returncode=0, stdout="5\n")
 
-    with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)):
-        with patch("hermes_cli.banner.subprocess.run", return_value=mock_result) as mock_run:
-            result = check_for_updates()
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    with patch("hermes_cli.banner.subprocess.run", return_value=mock_result) as mock_run:
+        result = check_for_updates()
 
     assert result == 5
     assert mock_run.call_count == 2  # git fetch + git rev-list
 
 
-def test_check_for_updates_no_git_dir(tmp_path):
+def test_check_for_updates_no_git_dir(tmp_path, monkeypatch):
     """Returns None when .git directory doesn't exist anywhere."""
     import hermes_cli.banner as banner
 
@@ -66,19 +66,15 @@ def test_check_for_updates_no_git_dir(tmp_path):
     fake_banner.parent.mkdir(parents=True, exist_ok=True)
     fake_banner.touch()
 
-    original = banner.__file__
-    try:
-        banner.__file__ = str(fake_banner)
-        with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)):
-            with patch("hermes_cli.banner.subprocess.run") as mock_run:
-                result = banner.check_for_updates()
-        assert result is None
-        mock_run.assert_not_called()
-    finally:
-        banner.__file__ = original
+    monkeypatch.setattr(banner, "__file__", str(fake_banner))
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    with patch("hermes_cli.banner.subprocess.run") as mock_run:
+        result = banner.check_for_updates()
+    assert result is None
+    mock_run.assert_not_called()
 
 
-def test_check_for_updates_fallback_to_project_root():
+def test_check_for_updates_fallback_to_project_root(tmp_path, monkeypatch):
     """Dev install: falls back to Path(__file__).parent.parent when HERMES_HOME has no git repo."""
     import hermes_cli.banner as banner
 
@@ -87,14 +83,12 @@ def test_check_for_updates_fallback_to_project_root():
         pytest.skip("Not running from a git checkout")
 
     # Point HERMES_HOME at a temp dir with no hermes-agent/.git
-    import tempfile
-    with tempfile.TemporaryDirectory() as td:
-        with patch("hermes_cli.banner.os.getenv", return_value=td):
-            with patch("hermes_cli.banner.subprocess.run") as mock_run:
-                mock_run.return_value = MagicMock(returncode=0, stdout="0\n")
-                result = banner.check_for_updates()
-        # Should have fallen back to project root and run git commands
-        assert mock_run.call_count >= 1
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    with patch("hermes_cli.banner.subprocess.run") as mock_run:
+        mock_run.return_value = MagicMock(returncode=0, stdout="0\n")
+        result = banner.check_for_updates()
+    # Should have fallen back to project root and run git commands
+    assert mock_run.call_count >= 1
 
 
 def test_prefetch_non_blocking():
diff --git a/tests/run_agent/__init__.py b/tests/run_agent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/test_1630_context_overflow_loop.py b/tests/run_agent/test_1630_context_overflow_loop.py
similarity index 100%
rename from tests/test_1630_context_overflow_loop.py
rename to tests/run_agent/test_1630_context_overflow_loop.py
diff --git a/tests/test_413_compression.py b/tests/run_agent/test_413_compression.py
similarity index 100%
rename from tests/test_413_compression.py
rename to tests/run_agent/test_413_compression.py
diff --git a/tests/test_860_dedup.py b/tests/run_agent/test_860_dedup.py
similarity index 100%
rename from tests/test_860_dedup.py
rename to tests/run_agent/test_860_dedup.py
diff --git a/tests/test_agent_guardrails.py b/tests/run_agent/test_agent_guardrails.py
similarity index 100%
rename from tests/test_agent_guardrails.py
rename to tests/run_agent/test_agent_guardrails.py
diff --git a/tests/test_agent_loop.py b/tests/run_agent/test_agent_loop.py
similarity index 99%
rename from tests/test_agent_loop.py
rename to tests/run_agent/test_agent_loop.py
index b95ff7808..bd9e41b91 100644
--- a/tests/test_agent_loop.py
+++ b/tests/run_agent/test_agent_loop.py
@@ -16,7 +16,7 @@ from unittest.mock import MagicMock
 import pytest
 
 # Ensure repo root is importable
-sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
 
 try:
     from environments.agent_loop import (
diff --git a/tests/test_agent_loop_tool_calling.py b/tests/run_agent/test_agent_loop_tool_calling.py
similarity index 99%
rename from tests/test_agent_loop_tool_calling.py
rename to tests/run_agent/test_agent_loop_tool_calling.py
index 74e67c0be..3b8d6ac59 100644
--- a/tests/test_agent_loop_tool_calling.py
+++ b/tests/run_agent/test_agent_loop_tool_calling.py
@@ -31,7 +31,7 @@ import pytest
 # pytestmark removed — tests skip gracefully via OPENROUTER_API_KEY check on line 59
 
 # Ensure repo root is importable
-_repo_root = Path(__file__).resolve().parent.parent
+_repo_root = Path(__file__).resolve().parent.parent.parent
 if str(_repo_root) not in sys.path:
     sys.path.insert(0, str(_repo_root))
 
diff --git a/tests/test_agent_loop_vllm.py b/tests/run_agent/test_agent_loop_vllm.py
similarity index 99%
rename from tests/test_agent_loop_vllm.py
rename to tests/run_agent/test_agent_loop_vllm.py
index d47478ecb..d42849094 100644
--- a/tests/test_agent_loop_vllm.py
+++ b/tests/run_agent/test_agent_loop_vllm.py
@@ -30,7 +30,7 @@ import pytest
 import requests
 
 # Ensure repo root is importable
-_repo_root = Path(__file__).resolve().parent.parent
+_repo_root = Path(__file__).resolve().parent.parent.parent
 if str(_repo_root) not in sys.path:
     sys.path.insert(0, str(_repo_root))
 
diff --git a/tests/test_anthropic_error_handling.py b/tests/run_agent/test_anthropic_error_handling.py
similarity index 100%
rename from tests/test_anthropic_error_handling.py
rename to tests/run_agent/test_anthropic_error_handling.py
diff --git a/tests/test_async_httpx_del_neuter.py b/tests/run_agent/test_async_httpx_del_neuter.py
similarity index 100%
rename from tests/test_async_httpx_del_neuter.py
rename to tests/run_agent/test_async_httpx_del_neuter.py
diff --git a/tests/test_compression_boundary.py b/tests/run_agent/test_compression_boundary.py
similarity index 100%
rename from tests/test_compression_boundary.py
rename to tests/run_agent/test_compression_boundary.py
diff --git a/tests/test_compression_persistence.py b/tests/run_agent/test_compression_persistence.py
similarity index 100%
rename from tests/test_compression_persistence.py
rename to tests/run_agent/test_compression_persistence.py
diff --git a/tests/test_compressor_fallback_update.py b/tests/run_agent/test_compressor_fallback_update.py
similarity index 100%
rename from tests/test_compressor_fallback_update.py
rename to tests/run_agent/test_compressor_fallback_update.py
diff --git a/tests/test_context_pressure.py b/tests/run_agent/test_context_pressure.py
similarity index 100%
rename from tests/test_context_pressure.py
rename to tests/run_agent/test_context_pressure.py
diff --git a/tests/test_context_token_tracking.py b/tests/run_agent/test_context_token_tracking.py
similarity index 100%
rename from tests/test_context_token_tracking.py
rename to tests/run_agent/test_context_token_tracking.py
diff --git a/tests/test_dict_tool_call_args.py b/tests/run_agent/test_dict_tool_call_args.py
similarity index 100%
rename from tests/test_dict_tool_call_args.py
rename to tests/run_agent/test_dict_tool_call_args.py
diff --git a/tests/test_exit_cleanup_interrupt.py b/tests/run_agent/test_exit_cleanup_interrupt.py
similarity index 100%
rename from tests/test_exit_cleanup_interrupt.py
rename to tests/run_agent/test_exit_cleanup_interrupt.py
diff --git a/tests/test_fallback_model.py b/tests/run_agent/test_fallback_model.py
similarity index 100%
rename from tests/test_fallback_model.py
rename to tests/run_agent/test_fallback_model.py
diff --git a/tests/test_flush_memories_codex.py b/tests/run_agent/test_flush_memories_codex.py
similarity index 100%
rename from tests/test_flush_memories_codex.py
rename to tests/run_agent/test_flush_memories_codex.py
diff --git a/tests/test_interactive_interrupt.py b/tests/run_agent/test_interactive_interrupt.py
similarity index 98%
rename from tests/test_interactive_interrupt.py
rename to tests/run_agent/test_interactive_interrupt.py
index 8c0d328c2..762621f22 100644
--- a/tests/test_interactive_interrupt.py
+++ b/tests/run_agent/test_interactive_interrupt.py
@@ -23,7 +23,7 @@ logging.basicConfig(level=logging.DEBUG, stream=sys.stderr,
                     format="%(asctime)s [%(threadName)s] %(message)s")
 log = logging.getLogger("interrupt_test")
 
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
 
 from unittest.mock import MagicMock, patch
 from run_agent import AIAgent, IterationBudget
diff --git a/tests/test_interrupt_propagation.py b/tests/run_agent/test_interrupt_propagation.py
similarity index 100%
rename from tests/test_interrupt_propagation.py
rename to tests/run_agent/test_interrupt_propagation.py
diff --git a/tests/test_large_tool_result.py b/tests/run_agent/test_large_tool_result.py
similarity index 100%
rename from tests/test_large_tool_result.py
rename to tests/run_agent/test_large_tool_result.py
diff --git a/tests/test_long_context_tier_429.py b/tests/run_agent/test_long_context_tier_429.py
similarity index 100%
rename from tests/test_long_context_tier_429.py
rename to tests/run_agent/test_long_context_tier_429.py
diff --git a/tests/test_openai_client_lifecycle.py b/tests/run_agent/test_openai_client_lifecycle.py
similarity index 100%
rename from tests/test_openai_client_lifecycle.py
rename to tests/run_agent/test_openai_client_lifecycle.py
diff --git a/tests/test_percentage_clamp.py b/tests/run_agent/test_percentage_clamp.py
similarity index 98%
rename from tests/test_percentage_clamp.py
rename to tests/run_agent/test_percentage_clamp.py
index 67d119149..fcf1e39e5 100644
--- a/tests/test_percentage_clamp.py
+++ b/tests/run_agent/test_percentage_clamp.py
@@ -122,7 +122,7 @@ class TestSourceLinesAreClamped:
     @staticmethod
     def _read_file(rel_path: str) -> str:
         import os
-        base = os.path.dirname(os.path.dirname(__file__))
+        base = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
         with open(os.path.join(base, rel_path)) as f:
             return f.read()
 
diff --git a/tests/test_primary_runtime_restore.py b/tests/run_agent/test_primary_runtime_restore.py
similarity index 100%
rename from tests/test_primary_runtime_restore.py
rename to tests/run_agent/test_primary_runtime_restore.py
diff --git a/tests/test_provider_fallback.py b/tests/run_agent/test_provider_fallback.py
similarity index 100%
rename from tests/test_provider_fallback.py
rename to tests/run_agent/test_provider_fallback.py
diff --git a/tests/test_provider_parity.py b/tests/run_agent/test_provider_parity.py
similarity index 100%
rename from tests/test_provider_parity.py
rename to tests/run_agent/test_provider_parity.py
diff --git a/tests/test_real_interrupt_subagent.py b/tests/run_agent/test_real_interrupt_subagent.py
similarity index 100%
rename from tests/test_real_interrupt_subagent.py
rename to tests/run_agent/test_real_interrupt_subagent.py
diff --git a/tests/test_redirect_stdout_issue.py b/tests/run_agent/test_redirect_stdout_issue.py
similarity index 100%
rename from tests/test_redirect_stdout_issue.py
rename to tests/run_agent/test_redirect_stdout_issue.py
diff --git a/tests/test_run_agent.py b/tests/run_agent/test_run_agent.py
similarity index 100%
rename from tests/test_run_agent.py
rename to tests/run_agent/test_run_agent.py
diff --git a/tests/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
similarity index 100%
rename from tests/test_run_agent_codex_responses.py
rename to tests/run_agent/test_run_agent_codex_responses.py
diff --git a/tests/test_session_meta_filtering.py b/tests/run_agent/test_session_meta_filtering.py
similarity index 100%
rename from tests/test_session_meta_filtering.py
rename to tests/run_agent/test_session_meta_filtering.py
diff --git a/tests/test_session_reset_fix.py b/tests/run_agent/test_session_reset_fix.py
similarity index 98%
rename from tests/test_session_reset_fix.py
rename to tests/run_agent/test_session_reset_fix.py
index ee65ed90d..1fd1223ce 100644
--- a/tests/test_session_reset_fix.py
+++ b/tests/run_agent/test_session_reset_fix.py
@@ -13,7 +13,7 @@ from pathlib import Path
 import pytest
 
 # Ensure repo root is importable
-sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
 
 # Stub out optional heavy dependencies not installed in the test environment
 sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
diff --git a/tests/test_streaming.py b/tests/run_agent/test_streaming.py
similarity index 100%
rename from tests/test_streaming.py
rename to tests/run_agent/test_streaming.py
diff --git a/tests/test_strict_api_validation.py b/tests/run_agent/test_strict_api_validation.py
similarity index 100%
rename from tests/test_strict_api_validation.py
rename to tests/run_agent/test_strict_api_validation.py
diff --git a/tests/test_token_persistence_non_cli.py b/tests/run_agent/test_token_persistence_non_cli.py
similarity index 100%
rename from tests/test_token_persistence_non_cli.py
rename to tests/run_agent/test_token_persistence_non_cli.py
diff --git a/tests/test_tool_arg_coercion.py b/tests/run_agent/test_tool_arg_coercion.py
similarity index 100%
rename from tests/test_tool_arg_coercion.py
rename to tests/run_agent/test_tool_arg_coercion.py
diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py
index 085ffad29..5ac3fd872 100644
--- a/tests/tools/test_code_execution.py
+++ b/tests/tools/test_code_execution.py
@@ -18,10 +18,18 @@ import pytest
 import json
 import os
 
-# Force local terminal backend for ALL tests in this file.
-# Under xdist, another test may leak TERMINAL_ENV=modal/docker, sending
-# execute_code down the remote path → modal.exception.AuthError.
 os.environ["TERMINAL_ENV"] = "local"
+
+
+@pytest.fixture(autouse=True)
+def _force_local_terminal(monkeypatch):
+    """Re-set TERMINAL_ENV=local before every test.
+
+    The module-level assignment above covers import time, but a test that
+    ran earlier in the same process (e.g. the same xdist worker) can leave
+    TERMINAL_ENV set to modal/docker.  monkeypatch pins it for each test.
+    """
+    monkeypatch.setenv("TERMINAL_ENV", "local")
 import sys
 import time
 import threading
diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py
index d07dcb367..5ae24f01a 100644
--- a/tests/tools/test_managed_browserbase_and_modal.py
+++ b/tests/tools/test_managed_browserbase_and_modal.py
@@ -91,7 +91,11 @@ def _install_fake_tools_package():
         def register(self, **kwargs):
             return None
 
-    sys.modules["tools.registry"] = types.SimpleNamespace(registry=_Registry())
+    from tools.registry import tool_error
+
+    sys.modules["tools.registry"] = types.SimpleNamespace(
+        registry=_Registry(), tool_error=tool_error,
+    )
 
     class _DummyEnvironment:
         def __init__(self, *args, **kwargs):
diff --git a/tests/test_managed_server_tool_support.py b/tests/tools/test_managed_server_tool_support.py
similarity index 100%
rename from tests/test_managed_server_tool_support.py
rename to tests/tools/test_managed_server_tool_support.py
diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py
index 7e3feb5cf..e1baf13d9 100644
--- a/tests/tools/test_modal_sandbox_fixes.py
+++ b/tests/tools/test_modal_sandbox_fixes.py
@@ -12,8 +12,6 @@ Covers the bugs discovered while setting up TBLite evaluation:
 import os
 import sys
 from pathlib import Path
-from unittest.mock import patch, MagicMock
-
 import pytest
 
 # Ensure repo root is importable
@@ -64,89 +62,72 @@ class TestToolResolution:
 class TestCwdHandling:
     """Verify host paths are sanitized for container backends."""
 
-    def test_home_path_replaced_for_modal(self):
+    def test_home_path_replaced_for_modal(self, monkeypatch):
         """TERMINAL_CWD=/home/user/... should be replaced with /root for modal."""
-        with patch.dict(os.environ, {
-            "TERMINAL_ENV": "modal",
-            "TERMINAL_CWD": "/home/dakota/github/hermes-agent",
-        }):
-            config = _tt_mod._get_env_config()
-            assert config["cwd"] == "/root", (
-                f"Expected /root, got {config['cwd']}. "
-                "/home/ paths should be replaced for modal backend."
-            )
+        monkeypatch.setenv("TERMINAL_ENV", "modal")
+        monkeypatch.setenv("TERMINAL_CWD", "/home/dakota/github/hermes-agent")
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/root", (
+            f"Expected /root, got {config['cwd']}. "
+            "/home/ paths should be replaced for modal backend."
+        )
 
-    def test_users_path_replaced_for_docker_by_default(self):
+    def test_users_path_replaced_for_docker_by_default(self, monkeypatch):
         """Docker should keep host paths out of the sandbox unless explicitly enabled."""
-        with patch.dict(os.environ, {
-            "TERMINAL_ENV": "docker",
-            "TERMINAL_CWD": "/Users/someone/projects",
-        }):
-            config = _tt_mod._get_env_config()
-            assert config["cwd"] == "/root", (
-                f"Expected /root, got {config['cwd']}. "
-                "Host paths should be discarded for docker backend by default."
-            )
-            assert config["host_cwd"] is None
-            assert config["docker_mount_cwd_to_workspace"] is False
+        monkeypatch.setenv("TERMINAL_ENV", "docker")
+        monkeypatch.setenv("TERMINAL_CWD", "/Users/someone/projects")
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/root", (
+            f"Expected /root, got {config['cwd']}. "
+            "Host paths should be discarded for docker backend by default."
+        )
+        assert config["host_cwd"] is None
+        assert config["docker_mount_cwd_to_workspace"] is False
 
-    def test_users_path_maps_to_workspace_for_docker_when_enabled(self):
+    def test_users_path_maps_to_workspace_for_docker_when_enabled(self, monkeypatch):
         """Docker should map the host cwd into /workspace only when explicitly enabled."""
-        with patch.dict(os.environ, {
-            "TERMINAL_ENV": "docker",
-            "TERMINAL_CWD": "/Users/someone/projects",
-            "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true",
-        }):
-            config = _tt_mod._get_env_config()
-            assert config["cwd"] == "/workspace"
-            assert config["host_cwd"] == "/Users/someone/projects"
-            assert config["docker_mount_cwd_to_workspace"] is True
+        monkeypatch.setenv("TERMINAL_ENV", "docker")
+        monkeypatch.setenv("TERMINAL_CWD", "/Users/someone/projects")
+        monkeypatch.setenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "true")
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/workspace"
+        assert config["host_cwd"] == "/Users/someone/projects"
+        assert config["docker_mount_cwd_to_workspace"] is True
 
-    def test_windows_path_replaced_for_modal(self):
+    def test_windows_path_replaced_for_modal(self, monkeypatch):
         """TERMINAL_CWD=C:\\Users\\... should be replaced for modal."""
-        with patch.dict(os.environ, {
-            "TERMINAL_ENV": "modal",
-            "TERMINAL_CWD": "C:\\Users\\someone\\projects",
-        }):
-            config = _tt_mod._get_env_config()
-            assert config["cwd"] == "/root"
+        monkeypatch.setenv("TERMINAL_ENV", "modal")
+        monkeypatch.setenv("TERMINAL_CWD", "C:\\Users\\someone\\projects")
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/root"
 
-    def test_default_cwd_is_root_for_container_backends(self):
+    @pytest.mark.parametrize("backend", ["modal", "docker", "singularity", "daytona"])
+    def test_default_cwd_is_root_for_container_backends(self, backend, monkeypatch):
         """Container backends should default to /root, not ~."""
-        for backend in ("modal", "docker", "singularity", "daytona"):
-            with patch.dict(os.environ, {"TERMINAL_ENV": backend}, clear=False):
-                # Remove TERMINAL_CWD so it uses default
-                env = os.environ.copy()
-                env.pop("TERMINAL_CWD", None)
-                env.pop("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", None)
-                with patch.dict(os.environ, env, clear=True):
-                    config = _tt_mod._get_env_config()
-                    assert config["cwd"] == "/root", (
-                        f"Backend {backend}: expected /root default, got {config['cwd']}"
-                    )
+        monkeypatch.setenv("TERMINAL_ENV", backend)
+        monkeypatch.delenv("TERMINAL_CWD", raising=False)
+        monkeypatch.delenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", raising=False)
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/root", (
+            f"Backend {backend}: expected /root default, got {config['cwd']}"
+        )
 
-    def test_docker_default_cwd_maps_current_directory_when_enabled(self):
+    def test_docker_default_cwd_maps_current_directory_when_enabled(self, monkeypatch):
         """Docker should use /workspace when cwd mounting is explicitly enabled."""
-        with patch("tools.terminal_tool.os.getcwd", return_value="/home/user/project"):
-            with patch.dict(os.environ, {
-                "TERMINAL_ENV": "docker",
-                "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true",
-            }, clear=False):
-                env = os.environ.copy()
-                env.pop("TERMINAL_CWD", None)
-                with patch.dict(os.environ, env, clear=True):
-                    config = _tt_mod._get_env_config()
-                    assert config["cwd"] == "/workspace"
-                    assert config["host_cwd"] == "/home/user/project"
+        monkeypatch.setattr("tools.terminal_tool.os.getcwd", lambda: "/home/user/project")
+        monkeypatch.setenv("TERMINAL_ENV", "docker")
+        monkeypatch.setenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "true")
+        monkeypatch.delenv("TERMINAL_CWD", raising=False)
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/workspace"
+        assert config["host_cwd"] == "/home/user/project"
 
-    def test_local_backend_uses_getcwd(self):
+    def test_local_backend_uses_getcwd(self, monkeypatch):
         """Local backend should use os.getcwd(), not /root."""
-        with patch.dict(os.environ, {"TERMINAL_ENV": "local"}, clear=False):
-            env = os.environ.copy()
-            env.pop("TERMINAL_CWD", None)
-            with patch.dict(os.environ, env, clear=True):
-                config = _tt_mod._get_env_config()
-                assert config["cwd"] == os.getcwd()
+        monkeypatch.setenv("TERMINAL_ENV", "local")
+        monkeypatch.delenv("TERMINAL_CWD", raising=False)
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == os.getcwd()
 
     def test_create_environment_passes_docker_host_cwd_and_flag(self, monkeypatch):
         """Docker host cwd and mount flag should reach DockerEnvironment."""
@@ -173,18 +154,16 @@ class TestCwdHandling:
         assert captured["host_cwd"] == "/home/user/project"
         assert captured["auto_mount_cwd"] is True
 
-    def test_ssh_preserves_home_paths(self):
+    def test_ssh_preserves_home_paths(self, monkeypatch):
         """SSH backend should NOT replace /home/ paths (they're valid remotely)."""
-        with patch.dict(os.environ, {
-            "TERMINAL_ENV": "ssh",
-            "TERMINAL_CWD": "/home/remote-user/work",
-            "TERMINAL_SSH_HOST": "example.com",
-            "TERMINAL_SSH_USER": "user",
-        }):
-            config = _tt_mod._get_env_config()
-            assert config["cwd"] == "/home/remote-user/work", (
-                "SSH backend should preserve /home/ paths"
-            )
+        monkeypatch.setenv("TERMINAL_ENV", "ssh")
+        monkeypatch.setenv("TERMINAL_CWD", "/home/remote-user/work")
+        monkeypatch.setenv("TERMINAL_SSH_HOST", "example.com")
+        monkeypatch.setenv("TERMINAL_SSH_USER", "user")
+        config = _tt_mod._get_env_config()
+        assert config["cwd"] == "/home/remote-user/work", (
+            "SSH backend should preserve /home/ paths"
+        )
 
 
 # =========================================================================
@@ -194,12 +173,8 @@ class TestCwdHandling:
 class TestEphemeralDiskCheck:
     """Verify ephemeral_disk is only passed when modal supports it."""
 
-    def test_ephemeral_disk_skipped_when_unsupported(self):
+    def test_ephemeral_disk_skipped_when_unsupported(self, monkeypatch):
         """If modal.Sandbox.create doesn't have ephemeral_disk param, skip it."""
-        # Mock the modal import and Sandbox.create signature
-        mock_modal = MagicMock()
-        mock_sandbox_create = MagicMock()
-        # Simulate a signature WITHOUT ephemeral_disk
         import inspect
         mock_params = {
             "args": inspect.Parameter("args", inspect.Parameter.VAR_POSITIONAL),
@@ -208,26 +183,25 @@ class TestEphemeralDiskCheck:
             "cpu": inspect.Parameter("cpu", inspect.Parameter.KEYWORD_ONLY),
             "memory": inspect.Parameter("memory", inspect.Parameter.KEYWORD_ONLY),
         }
-        mock_sig = inspect.Signature(parameters=list(mock_params.values()))
 
-        with patch.dict(os.environ, {"TERMINAL_ENV": "modal"}):
-            config = _tt_mod._get_env_config()
-            # The config has container_disk default of 51200
-            disk = config.get("container_disk", 51200)
-            assert disk > 0, "disk should default to > 0"
+        monkeypatch.setenv("TERMINAL_ENV", "modal")
+        config = _tt_mod._get_env_config()
+        # The config has container_disk default of 51200
+        disk = config.get("container_disk", 51200)
+        assert disk > 0, "disk should default to > 0"
 
-            # Simulate the version check logic from terminal_tool.py
-            sandbox_kwargs = {}
-            if disk > 0:
-                try:
-                    if "ephemeral_disk" in mock_params:
-                        sandbox_kwargs["ephemeral_disk"] = disk
-                except Exception:
-                    pass
+        # Simulate the version check logic from terminal_tool.py
+        sandbox_kwargs = {}
+        if disk > 0:
+            try:
+                if "ephemeral_disk" in mock_params:
+                    sandbox_kwargs["ephemeral_disk"] = disk
+            except Exception:
+                pass
 
-            assert "ephemeral_disk" not in sandbox_kwargs, (
-                "ephemeral_disk should not be set when Sandbox.create doesn't support it"
-            )
+        assert "ephemeral_disk" not in sandbox_kwargs, (
+            "ephemeral_disk should not be set when Sandbox.create doesn't support it"
+        )
 
 
 # =========================================================================
diff --git a/tests/test_tool_call_parsers.py b/tests/tools/test_tool_call_parsers.py
similarity index 100%
rename from tests/test_tool_call_parsers.py
rename to tests/tools/test_tool_call_parsers.py
diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py
index 08f26f509..226e99b56 100644
--- a/tools/browser_camofox.py
+++ b/tools/browser_camofox.py
@@ -34,6 +34,7 @@ import requests
 
 from hermes_cli.config import load_config
 from tools.browser_camofox_state import get_camofox_identity
+from tools.registry import tool_error
 
 logger = logging.getLogger(__name__)
 
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index 2ed8ba210..1ff42e77b 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -1317,6 +1317,8 @@ def _make_read_resource_handler(server_name: str, tool_timeout: float):
     """Return a sync handler that reads a resource by URI from an MCP server."""
 
     def _handler(args: dict, **kwargs) -> str:
+        from tools.registry import tool_error
+
         with _lock:
             server = _servers.get(server_name)
         if not server or not server.session:
@@ -1406,6 +1408,8 @@ def _make_get_prompt_handler(server_name: str, tool_timeout: float):
     """Return a sync handler that gets a prompt by name from an MCP server."""
 
     def _handler(args: dict, **kwargs) -> str:
+        from tools.registry import tool_error
+
         with _lock:
             server = _servers.get(server_name)
         if not server or not server.session: