fix(tests): fix 11 real test failures + major cascade poisoner (#4570)

Three root causes addressed:

1. AIAgent no longer defaults base_url to OpenRouter (9 tests)
   Tests that assert OpenRouter-specific behavior (prompt caching,
   reasoning extra_body, provider preferences) need explicit base_url
   and model set on the agent. Updated test_run_agent.py and
   test_provider_parity.py.

2. Credential pool auto-seeding from host env (2 tests)
   test_auxiliary_client.py tests for Anthropic OAuth and custom
   endpoint fallback were not mocking _select_pool_entry, so the
   host's credential pool interfered. Added pool + codex mocks.

3. sys.modules corruption cascade (major - ~250 tests)
   test_managed_modal_environment.py replaced sys.modules entries
   (tools, hermes_cli, agent packages) with SimpleNamespace stubs
   but had NO cleanup fixture. Every subsequent test in the process
   saw corrupted imports: 'cannot import get_config_path from
   <unknown module name>' and 'module tools has no attribute
   environments'. Added _restore_tool_and_agent_modules autouse
   fixture matching the pattern in test_managed_browserbase_and_modal.py.

   This was also the root cause of CI failures (104 failed on main).
This commit is contained in:
Teknium
2026-04-02 08:43:06 -07:00
committed by GitHub
parent 624ad582a5
commit acea9ee20b
4 changed files with 43 additions and 8 deletions

View File

@@ -334,10 +334,11 @@ class TestExpiredCodexFallback:
def test_hermes_oauth_file_sets_oauth_flag(self, monkeypatch):
"""OAuth-style tokens should get is_oauth=True (token is not sk-ant-api-*)."""
"""OAuth-style tokens should get is_oauth=True (token is not sk-ant-api-*)."""
# Mock resolve_anthropic_token to return an OAuth-style token
with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="hermes-oauth-jwt-token"), \
patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
mock_build.return_value = MagicMock()
from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient
client, model = _try_anthropic()
@@ -769,9 +770,13 @@ class TestAuxiliaryPoolAwareness:
Many local models (Qwen-VL, LLaVA, etc.) support vision.
When no OpenRouter/Nous/Codex is available, try the custom endpoint.
"""
monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
monkeypatch.setenv("OPENAI_API_KEY", "local-key")
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)), \
patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
patch("agent.auxiliary_client._resolve_custom_runtime",
return_value=("http://localhost:1234/v1", "local-key")), \
patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_vision_auxiliary_client()
assert client is not None # Custom endpoint picked up as fallback

View File

@@ -73,6 +73,7 @@ class TestBuildApiKwargsOpenRouter:
def test_includes_reasoning_in_extra_body(self, monkeypatch):
agent = _make_agent(monkeypatch, "openrouter")
agent.model = "anthropic/claude-sonnet-4-20250514"
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
extra = kwargs.get("extra_body", {})
@@ -798,6 +799,7 @@ class TestReasoningEffortDefaults:
def test_openrouter_default_medium(self, monkeypatch):
agent = _make_agent(monkeypatch, "openrouter")
agent.model = "anthropic/claude-sonnet-4-20250514"
kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
reasoning = kwargs["extra_body"]["reasoning"]
assert reasoning["effort"] == "medium"
@@ -825,6 +827,7 @@ class TestReasoningEffortDefaults:
def test_openrouter_reasoning_config_override(self, monkeypatch):
agent = _make_agent(monkeypatch, "openrouter")
agent.model = "anthropic/claude-sonnet-4-20250514"
agent.reasoning_config = {"enabled": True, "effort": "medium"}
kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"

View File

@@ -411,8 +411,9 @@ class TestInit:
patch("run_agent.OpenAI"),
):
a = AIAgent(
api_key="test-key-1234567890",
api_key="test-key-1234567890",
model="anthropic/claude-sonnet-4-20250514",
base_url="https://openrouter.ai/api/v1",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
@@ -792,6 +793,7 @@ class TestBuildApiKwargs:
assert kwargs["timeout"] == 1800.0
def test_provider_preferences_injected(self, agent):
agent.base_url = "https://openrouter.ai/api/v1"
agent.providers_allowed = ["Anthropic"]
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
@@ -799,6 +801,8 @@ class TestBuildApiKwargs:
def test_reasoning_config_default_openrouter(self, agent):
"""Default reasoning config for OpenRouter should be medium."""
agent.base_url = "https://openrouter.ai/api/v1"
agent.model = "anthropic/claude-sonnet-4-20250514"
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
reasoning = kwargs["extra_body"]["reasoning"]
@@ -806,6 +810,8 @@ class TestBuildApiKwargs:
assert reasoning["effort"] == "medium"
def test_reasoning_config_custom(self, agent):
agent.base_url = "https://openrouter.ai/api/v1"
agent.model = "anthropic/claude-sonnet-4-20250514"
agent.reasoning_config = {"enabled": False}
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
@@ -818,6 +824,7 @@ class TestBuildApiKwargs:
assert "reasoning" not in kwargs.get("extra_body", {})
def test_reasoning_sent_for_supported_openrouter_model(self, agent):
agent.base_url = "https://openrouter.ai/api/v1"
agent.model = "qwen/qwen3.5-plus-02-15"
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
@@ -3156,9 +3163,11 @@ class TestStreamingApiCall:
def test_api_exception_falls_back_to_non_streaming(self, agent):
"""When streaming fails before any deltas, fallback to non-streaming is attempted."""
agent.client.chat.completions.create.side_effect = ConnectionError("fail")
# The fallback also uses the same client, so it'll fail too
with pytest.raises(ConnectionError, match="fail"):
agent._interruptible_streaming_api_call({"messages": []})
# Prevent stream retry logic from replacing the mock client
with patch.object(agent, "_replace_primary_openai_client", return_value=False):
# The fallback also uses the same client, so it'll fail too
with pytest.raises(ConnectionError, match="fail"):
agent._interruptible_streaming_api_call({"messages": []})
def test_response_has_uuid_id(self, agent):
chunks = [_make_chunk(content="x"), _make_chunk(finish_reason="stop")]

View File

@@ -27,6 +27,24 @@ def _reset_modules(prefixes: tuple[str, ...]):
sys.modules.pop(name, None)
@pytest.fixture(autouse=True)
def _restore_tool_and_agent_modules():
"""Save and restore sys.modules entries so fakes don't leak to other tests."""
original_modules = {
name: module
for name, module in sys.modules.items()
if name in ("tools", "agent", "hermes_cli")
or name.startswith("tools.")
or name.startswith("agent.")
or name.startswith("hermes_cli.")
}
try:
yield
finally:
_reset_modules(("tools", "agent", "hermes_cli"))
sys.modules.update(original_modules)
def _install_fake_tools_package(*, credential_mounts=None):
_reset_modules(("tools", "agent", "hermes_cli"))