fix(agent): prevent AsyncOpenAI/httpx cross-loop deadlock in gateway mode (#2701)

In gateway mode, async tools (vision_analyze, web_extract, session_search)
deadlock because _run_async() spawns a thread with asyncio.run(), creating
a new event loop, but _get_cached_client() returns an AsyncOpenAI client
bound to a different loop. httpx.AsyncClient cannot work across event loop
boundaries, causing await client.chat.completions.create() to hang forever.

Fix: include the event loop identity in the async client cache key so each
loop gets its own AsyncOpenAI instance. Also fix session_search_tool.py
which had its own broken asyncio.run()-in-thread pattern — now uses the
centralized _run_async() bridge.
This commit is contained in:
ctlst
2026-03-25 17:31:56 -07:00
committed by GitHub
parent 0d7f739675
commit 281100e2df
3 changed files with 218 additions and 16 deletions

View File

@@ -1258,13 +1258,33 @@ def _get_cached_client(
base_url: str = None,
api_key: str = None,
) -> Tuple[Optional[Any], Optional[str]]:
"""Get or create a cached client for the given provider."""
cache_key = (provider, async_mode, base_url or "", api_key or "")
"""Get or create a cached client for the given provider.
Async clients (AsyncOpenAI) use httpx.AsyncClient internally, which
binds to the event loop that was current when the client was created.
Using such a client on a *different* loop causes deadlocks or
RuntimeError. To prevent cross-loop issues (especially in gateway
mode where _run_async() may spawn fresh loops in worker threads), the
cache key for async clients includes the current event loop's identity
so each loop gets its own client instance.
"""
# Include loop identity for async clients to prevent cross-loop reuse.
# httpx.AsyncClient (inside AsyncOpenAI) is bound to the loop where it
# was created — reusing it on a different loop causes deadlocks (#2681).
loop_id = 0
current_loop = None
if async_mode:
try:
import asyncio as _aio
current_loop = _aio.get_event_loop()
loop_id = id(current_loop)
except RuntimeError:
pass
cache_key = (provider, async_mode, base_url or "", api_key or "", loop_id)
with _client_cache_lock:
if cache_key in _client_cache:
cached_client, cached_default, cached_loop = _client_cache[cache_key]
if async_mode:
# Async clients are bound to the event loop that created them.
# A cached async client whose loop has been closed will raise
# "Event loop is closed" when httpx tries to clean up its
# transport. Discard the stale client and create a fresh one.
@@ -1286,13 +1306,7 @@ def _get_cached_client(
if client is not None:
# For async clients, remember which loop they were created on so we
# can detect stale entries later.
bound_loop = None
if async_mode:
try:
import asyncio as _aio
bound_loop = _aio.get_event_loop()
except RuntimeError:
pass
bound_loop = current_loop
with _client_cache_lock:
if cache_key not in _client_cache:
_client_cache[cache_key] = (client, default_model, bound_loop)

View File

@@ -0,0 +1,186 @@
"""Tests for cross-loop client cache isolation fix (#2681).
Verifies that _get_cached_client() returns different AsyncOpenAI clients
when called from different event loops, preventing the httpx deadlock
that occurs when a cached async client bound to loop A is reused on loop B.
This test file is self-contained and does not import the full tool chain,
so it can run without optional dependencies like firecrawl.
"""
import asyncio
import threading
from concurrent.futures import ThreadPoolExecutor
from unittest.mock import patch, MagicMock
from types import SimpleNamespace
import pytest
# ---------------------------------------------------------------------------
# Minimal stubs so we can import _get_cached_client without the full tree
# ---------------------------------------------------------------------------
def _stub_resolve_provider_client(provider, model, async_mode, **kw):
"""Return a unique mock client each time, simulating AsyncOpenAI creation."""
client = MagicMock(name=f"client-{provider}-async={async_mode}")
client.api_key = "test"
client.base_url = kw.get("explicit_base_url", "http://localhost:8081/v1")
return client, model or "test-model"
@pytest.fixture(autouse=True)
def _clean_client_cache():
    """Ensure each test starts and ends with an empty client cache.

    Autouse so no test can leak cached clients (keyed per event loop)
    into a later test.

    Fix: the previous version contained two pieces of dead code —
    an unused ``import importlib`` and a no-op
    ``with patch.dict("sys.modules", {}): pass`` (patching with an empty
    mapping and doing nothing inside the context changes nothing).
    Both are removed; behavior is identical.
    """
    import agent.auxiliary_client as ac
    ac._client_cache.clear()
    yield
    # Clear again on teardown so a failing test can't pollute the next one.
    ac._client_cache.clear()
class TestCrossLoopCacheIsolation:
    """Verify async clients are cached per-event-loop, not globally."""

    def test_same_loop_reuses_client(self):
        """Within a single event loop, the same client should be returned."""
        from agent.auxiliary_client import _get_cached_client, _client_cache
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        with patch("agent.auxiliary_client.resolve_provider_client",
                   side_effect=_stub_resolve_provider_client):
            client1, _ = _get_cached_client("custom", "m1", async_mode=True,
                                            base_url="http://localhost:8081/v1")
            client2, _ = _get_cached_client("custom", "m1", async_mode=True,
                                            base_url="http://localhost:8081/v1")
        # Identity, not equality: a cache hit must return the same object.
        assert client1 is client2, (
            "Same loop should return the same cached client"
        )
        loop.close()

    def test_different_loops_get_different_clients(self):
        """Different event loops must get separate client instances."""
        from agent.auxiliary_client import _get_cached_client
        results = {}

        def _get_client_on_new_loop(name):
            # Each worker thread gets its own fresh event loop, so the two
            # calls below exercise the loop-id component of the cache key.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            with patch("agent.auxiliary_client.resolve_provider_client",
                       side_effect=_stub_resolve_provider_client):
                client, _ = _get_cached_client("custom", "m1", async_mode=True,
                                               base_url="http://localhost:8081/v1")
                results[name] = (id(client), id(loop))
            # Don't close loop — simulates real usage where loops persist

        # Run the threads sequentially (start/join pairs) so results is
        # populated without any cross-thread races.
        t1 = threading.Thread(target=_get_client_on_new_loop, args=("a",))
        t2 = threading.Thread(target=_get_client_on_new_loop, args=("b",))
        t1.start(); t1.join()
        t2.start(); t2.join()
        client_id_a, loop_id_a = results["a"]
        client_id_b, loop_id_b = results["b"]
        assert loop_id_a != loop_id_b, "Test setup error: same loop on both threads"
        assert client_id_a != client_id_b, (
            "Different event loops got the SAME cached client — this causes "
            "httpx cross-loop deadlocks in gateway mode (#2681)"
        )

    def test_sync_clients_not_affected(self):
        """Sync clients (async_mode=False) should still be cached globally,
        since httpx.Client (sync) doesn't bind to an event loop."""
        from agent.auxiliary_client import _get_cached_client
        results = {}

        def _get_sync_client(name):
            # A loop is created here only to mirror the async test's setup;
            # sync clients must ignore it entirely.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            with patch("agent.auxiliary_client.resolve_provider_client",
                       side_effect=_stub_resolve_provider_client):
                client, _ = _get_cached_client("custom", "m1", async_mode=False,
                                               base_url="http://localhost:8081/v1")
                results[name] = id(client)

        t1 = threading.Thread(target=_get_sync_client, args=("a",))
        t2 = threading.Thread(target=_get_sync_client, args=("b",))
        t1.start(); t1.join()
        t2.start(); t2.join()
        assert results["a"] == results["b"], (
            "Sync clients should be shared across threads (no loop binding)"
        )

    def test_gateway_simulation_no_deadlock(self):
        """Simulate gateway mode: _run_async spawns a thread with asyncio.run(),
        which creates a new loop. The cached client must be created on THAT loop,
        not reused from a different one."""
        from agent.auxiliary_client import _get_cached_client
        # Simulate: first call on "gateway loop"
        gateway_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(gateway_loop)
        with patch("agent.auxiliary_client.resolve_provider_client",
                   side_effect=_stub_resolve_provider_client):
            gateway_client, _ = _get_cached_client("custom", "m1", async_mode=True,
                                                   base_url="http://localhost:8081/v1")
        # Simulate: _run_async spawns a thread with asyncio.run()
        worker_client_id = [None]

        def _worker():
            async def _inner():
                with patch("agent.auxiliary_client.resolve_provider_client",
                           side_effect=_stub_resolve_provider_client):
                    client, _ = _get_cached_client("custom", "m1", async_mode=True,
                                                   base_url="http://localhost:8081/v1")
                    worker_client_id[0] = id(client)
            # asyncio.run() builds a brand-new loop — the exact scenario
            # that deadlocked with a globally-shared async client.
            asyncio.run(_inner())

        t = threading.Thread(target=_worker)
        t.start()
        t.join()
        assert worker_client_id[0] != id(gateway_client), (
            "Worker thread (asyncio.run) got the gateway's cached client — "
            "this is the exact cross-loop scenario that causes httpx deadlocks. "
            "The cache key must include the event loop identity (#2681)"
        )
        gateway_loop.close()

    def test_closed_loop_client_discarded(self):
        """A cached client whose loop has closed should be replaced."""
        from agent.auxiliary_client import _get_cached_client
        loop1 = asyncio.new_event_loop()
        asyncio.set_event_loop(loop1)
        with patch("agent.auxiliary_client.resolve_provider_client",
                   side_effect=_stub_resolve_provider_client):
            client1, _ = _get_cached_client("custom", "m1", async_mode=True,
                                            base_url="http://localhost:8081/v1")
        loop1.close()
        # New loop on same thread
        loop2 = asyncio.new_event_loop()
        asyncio.set_event_loop(loop2)
        with patch("agent.auxiliary_client.resolve_provider_client",
                   side_effect=_stub_resolve_provider_client):
            client2, _ = _get_cached_client("custom", "m1", async_mode=True,
                                            base_url="http://localhost:8081/v1")
        # The stale-client check inside _get_cached_client should have
        # discarded client1 (its loop is closed) and built client2 fresh.
        assert client1 is not client2, (
            "Client from closed loop should not be reused"
        )
        loop2.close()

View File

@@ -358,12 +358,14 @@ def session_search(
return await asyncio.gather(*coros, return_exceptions=True)
try:
asyncio.get_running_loop()
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
results = pool.submit(lambda: asyncio.run(_summarize_all())).result(timeout=60)
except RuntimeError:
# No event loop running, create a new one
results = asyncio.run(_summarize_all())
# Use _run_async() which properly manages event loops across
# CLI, gateway, and worker-thread contexts. The previous
# pattern (asyncio.run() in a ThreadPoolExecutor) created a
# disposable event loop that conflicted with cached
# AsyncOpenAI/httpx clients bound to a different loop,
# causing deadlocks in gateway mode (#2681).
from model_tools import _run_async
results = _run_async(_summarize_all())
except concurrent.futures.TimeoutError:
logging.warning(
"Session summarization timed out after 60 seconds",