fix(gateway): close adapter resources when connect() fails or raises (#12339)

Gateway startup leaks aiohttp.ClientSession (and other partial-init
resources) when an adapter's connect() returns False or raises. The
adapter is never added to self.adapters, so the shutdown path at
gateway/run.py:2426 never calls disconnect() on it — Python GC later
logs 'Unclosed client session' at process exit.

Seen on 2026-04-18 18:08:16 during a double --replace takeover cycle:
one of the partial-init sessions survived past shutdown and emitted
the warning right before status=75/TEMPFAIL.

Fix:
- New GatewayRunner._safe_adapter_disconnect() helper — calls
  adapter.disconnect() and swallows any exception. Used on error paths.
- Connect loop calls it in both failure branches: success=False and
  except Exception.
- Adapter disconnect() implementations are already expected to be
  idempotent and tolerate partial-init state (they all guard on
  self._http_session / self._bridge_process before touching them).

Tests: tests/gateway/test_safe_adapter_disconnect.py — 3 cases verify
the helper forwards to disconnect, swallows exceptions, and tolerates
platform=None.
This commit is contained in:
Teknium
2026-04-18 18:53:31 -07:00
committed by GitHub
parent 632a807a3e
commit beabbd87ef
2 changed files with 92 additions and 0 deletions

View File

@@ -752,6 +752,26 @@ class GatewayRunner:
chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
)
async def _safe_adapter_disconnect(self, adapter, platform) -> None:
"""Call adapter.disconnect() defensively, swallowing any error.
Used when adapter.connect() failed or raised — the adapter may
have allocated partial resources (aiohttp.ClientSession, poll
tasks, child subprocesses) that would otherwise leak and surface
as "Unclosed client session" warnings at process exit.
Must tolerate partial-init state and never raise, since callers
use it inside error-handling blocks.
"""
try:
await adapter.disconnect()
except Exception as e:
logger.debug(
"Defensive %s disconnect after failed connect raised: %s",
platform.value if platform is not None else "adapter",
e,
)
# -----------------------------------------------------------------
def _flush_memories_for_session(
@@ -1913,6 +1933,15 @@ class GatewayRunner:
logger.info("%s connected", platform.value)
else:
logger.warning("%s failed to connect", platform.value)
# Defensive cleanup: a failed connect() may have
# allocated resources (aiohttp.ClientSession, poll
# tasks, bridge subprocesses) before giving up.
# Without this call, those resources are orphaned
# and Python logs "Unclosed client session" at
# process exit. Adapter disconnect() implementations
# are expected to be idempotent and tolerate
# partial-init state.
await self._safe_adapter_disconnect(adapter, platform)
if adapter.has_fatal_error:
self._update_platform_runtime_status(
platform.value,
@@ -1953,6 +1982,10 @@ class GatewayRunner:
}
except Exception as e:
logger.error("%s error: %s", platform.value, e)
# Same defensive cleanup path for exceptions — an adapter
# that raised mid-connect may still have a live
# aiohttp.ClientSession or child subprocess.
await self._safe_adapter_disconnect(adapter, platform)
self._update_platform_runtime_status(
platform.value,
platform_state="retrying",

View File

@@ -0,0 +1,59 @@
"""Regression tests: failed-connect path must call adapter.disconnect().
When adapter.connect() returns False or raises, the adapter may have
allocated resources (aiohttp.ClientSession, poll tasks, child
subprocesses) before giving up. Without a defensive disconnect() call
these leak and surface as "Unclosed client session" warnings at
process exit (seen on the 2026-04-18 18:08:16 gateway restart).
The fix: gateway/run.py wraps each adapter connect() with a safety-net
call to _safe_adapter_disconnect() in the failure branches.
"""
from unittest.mock import AsyncMock, MagicMock
import pytest
from gateway.config import Platform
from gateway.run import GatewayRunner
@pytest.fixture
def bare_runner():
"""A GatewayRunner shell that only needs to support _safe_adapter_disconnect."""
return object.__new__(GatewayRunner)
@pytest.mark.asyncio
async def test_safe_disconnect_calls_adapter_disconnect(bare_runner):
"""The helper forwards to adapter.disconnect()."""
adapter = MagicMock()
adapter.disconnect = AsyncMock(return_value=None)
await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM)
adapter.disconnect.assert_awaited_once()
@pytest.mark.asyncio
async def test_safe_disconnect_swallows_exceptions(bare_runner):
"""An exception in adapter.disconnect() must not propagate — the
caller is already on an error path."""
adapter = MagicMock()
adapter.disconnect = AsyncMock(side_effect=RuntimeError("partial init"))
# Must NOT raise
await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM)
adapter.disconnect.assert_awaited_once()
@pytest.mark.asyncio
async def test_safe_disconnect_handles_none_platform(bare_runner):
"""Logging path must tolerate platform=None."""
adapter = MagicMock()
adapter.disconnect = AsyncMock(side_effect=ValueError("nope"))
await bare_runner._safe_adapter_disconnect(adapter, None)
adapter.disconnect.assert_awaited_once()