Four fixes for MCP server stability issues reported by community member (terminal lockup, zombie processes, escape sequence pollution, startup hang): 1. MCP reload timeout guard (cli.py): _check_config_mcp_changes now runs _reload_mcp in a separate daemon thread with a 30s hard timeout. Previously, a hung MCP server could block the process_loop thread indefinitely, freezing the entire TUI (user can type but nothing happens, only Ctrl+D/Ctrl+\ work). 2. MCP stdio subprocess PID tracking (mcp_tool.py): Tracks child PIDs spawned by stdio_client via before/after snapshots of /proc children. On shutdown, _stop_mcp_loop force-kills any tracked PIDs that survived the SDK's graceful SIGTERM→SIGKILL cleanup. Prevents zombie MCP server processes from accumulating across sessions. 3. MCP event loop exception handler (mcp_tool.py): Installs _mcp_loop_exception_handler on the MCP background event loop — same pattern as the existing _suppress_closed_loop_errors on prompt_toolkit's loop. Suppresses benign 'Event loop is closed' RuntimeError from httpx transport __del__ during MCP shutdown. Salvaged from PR #2538 (acsezen). 4. MCP OAuth non-blocking (mcp_oauth.py): Replaces blocking input() call in _wait_for_callback with OAuthNonInteractiveError raise. Adds _is_interactive() TTY detection. In non-interactive environments, build_oauth_auth() still returns a provider (cached tokens + refresh work), but the callback handler raises immediately instead of blocking the MCP event loop for 120s. Re-raises OAuth setup failures in _run_http so failed servers are reported cleanly without blocking others. Salvaged from PRs #4521 (voidborne-d) and #4465 (heathley). Closes #2537, closes #4462 Related: #4128, #3436
144 lines
5.3 KiB
Python
144 lines
5.3 KiB
Python
"""Tests for MCP stability fixes — event loop handler, PID tracking, shutdown robustness."""
|
|
|
|
import asyncio
|
|
import os
|
|
import signal
|
|
import threading
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pytest
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fix 1: MCP event loop exception handler
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMCPLoopExceptionHandler:
|
|
"""_mcp_loop_exception_handler suppresses benign 'Event loop is closed'."""
|
|
|
|
def test_suppresses_event_loop_closed(self):
|
|
from tools.mcp_tool import _mcp_loop_exception_handler
|
|
loop = MagicMock()
|
|
context = {"exception": RuntimeError("Event loop is closed")}
|
|
# Should NOT call default handler
|
|
_mcp_loop_exception_handler(loop, context)
|
|
loop.default_exception_handler.assert_not_called()
|
|
|
|
def test_forwards_other_runtime_errors(self):
|
|
from tools.mcp_tool import _mcp_loop_exception_handler
|
|
loop = MagicMock()
|
|
context = {"exception": RuntimeError("some other error")}
|
|
_mcp_loop_exception_handler(loop, context)
|
|
loop.default_exception_handler.assert_called_once_with(context)
|
|
|
|
def test_forwards_non_runtime_errors(self):
|
|
from tools.mcp_tool import _mcp_loop_exception_handler
|
|
loop = MagicMock()
|
|
context = {"exception": ValueError("bad value")}
|
|
_mcp_loop_exception_handler(loop, context)
|
|
loop.default_exception_handler.assert_called_once_with(context)
|
|
|
|
def test_forwards_contexts_without_exception(self):
|
|
from tools.mcp_tool import _mcp_loop_exception_handler
|
|
loop = MagicMock()
|
|
context = {"message": "just a message"}
|
|
_mcp_loop_exception_handler(loop, context)
|
|
loop.default_exception_handler.assert_called_once_with(context)
|
|
|
|
def test_handler_installed_on_mcp_loop(self):
|
|
"""_ensure_mcp_loop installs the exception handler on the new loop."""
|
|
import tools.mcp_tool as mcp_mod
|
|
try:
|
|
mcp_mod._ensure_mcp_loop()
|
|
with mcp_mod._lock:
|
|
loop = mcp_mod._mcp_loop
|
|
assert loop is not None
|
|
assert loop.get_exception_handler() is mcp_mod._mcp_loop_exception_handler
|
|
finally:
|
|
mcp_mod._stop_mcp_loop()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fix 2: stdio PID tracking
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestStdioPidTracking:
|
|
"""_snapshot_child_pids and _stdio_pids track subprocess PIDs."""
|
|
|
|
def test_snapshot_returns_set(self):
|
|
from tools.mcp_tool import _snapshot_child_pids
|
|
result = _snapshot_child_pids()
|
|
assert isinstance(result, set)
|
|
# All elements should be ints
|
|
for pid in result:
|
|
assert isinstance(pid, int)
|
|
|
|
def test_stdio_pids_starts_empty(self):
|
|
from tools.mcp_tool import _stdio_pids, _lock
|
|
with _lock:
|
|
# Might have residual state from other tests, just check type
|
|
assert isinstance(_stdio_pids, set)
|
|
|
|
def test_kill_orphaned_noop_when_empty(self):
|
|
"""_kill_orphaned_mcp_children does nothing when no PIDs tracked."""
|
|
from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock
|
|
|
|
with _lock:
|
|
_stdio_pids.clear()
|
|
|
|
# Should not raise
|
|
_kill_orphaned_mcp_children()
|
|
|
|
def test_kill_orphaned_handles_dead_pids(self):
|
|
"""_kill_orphaned_mcp_children gracefully handles already-dead PIDs."""
|
|
from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock
|
|
|
|
# Use a PID that definitely doesn't exist
|
|
fake_pid = 999999999
|
|
with _lock:
|
|
_stdio_pids.add(fake_pid)
|
|
|
|
# Should not raise (ProcessLookupError is caught)
|
|
_kill_orphaned_mcp_children()
|
|
|
|
with _lock:
|
|
assert fake_pid not in _stdio_pids
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fix 3: MCP reload timeout (cli.py)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMCPReloadTimeout:
|
|
"""_check_config_mcp_changes uses a timeout on _reload_mcp."""
|
|
|
|
def test_reload_timeout_does_not_block_forever(self, tmp_path, monkeypatch):
|
|
"""If _reload_mcp hangs, the config watcher times out and returns."""
|
|
import time
|
|
|
|
# Create a mock HermesCLI-like object with the needed attributes
|
|
class FakeCLI:
|
|
_config_mtime = 0.0
|
|
_config_mcp_servers = {}
|
|
_last_config_check = 0.0
|
|
_command_running = False
|
|
config = {}
|
|
agent = None
|
|
|
|
def _reload_mcp(self):
|
|
# Simulate a hang — sleep longer than the timeout
|
|
time.sleep(60)
|
|
|
|
def _slow_command_status(self, cmd):
|
|
return cmd
|
|
|
|
# This test verifies the timeout mechanism exists in the code
|
|
# by checking that _check_config_mcp_changes doesn't call
|
|
# _reload_mcp directly (it uses a thread now)
|
|
import inspect
|
|
from cli import HermesCLI
|
|
source = inspect.getsource(HermesCLI._check_config_mcp_changes)
|
|
# The fix adds threading.Thread for _reload_mcp
|
|
assert "Thread" in source or "thread" in source.lower(), \
|
|
"_check_config_mcp_changes should use a thread for _reload_mcp"
|