From fc061c2fee59e00227b512e9b132c745988c6d57 Mon Sep 17 00:00:00 2001 From: Test Date: Thu, 19 Mar 2026 18:26:09 -0700 Subject: [PATCH] fix: harden sentinel guard for /stop during setup and shutdown - /stop during sentinel returns helpful message instead of queuing - Shutdown loop skips sentinel entries instead of catching AttributeError - _handle_stop_command guards against sentinel (defensive) - Added tests for both edge cases (7 total race guard tests) --- gateway/run.py | 16 ++++-- tests/gateway/test_session_race_guard.py | 70 ++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 08beabc91..679bc6c6f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1056,6 +1056,8 @@ class GatewayRunner: self._running = False for session_key, agent in list(self._running_agents.items()): + if agent is _AGENT_PENDING_SENTINEL: + continue try: agent.interrupt("Gateway shutting down") logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20]) @@ -1354,8 +1356,12 @@ class GatewayRunner: running_agent = self._running_agents.get(_quick_key) if running_agent is _AGENT_PENDING_SENTINEL: - # Agent is being set up but not ready yet — queue the message - # so it will be picked up after the agent starts. + # Agent is being set up but not ready yet. + if event.get_command() == "stop": + # Nothing to interrupt — agent hasn't started yet. + return "⏳ The agent is still starting up — nothing to stop yet." + # Queue the message so it will be picked up after the + # agent starts. adapter = self.adapters.get(source.platform) if adapter: adapter._pending_messages[_quick_key] = event @@ -2326,8 +2332,10 @@ class GatewayRunner: session_entry = self.session_store.get_or_create_session(source) session_key = session_entry.session_key - if session_key in self._running_agents: - agent = self._running_agents[session_key] + agent = self._running_agents.get(session_key) + if agent is _AGENT_PENDING_SENTINEL: + return "⏳ The agent is still starting up — nothing to stop yet." + if agent: agent.interrupt() return "⚡ Stopping the current task... The agent will finish its current step and respond." else: diff --git a/tests/gateway/test_session_race_guard.py b/tests/gateway/test_session_race_guard.py index 0161b44c7..3c11a1a31 100644 --- a/tests/gateway/test_session_race_guard.py +++ b/tests/gateway/test_session_race_guard.py @@ -195,3 +195,73 @@ async def test_command_messages_do_not_leave_sentinel(): assert session_key not in runner._running_agents, ( "Command handlers must not leave sentinel in _running_agents" ) + + +# ------------------------------------------------------------------ +# Test 6: /stop during sentinel returns helpful message +# ------------------------------------------------------------------ +@pytest.mark.asyncio +async def test_stop_during_sentinel_returns_message(): + """If /stop arrives while the sentinel is set (agent still starting), + it should return a helpful message instead of crashing or queuing.""" + runner = _make_runner() + event1 = _make_event(text="hello") + session_key = build_session_key(event1.source) + + barrier = asyncio.Event() + + async def slow_inner(self_inner, ev, src, qk): + await barrier.wait() + return "ok" + + with patch.object(GatewayRunner, "_handle_message_with_agent", slow_inner): + task1 = asyncio.create_task(runner._handle_message(event1)) + await asyncio.sleep(0) + + # Sentinel should be set + assert runner._running_agents.get(session_key) is _AGENT_PENDING_SENTINEL + + # Send /stop — should get a message, not crash + stop_event = _make_event(text="/stop") + result = await runner._handle_message(stop_event) + assert result is not None, "/stop during sentinel should return a message" + assert "starting up" in result.lower() + + # Should NOT be queued as pending + adapter = runner.adapters[Platform.TELEGRAM] + assert session_key not in adapter._pending_messages + + barrier.set() + await task1 + + +# ------------------------------------------------------------------ +# Test 7: Shutdown skips sentinel entries +# ------------------------------------------------------------------ +@pytest.mark.asyncio +async def test_shutdown_skips_sentinel(): + """During gateway shutdown, sentinel entries in _running_agents + should be skipped without raising AttributeError.""" + runner = _make_runner() + session_key = "telegram:dm:99999" + + # Simulate a sentinel in _running_agents + runner._running_agents[session_key] = _AGENT_PENDING_SENTINEL + + # Also add a real agent mock to verify it still gets interrupted + real_agent = MagicMock() + runner._running_agents["telegram:dm:88888"] = real_agent + + runner.adapters = {} # No adapters to disconnect + runner._running = True + runner._shutdown_event = asyncio.Event() + runner._exit_reason = None + runner._shutdown_all_gateway_honcho = lambda: None + + with patch("gateway.status.remove_pid_file"), \ + patch("gateway.status.write_runtime_status"): + await runner.stop() + + # Real agent should have been interrupted + real_agent.interrupt.assert_called_once() + # Should not have raised on the sentinel