fix(gateway): /restart uses service restart under systemd instead of detached subprocess
The detached bash subprocess spawned by /restart gets killed by systemd's KillMode=mixed cgroup cleanup, leaving the gateway dead. Under systemd (detected via INVOCATION_ID env var), /restart now uses via_service=True which exits with code 75 — RestartForceExitStatus=75 in the unit file makes systemd auto-restart the service. The detached subprocess approach is preserved as fallback for non-systemd environments (Docker, tmux, foreground mode).
This commit is contained in:
@@ -4167,7 +4167,16 @@ class GatewayRunner:
|
||||
logger.debug("Failed to write restart notify file: %s", e)
|
||||
|
||||
active_agents = self._running_agent_count()
|
||||
self.request_restart(detached=True, via_service=False)
|
||||
# When running under a service manager (systemd/launchd), use the
|
||||
# service restart path: exit with code 75 so the service manager
|
||||
# restarts us. The detached subprocess approach (setsid + bash)
|
||||
# doesn't work under systemd because KillMode=mixed kills all
|
||||
# processes in the cgroup, including the detached helper.
|
||||
_under_service = bool(os.environ.get("INVOCATION_ID")) # systemd sets this
|
||||
if _under_service:
|
||||
self.request_restart(detached=False, via_service=True)
|
||||
else:
|
||||
self.request_restart(detached=True, via_service=False)
|
||||
if active_agents:
|
||||
return f"⏳ Draining {active_agents} active agent(s) before restart..."
|
||||
return "♻ Restarting gateway..."
|
||||
|
||||
@@ -47,6 +47,48 @@ async def test_restart_command_writes_notify_file(tmp_path, monkeypatch):
|
||||
assert "thread_id" not in data # no thread → omitted
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_restart_command_uses_service_restart_under_systemd(tmp_path, monkeypatch):
|
||||
"""Under systemd (INVOCATION_ID set), /restart uses via_service=True."""
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
monkeypatch.setenv("INVOCATION_ID", "abc123")
|
||||
|
||||
runner, _adapter = make_restart_runner()
|
||||
runner.request_restart = MagicMock(return_value=True)
|
||||
|
||||
source = make_restart_source(chat_id="42")
|
||||
event = MessageEvent(
|
||||
text="/restart",
|
||||
message_type=MessageType.TEXT,
|
||||
source=source,
|
||||
message_id="m1",
|
||||
)
|
||||
|
||||
await runner._handle_restart_command(event)
|
||||
runner.request_restart.assert_called_once_with(detached=False, via_service=True)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_restart_command_uses_detached_without_systemd(tmp_path, monkeypatch):
|
||||
"""Without systemd, /restart uses the detached subprocess approach."""
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
monkeypatch.delenv("INVOCATION_ID", raising=False)
|
||||
|
||||
runner, _adapter = make_restart_runner()
|
||||
runner.request_restart = MagicMock(return_value=True)
|
||||
|
||||
source = make_restart_source(chat_id="42")
|
||||
event = MessageEvent(
|
||||
text="/restart",
|
||||
message_type=MessageType.TEXT,
|
||||
source=source,
|
||||
message_id="m1",
|
||||
)
|
||||
|
||||
await runner._handle_restart_command(event)
|
||||
runner.request_restart.assert_called_once_with(detached=True, via_service=False)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_restart_command_preserves_thread_id(tmp_path, monkeypatch):
|
||||
"""Thread ID is saved when the requester is in a threaded chat."""
|
||||
|
||||
Reference in New Issue
Block a user