Merge pull request #1614 from PeterFile/fix/launchd-service-recovery
fix(gateway): recover stale launchd service state
This commit is contained in:
@@ -562,6 +562,12 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
|
||||
scope_flag = " --system" if system else ""
|
||||
|
||||
if unit_path.exists() and not force:
|
||||
if not systemd_unit_is_current(system=system):
|
||||
print(f"↻ Repairing outdated {_service_scope_label(system)} systemd service at: {unit_path}")
|
||||
refresh_systemd_unit_if_needed(system=system)
|
||||
subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True)
|
||||
print(f"✓ {_service_scope_label(system).capitalize()} service definition updated")
|
||||
return
|
||||
print(f"Service already installed at: {unit_path}")
|
||||
print("Use --force to reinstall")
|
||||
return
|
||||
@@ -787,6 +793,11 @@ def launchd_install(force: bool = False):
|
||||
plist_path = get_launchd_plist_path()
|
||||
|
||||
if plist_path.exists() and not force:
|
||||
if not launchd_plist_is_current():
|
||||
print(f"↻ Repairing outdated launchd service at: {plist_path}")
|
||||
refresh_launchd_plist_if_needed()
|
||||
print("✓ Service definition updated")
|
||||
return
|
||||
print(f"Service already installed at: {plist_path}")
|
||||
print("Use --force to reinstall")
|
||||
return
|
||||
@@ -816,7 +827,15 @@ def launchd_uninstall():
|
||||
|
||||
def launchd_start():
|
||||
refresh_launchd_plist_if_needed()
|
||||
subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
|
||||
plist_path = get_launchd_plist_path()
|
||||
try:
|
||||
subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
|
||||
except subprocess.CalledProcessError as e:
|
||||
if e.returncode != 3 or not plist_path.exists():
|
||||
raise
|
||||
print("↻ launchd job was unloaded; reloading service definition")
|
||||
subprocess.run(["launchctl", "load", str(plist_path)], check=True)
|
||||
subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
|
||||
print("✓ Service started")
|
||||
|
||||
def launchd_stop():
|
||||
@@ -824,22 +843,36 @@ def launchd_stop():
|
||||
print("✓ Service stopped")
|
||||
|
||||
def launchd_restart():
|
||||
refresh_launchd_plist_if_needed()
|
||||
launchd_stop()
|
||||
try:
|
||||
launchd_stop()
|
||||
except subprocess.CalledProcessError as e:
|
||||
if e.returncode != 3:
|
||||
raise
|
||||
print("↻ launchd job was unloaded; skipping stop")
|
||||
launchd_start()
|
||||
|
||||
def launchd_status(deep: bool = False):
|
||||
plist_path = get_launchd_plist_path()
|
||||
result = subprocess.run(
|
||||
["launchctl", "list", "ai.hermes.gateway"],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
|
||||
print(f"Launchd plist: {plist_path}")
|
||||
if launchd_plist_is_current():
|
||||
print("✓ Service definition matches the current Hermes install")
|
||||
else:
|
||||
print("⚠ Service definition is stale relative to the current Hermes install")
|
||||
print(" Run: hermes gateway start")
|
||||
|
||||
if result.returncode == 0:
|
||||
print("✓ Gateway service is loaded")
|
||||
print(result.stdout)
|
||||
else:
|
||||
print("✗ Gateway service is not loaded")
|
||||
print(" Service definition exists locally but launchd has not loaded it.")
|
||||
print(" Run: hermes gateway start")
|
||||
|
||||
if deep:
|
||||
log_file = get_hermes_home() / "logs" / "gateway.log"
|
||||
@@ -1555,14 +1588,17 @@ def gateway_command(args):
|
||||
# Try service first, fall back to killing and restarting
|
||||
service_available = False
|
||||
system = getattr(args, 'system', False)
|
||||
service_configured = False
|
||||
|
||||
if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
|
||||
service_configured = True
|
||||
try:
|
||||
systemd_restart(system=system)
|
||||
service_available = True
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
elif is_macos() and get_launchd_plist_path().exists():
|
||||
service_configured = True
|
||||
try:
|
||||
launchd_restart()
|
||||
service_available = True
|
||||
@@ -1586,6 +1622,13 @@ def gateway_command(args):
|
||||
print(" hermes gateway restart")
|
||||
return
|
||||
|
||||
if service_configured:
|
||||
print()
|
||||
print("✗ Gateway service restart failed.")
|
||||
print(" The service definition exists, but the service manager did not recover it.")
|
||||
print(" Fix the service, then retry: hermes gateway start")
|
||||
sys.exit(1)
|
||||
|
||||
# Manual restart: kill existing processes
|
||||
killed = kill_gateway_processes()
|
||||
if killed:
|
||||
|
||||
@@ -7,6 +7,29 @@ import hermes_cli.gateway as gateway_cli
|
||||
|
||||
|
||||
class TestSystemdServiceRefresh:
|
||||
def test_systemd_install_repairs_outdated_unit_without_force(self, tmp_path, monkeypatch):
|
||||
unit_path = tmp_path / "hermes-gateway.service"
|
||||
unit_path.write_text("old unit\n", encoding="utf-8")
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
|
||||
monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_run(cmd, check=True, **kwargs):
|
||||
calls.append(cmd)
|
||||
return SimpleNamespace(returncode=0, stdout="", stderr="")
|
||||
|
||||
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
||||
|
||||
gateway_cli.systemd_install()
|
||||
|
||||
assert unit_path.read_text(encoding="utf-8") == "new unit\n"
|
||||
assert calls[:2] == [
|
||||
["systemctl", "--user", "daemon-reload"],
|
||||
["systemctl", "--user", "enable", gateway_cli.get_service_name()],
|
||||
]
|
||||
|
||||
def test_systemd_start_refreshes_outdated_unit(self, tmp_path, monkeypatch):
|
||||
unit_path = tmp_path / "hermes-gateway.service"
|
||||
unit_path.write_text("old unit\n", encoding="utf-8")
|
||||
@@ -96,6 +119,71 @@ class TestGatewayStopCleanup:
|
||||
assert kill_calls == [False]
|
||||
|
||||
|
||||
class TestLaunchdServiceRecovery:
|
||||
def test_launchd_install_repairs_outdated_plist_without_force(self, tmp_path, monkeypatch):
|
||||
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
||||
plist_path.write_text("<plist>old content</plist>", encoding="utf-8")
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_run(cmd, check=False, **kwargs):
|
||||
calls.append(cmd)
|
||||
return SimpleNamespace(returncode=0, stdout="", stderr="")
|
||||
|
||||
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
||||
|
||||
gateway_cli.launchd_install()
|
||||
|
||||
assert "--replace" in plist_path.read_text(encoding="utf-8")
|
||||
assert calls[:2] == [
|
||||
["launchctl", "unload", str(plist_path)],
|
||||
["launchctl", "load", str(plist_path)],
|
||||
]
|
||||
|
||||
def test_launchd_start_reloads_unloaded_job_and_retries(self, tmp_path, monkeypatch):
|
||||
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
||||
plist_path.write_text(gateway_cli.generate_launchd_plist(), encoding="utf-8")
|
||||
|
||||
calls = []
|
||||
|
||||
def fake_run(cmd, check=False, **kwargs):
|
||||
calls.append(cmd)
|
||||
if cmd == ["launchctl", "start", "ai.hermes.gateway"] and calls.count(cmd) == 1:
|
||||
raise gateway_cli.subprocess.CalledProcessError(3, cmd, stderr="Could not find service")
|
||||
return SimpleNamespace(returncode=0, stdout="", stderr="")
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
||||
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
||||
|
||||
gateway_cli.launchd_start()
|
||||
|
||||
assert calls == [
|
||||
["launchctl", "start", "ai.hermes.gateway"],
|
||||
["launchctl", "load", str(plist_path)],
|
||||
["launchctl", "start", "ai.hermes.gateway"],
|
||||
]
|
||||
|
||||
def test_launchd_status_reports_local_stale_plist_when_unloaded(self, tmp_path, monkeypatch, capsys):
|
||||
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
||||
plist_path.write_text("<plist>old content</plist>", encoding="utf-8")
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli.subprocess,
|
||||
"run",
|
||||
lambda *args, **kwargs: SimpleNamespace(returncode=113, stdout="", stderr="Could not find service"),
|
||||
)
|
||||
|
||||
gateway_cli.launchd_status()
|
||||
|
||||
output = capsys.readouterr().out
|
||||
assert str(plist_path) in output
|
||||
assert "stale" in output.lower()
|
||||
assert "not loaded" in output.lower()
|
||||
|
||||
|
||||
class TestGatewayServiceDetection:
|
||||
def test_is_service_running_checks_system_scope_when_user_scope_is_inactive(self, monkeypatch):
|
||||
user_unit = SimpleNamespace(exists=lambda: True)
|
||||
@@ -158,6 +246,34 @@ class TestGatewaySystemServiceRouting:
|
||||
|
||||
assert calls == [(False, False)]
|
||||
|
||||
def test_gateway_restart_does_not_fallback_to_foreground_when_launchd_restart_fails(self, tmp_path, monkeypatch):
|
||||
plist_path = tmp_path / "ai.hermes.gateway.plist"
|
||||
plist_path.write_text("plist\n", encoding="utf-8")
|
||||
|
||||
monkeypatch.setattr(gateway_cli, "is_linux", lambda: False)
|
||||
monkeypatch.setattr(gateway_cli, "is_macos", lambda: True)
|
||||
monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli,
|
||||
"launchd_restart",
|
||||
lambda: (_ for _ in ()).throw(
|
||||
gateway_cli.subprocess.CalledProcessError(5, ["launchctl", "start", "ai.hermes.gateway"])
|
||||
),
|
||||
)
|
||||
|
||||
run_calls = []
|
||||
monkeypatch.setattr(gateway_cli, "run_gateway", lambda verbose=False, replace=False: run_calls.append((verbose, replace)))
|
||||
monkeypatch.setattr(gateway_cli, "kill_gateway_processes", lambda force=False: 0)
|
||||
|
||||
try:
|
||||
gateway_cli.gateway_command(SimpleNamespace(gateway_command="restart", system=False))
|
||||
except SystemExit as exc:
|
||||
assert exc.code == 1
|
||||
else:
|
||||
raise AssertionError("Expected gateway_command to exit when service restart fails")
|
||||
|
||||
assert run_calls == []
|
||||
|
||||
|
||||
class TestEnsureUserSystemdEnv:
|
||||
"""Tests for _ensure_user_systemd_env() D-Bus session bus auto-detection."""
|
||||
|
||||
Reference in New Issue
Block a user