"""
Gateway runtime status helpers.

Provides PID-file based detection of whether the gateway daemon is running,
used by send_message's check_fn to gate availability in the CLI.

The PID file lives at ``{HERMES_HOME}/gateway.pid``.  HERMES_HOME defaults to
``~/.hermes`` but can be overridden via the environment variable.  This means
separate HERMES_HOME directories naturally get separate PID files — a property
that will be useful when we add named profiles (multiple agents running
concurrently under distinct configurations).
"""

import json
import os
import sys
from pathlib import Path
from typing import Optional

_GATEWAY_KIND = "hermes-gateway"

# Command-line fragments that identify a process as the Hermes gateway.
_GATEWAY_CMDLINE_PATTERNS = (
    "hermes_cli.main gateway",
    "hermes gateway",
    "gateway/run.py",
)


def _get_pid_path() -> Path:
    """Return the path to the gateway PID file, respecting HERMES_HOME."""
    override = os.environ.get("HERMES_HOME")
    # Resolve the default lazily: Path.home() can raise RuntimeError when no
    # home directory can be determined, which must not matter if HERMES_HOME
    # is set explicitly.
    home = Path(override) if override is not None else Path.home() / ".hermes"
    return home / "gateway.pid"


def _parse_stat_start_time(stat_text: str) -> Optional[int]:
    """Extract the start time (field 22) from ``/proc/<pid>/stat`` content.

    Field 2 is the process name wrapped in parentheses and may itself contain
    spaces or parentheses, so the remaining fields are located after the
    *last* ``)`` instead of by naively splitting the whole line.

    Returns ``None`` when the text does not have the expected shape.
    """
    _, closing_paren, tail = stat_text.rpartition(")")
    if not closing_paren:
        return None
    try:
        # ``tail`` starts at field 3 (state), so field 22 sits at index 19.
        return int(tail.split()[19])
    except (IndexError, ValueError):
        return None


def _get_process_start_time(pid: int) -> Optional[int]:
    """Return the kernel start time (clock ticks) for *pid* when available.

    Returns ``None`` if ``/proc/<pid>/stat`` cannot be read or parsed.
    """
    try:
        stat_text = Path(f"/proc/{pid}/stat").read_text(
            encoding="utf-8", errors="replace"
        )
    except (OSError, ValueError):
        return None
    return _parse_stat_start_time(stat_text)


def _read_process_cmdline(pid: int) -> Optional[str]:
    """Return the process command line as a space-separated string.

    Returns ``None`` when ``/proc/<pid>/cmdline`` is unreadable or empty.
    """
    try:
        raw = Path(f"/proc/{pid}/cmdline").read_bytes()
    except OSError:
        return None
    if not raw:
        return None
    # Arguments are NUL-separated in /proc/<pid>/cmdline.
    return raw.replace(b"\x00", b" ").decode("utf-8", errors="ignore").strip()


def _looks_like_gateway_process(pid: int) -> bool:
    """Return True when the live PID still looks like the Hermes gateway."""
    cmdline = _read_process_cmdline(pid)
    if not cmdline:
        # If we cannot inspect the process, fall back to the liveness check.
        return True
    return any(pattern in cmdline for pattern in _GATEWAY_CMDLINE_PATTERNS)


def _build_pid_record() -> dict:
    """Build the metadata record describing the current process."""
    pid = os.getpid()
    return {
        "pid": pid,
        "kind": _GATEWAY_KIND,
        "argv": list(sys.argv),
        "start_time": _get_process_start_time(pid),
    }


def _read_pid_record() -> Optional[dict]:
    """Load the PID record from disk.

    Accepts both the JSON object format written by ``write_pid_file`` and a
    bare integer PID.  Returns ``None`` when the file is missing, unreadable,
    empty, or malformed.
    """
    try:
        # EAFP: the file may vanish (or be unreadable) between an existence
        # check and the read, so just try to read it.
        raw = _get_pid_path().read_text(encoding="utf-8").strip()
    except (OSError, ValueError):
        return None
    if not raw:
        return None

    try:
        payload = json.loads(raw)
    except json.JSONDecodeError:
        # Not JSON: fall back to parsing the content as a plain integer.
        try:
            return {"pid": int(raw)}
        except ValueError:
            return None

    # bool is a subclass of int; ``true``/``false`` is never a valid PID.
    if isinstance(payload, bool):
        return None
    if isinstance(payload, int):
        return {"pid": payload}
    if isinstance(payload, dict):
        return payload
    return None


def write_pid_file() -> None:
    """Write the current process PID and metadata to the gateway PID file.

    The record is written to a temporary sibling file and then moved into
    place with ``os.replace`` so that a concurrent reader never observes a
    truncated file (which it would treat as stale and delete).

    Raises:
        OSError: if the directory or file cannot be created or written.
    """
    pid_path = _get_pid_path()
    pid_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = pid_path.with_name(f"{pid_path.name}.{os.getpid()}.tmp")
    try:
        tmp_path.write_text(json.dumps(_build_pid_record()), encoding="utf-8")
        os.replace(tmp_path, pid_path)
    except OSError:
        # Do not leave a half-written temp file behind on failure.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise


def remove_pid_file() -> None:
    """Remove the gateway PID file if it exists (best effort, never raises
    for filesystem errors)."""
    try:
        _get_pid_path().unlink(missing_ok=True)
    except (OSError, RuntimeError):
        # OSError: the file could not be removed.  RuntimeError: the home
        # directory could not be resolved.  Cleanup is deliberately
        # best-effort, so neither is fatal.
        pass


def _resolve_live_pid(record: Optional[dict]) -> Optional[int]:
    """Return the PID from *record* if it refers to a live gateway process.

    Returns ``None`` when the record is missing or invalid, the process is
    gone, the PID appears to have been reused, or the process does not look
    like the gateway.
    """
    if not record:
        return None

    raw_pid = record.get("pid")
    if isinstance(raw_pid, bool):
        return None
    try:
        pid = int(raw_pid)
    except (TypeError, ValueError, OverflowError):
        return None
    # kill() gives pid <= 0 process-group semantics, so such values must never
    # be reported as "the gateway PID" (a caller might later signal them).
    if pid <= 0:
        return None

    try:
        os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
    except (ProcessLookupError, PermissionError, OverflowError):
        return None

    # A differing start time means the PID was recycled by another process.
    recorded_start = record.get("start_time")
    current_start = _get_process_start_time(pid)
    if (
        recorded_start is not None
        and current_start is not None
        and current_start != recorded_start
    ):
        return None

    if not _looks_like_gateway_process(pid):
        return None
    return pid


def get_running_pid() -> Optional[int]:
    """Return the PID of a running gateway instance, or ``None``.

    Checks the PID file and verifies the process is actually alive.
    Cleans up stale PID files automatically.
    """
    pid = _resolve_live_pid(_read_pid_record())
    if pid is None:
        remove_pid_file()
    return pid


def is_gateway_running() -> bool:
    """Check if the gateway daemon is currently running."""
    return get_running_pid() is not None