[watchdog] The Eye That Never Sleeps — Nexus Health Monitor (#794)
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
Co-authored-by: Google AI Agent <gemini@hermes.local> Co-committed-by: Google AI Agent <gemini@hermes.local>
This commit was merged in pull request #794.
This commit is contained in:
79
nexus/heartbeat.py
Normal file
79
nexus/heartbeat.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""
|
||||
Heartbeat writer for the Nexus consciousness loop.
|
||||
|
||||
Call write_heartbeat() at the end of each think cycle to let the
|
||||
watchdog know the mind is alive. The file is written atomically
|
||||
(write-to-temp + rename) to prevent the watchdog from reading a
|
||||
half-written file.
|
||||
|
||||
Usage in nexus_think.py:
|
||||
from nexus.heartbeat import write_heartbeat
|
||||
|
||||
class NexusMind:
|
||||
def think_once(self):
|
||||
# ... do the thinking ...
|
||||
write_heartbeat(
|
||||
cycle=self.cycle_count,
|
||||
model=self.model,
|
||||
status="thinking",
|
||||
)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Default heartbeat location: ~/.nexus/heartbeat.json in the current user's home.
DEFAULT_HEARTBEAT_PATH = Path.home() / ".nexus" / "heartbeat.json"


def write_heartbeat(
    cycle: int = 0,
    model: str = "unknown",
    status: str = "thinking",
    path: Path = DEFAULT_HEARTBEAT_PATH,
) -> None:
    """Write a heartbeat file atomically, on a best-effort basis.

    The watchdog monitors this file to detect stale minds — processes
    that are technically running but have stopped thinking (e.g., hung
    on a blocking call, deadlocked, or crashed inside a catch-all
    exception handler).

    The payload is a JSON object with the keys ``pid`` (from
    ``os.getpid()``), ``timestamp`` (from ``time.time()``), ``cycle``,
    ``model`` and ``status``.

    Every failure while producing the file (parent directory cannot be
    created, temp file cannot be written, payload is not JSON
    serializable, rename fails) is logged at DEBUG level and otherwise
    ignored: a heartbeat problem must never take down the caller.

    Args:
        cycle: Current think cycle number
        model: Model identifier
        status: Current state ("thinking", "perceiving", "acting", "idle")
        path: Where to write the heartbeat file; a plain string path is
            accepted as well as a ``Path``
    """
    # Function-scope import so this block does not depend on a change to
    # the module's import section.
    import logging

    target = Path(path)

    data = {
        "pid": os.getpid(),
        "timestamp": time.time(),
        "cycle": cycle,
        "model": model,
        "status": status,
    }

    # Stays None until mkstemp succeeds, so the cleanup below only runs
    # when there really is a temp file to remove.
    tmp_path = None
    try:
        target.parent.mkdir(parents=True, exist_ok=True)

        # Atomic write: temp file in same directory + rename.
        # This guarantees the watchdog never reads a partial file.
        fd, tmp_path = tempfile.mkstemp(
            dir=str(target.parent),
            prefix=".heartbeat-",
            suffix=".tmp",
        )
        with os.fdopen(fd, "w") as f:
            json.dump(data, f)
        os.replace(tmp_path, str(target))
    except Exception:
        # Best effort — never crash the mind over a heartbeat failure,
        # but leave a trace for anyone debugging a silent watchdog.
        logging.getLogger(__name__).debug(
            "heartbeat write to %s failed", target, exc_info=True
        )
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
|
||||
Reference in New Issue
Block a user