# Files
# the-nexus/tests/test_cron_heartbeats.py
# 2026-04-07 14:38:55 +00:00
#
# 342 lines
# 12 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for the poka-yoke cron heartbeat system.
Covers:
- nexus/cron_heartbeat.py (write utility)
- bin/check_cron_heartbeats.py (meta-heartbeat checker)
Refs: #1096
"""
from __future__ import annotations
import importlib.util
import json
import os
import sys
import time
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
# ── Load modules under test ───────────────────────────────────────────
PROJECT_ROOT = Path(__file__).parent.parent


def _load_module(alias: str, source: Path):
    """Import the Python file at *source* under the module name *alias*.

    The modules under test are loaded by file path rather than with a normal
    ``import`` statement (presumably because ``bin/`` is not an importable
    package -- confirm against the project layout).

    Args:
        alias: Name under which the module is registered in ``sys.modules``.
        source: Path of the ``.py`` file to execute.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be built for *source*.
    """
    spec = importlib.util.spec_from_file_location(alias, source)
    if spec is None or spec.loader is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ImportError(f"cannot load module {alias!r} from {source}")
    module = importlib.util.module_from_spec(spec)
    # Register before executing, following the importlib recipe for importing
    # a source file directly.
    sys.modules[alias] = module
    spec.loader.exec_module(module)
    return module


_hb = _load_module(
    "_cron_heartbeat",
    PROJECT_ROOT / "nexus" / "cron_heartbeat.py",
)
_chk = _load_module(
    "_check_cron_heartbeats",
    PROJECT_ROOT / "bin" / "check_cron_heartbeats.py",
)

# Short aliases for the objects exercised by the tests below.
write_cron_heartbeat = _hb.write_cron_heartbeat
heartbeat_path = _hb.heartbeat_path
scan_heartbeats = _chk.scan_heartbeats
build_report = _chk.build_report
HeartbeatReport = _chk.HeartbeatReport
JobStatus = _chk.JobStatus
_read_job_status = _chk._read_job_status
_fmt_duration = _chk._fmt_duration
# ── nexus/cron_heartbeat.py ───────────────────────────────────────────
class TestWriteCronHeartbeat:
    """Tests for the heartbeat writer and its path helper."""

    def test_creates_file(self, tmp_path):
        """A write returns and creates ``<job>.last`` in the given directory."""
        written = write_cron_heartbeat(
            "my_job", interval_seconds=300, directory=tmp_path
        )
        expected = tmp_path / "my_job.last"
        assert written == expected
        assert written.exists()

    def test_file_content(self, tmp_path):
        """The written JSON carries job, interval, status, pid and timestamp."""
        write_cron_heartbeat(
            "my_job", interval_seconds=600, status="ok", directory=tmp_path
        )
        raw = (tmp_path / "my_job.last").read_text()
        payload = json.loads(raw)
        assert payload["job"] == "my_job"
        assert payload["interval_seconds"] == 600
        assert payload["status"] == "ok"
        assert payload["pid"] == os.getpid()
        # The recorded timestamp must be within two seconds of "now".
        assert abs(payload["timestamp"] - time.time()) < 2

    def test_atomic_write_no_temp_files(self, tmp_path):
        """Only the heartbeat file itself is left behind after a write."""
        write_cron_heartbeat("my_job", interval_seconds=300, directory=tmp_path)
        leftover_names = [entry.name for entry in tmp_path.iterdir()]
        assert leftover_names == ["my_job.last"]

    def test_overwrites_cleanly(self, tmp_path):
        """A second write replaces the first; the last status wins."""
        for status in ("ok", "warn"):
            write_cron_heartbeat(
                "j", interval_seconds=60, status=status, directory=tmp_path
            )
        payload = json.loads((tmp_path / "j.last").read_text())
        assert payload["status"] == "warn"

    def test_creates_parent_dirs(self, tmp_path):
        """Missing parent directories are created by the writer."""
        nested = tmp_path.joinpath("a", "b", "c")
        write_cron_heartbeat("j", interval_seconds=60, directory=nested)
        assert (nested / "j.last").exists()

    def test_heartbeat_path_helper(self, tmp_path):
        """heartbeat_path() computes the path and does not create the file."""
        target = heartbeat_path("myjob", directory=tmp_path)
        assert target == tmp_path / "myjob.last"
        assert not target.exists()

    def test_env_var_override(self, tmp_path, monkeypatch):
        """BEZALEL_HEARTBEAT_DIR is used when no directory is passed."""
        monkeypatch.setenv("BEZALEL_HEARTBEAT_DIR", str(tmp_path))
        # No directory= argument, so the env var decides the location.
        written = write_cron_heartbeat("env_job", interval_seconds=120)
        assert written.parent == tmp_path
# ── bin/check_cron_heartbeats.py ─────────────────────────────────────
class TestScanHeartbeats:
    """Tests for scan_heartbeats() and _read_job_status()."""

    @staticmethod
    def _write_beat(directory, job, *, age=0, interval=300, pid=1, status="ok"):
        """Write a well-formed ``<job>.last`` file and return its path.

        Args:
            directory: Directory that receives the file.
            job: Job name; used for the file stem and the ``job`` field.
            age: Seconds subtracted from the current time for ``timestamp``.
            interval: Value stored as ``interval_seconds``.
            pid: Value stored as ``pid``.
            status: Value stored as ``status``.
        """
        target = directory / f"{job}.last"
        target.write_text(json.dumps({
            "job": job,
            "timestamp": time.time() - age,
            "interval_seconds": interval,
            "pid": pid,
            "status": status,
        }))
        return target

    def test_empty_dir(self, tmp_path):
        """No .last files → empty list."""
        assert scan_heartbeats(tmp_path) == []

    def test_nonexistent_dir(self, tmp_path):
        """Missing directory → empty list (no exception)."""
        assert scan_heartbeats(tmp_path / "nope") == []

    def test_healthy_job(self, tmp_path):
        """Fresh heartbeat → healthy."""
        self._write_beat(tmp_path, "myjob")
        jobs = scan_heartbeats(tmp_path)
        assert len(jobs) == 1
        assert jobs[0].healthy is True
        assert jobs[0].job == "myjob"

    def test_stale_job(self, tmp_path):
        """Heartbeat older than 2× interval → stale."""
        # 700 s of silence against a 300 s interval is a ratio of about 2.33.
        self._write_beat(tmp_path, "slow", age=700, interval=300)
        jobs = scan_heartbeats(tmp_path)
        # Guard the index so an empty scan fails as an assertion, not IndexError.
        assert len(jobs) == 1
        assert jobs[0].healthy is False
        assert jobs[0].staleness_ratio > 2.0

    def test_missing_heartbeat_file(self, tmp_path):
        """_read_job_status handles a file that disappears mid-scan."""
        ghost_path = tmp_path / "ghost.last"  # deliberately never created
        status = _read_job_status("ghost", ghost_path)
        assert status.healthy is False
        assert "missing" in status.raw_status

    def test_corrupt_heartbeat(self, tmp_path):
        """Corrupt JSON → unhealthy with 'corrupt' status."""
        bad_path = tmp_path / "bad.last"
        bad_path.write_text("{not valid json")
        status = _read_job_status("bad", bad_path)
        assert status.healthy is False
        assert "corrupt" in status.raw_status

    def test_multiple_jobs(self, tmp_path):
        """Multiple .last files are all reported."""
        for i, name in enumerate(["alpha", "beta", "gamma"]):
            self._write_beat(tmp_path, name, age=i * 10, pid=i + 1)
        jobs = scan_heartbeats(tmp_path)
        assert len(jobs) == 3
        assert {j.job for j in jobs} == {"alpha", "beta", "gamma"}

    def test_non_last_files_ignored(self, tmp_path):
        """.json and other extensions are ignored."""
        (tmp_path / "other.json").write_text("{}")
        (tmp_path / "notes.txt").write_text("hello")
        assert scan_heartbeats(tmp_path) == []
class TestHeartbeatReport:
    """Tests for HeartbeatReport's health flags and its renderers."""

    def _fresh_job(self, name="j"):
        """Return a healthy JobStatus that last beat 30 seconds ago."""
        return JobStatus(
            job=name, path=Path(f"/tmp/{name}.last"),
            healthy=True, age_seconds=30, interval_seconds=300,
            staleness_ratio=0.1, last_timestamp=time.time() - 30,
            pid=1, raw_status="ok",
            message="Last beat 30s ago (ratio 0.1x)",
        )

    def _stale_job(self, name="s"):
        """Return an unhealthy JobStatus that has been silent for 700 seconds."""
        return JobStatus(
            job=name, path=Path(f"/tmp/{name}.last"),
            healthy=False, age_seconds=700, interval_seconds=300,
            staleness_ratio=2.33, last_timestamp=time.time() - 700,
            pid=1, raw_status="stale",
            message="Silent for 11m 40s (2.3x interval of 5m 0s)",
        )

    @staticmethod
    def _report(*jobs):
        """Build a HeartbeatReport over *jobs*, stamped with the current time."""
        return HeartbeatReport(
            timestamp=time.time(),
            heartbeat_dir=Path("/tmp"),
            jobs=list(jobs),
        )

    def test_overall_healthy(self):
        """A report holding only a fresh job is healthy overall."""
        report = self._report(self._fresh_job())
        assert report.overall_healthy is True

    def test_overall_unhealthy(self):
        """One stale job makes the report unhealthy and is listed as stale."""
        report = self._report(self._fresh_job(), self._stale_job())
        assert report.overall_healthy is False
        assert len(report.stale_jobs) == 1

    def test_panel_markdown_contains_table(self):
        """The panel has a heading, one row per job and an ALERT verdict."""
        report = self._report(self._fresh_job("alpha"), self._stale_job("beta"))
        panel = report.to_panel_markdown()
        assert "## Heartbeat Panel" in panel
        assert "| `alpha` |" in panel
        assert "| `beta` |" in panel
        assert "STALE" in panel
        assert "OK" in panel
        assert "**Overall:** ALERT" in panel

    def test_panel_markdown_no_jobs(self):
        """With no jobs the panel says that no heartbeat files were found."""
        panel = self._report().to_panel_markdown()
        assert "no heartbeat files found" in panel

    def test_panel_overall_ok(self):
        """A report holding only a fresh job renders an OK verdict."""
        panel = self._report(self._fresh_job()).to_panel_markdown()
        assert "**Overall:** OK" in panel

    def test_alert_body_lists_stale_jobs(self):
        """The alert body names the stale job and mentions crontab."""
        body = self._report(self._stale_job("slow")).to_alert_body()
        lowered = body.lower()
        assert "slow" in body
        # One case-insensitive check replaces the duplicated upper()/lower() pair.
        assert "stale" in lowered or "silent" in lowered
        assert "crontab" in lowered

    def test_to_json(self):
        """to_json() exposes the health flag and one entry per job."""
        data = self._report(self._fresh_job()).to_json()
        assert data["healthy"] is True
        assert len(data["jobs"]) == 1
        assert data["jobs"][0]["job"] == "j"
class TestFmtDuration:
    """Tests for the strings produced by _fmt_duration()."""

    def test_seconds(self):
        """45 seconds renders as seconds only."""
        rendered = _fmt_duration(45)
        assert rendered == "45s"

    def test_minutes(self):
        """90 seconds renders as minutes and seconds."""
        rendered = _fmt_duration(90)
        assert rendered == "1m 30s"

    def test_hours(self):
        """3661 seconds renders as hours and minutes."""
        rendered = _fmt_duration(3661)
        assert rendered == "1h 1m"
class TestBuildReport:
    """Tests for build_report() when given an explicit directory."""

    def test_build_report_with_dir(self, tmp_path):
        """A fresh heartbeat in the given directory yields one healthy job."""
        beat = {
            "job": "myjob",
            "timestamp": time.time(),
            "interval_seconds": 300,
            "pid": 1,
            "status": "ok",
        }
        beat_file = tmp_path / "myjob.last"
        beat_file.write_text(json.dumps(beat))

        report = build_report(directory=tmp_path)
        assert len(report.jobs) == 1
        assert report.overall_healthy is True

    def test_build_report_empty_dir(self, tmp_path):
        """An empty directory yields no jobs and still counts as healthy."""
        report = build_report(directory=tmp_path)
        assert report.jobs == []
        # Nothing stale means the report is healthy.
        assert report.overall_healthy is True
# ── Integration: nexus_watchdog writes its own heartbeat ─────────────
class TestWatchdogHeartbeatIntegration:
    """Verify nexus_watchdog.py writes a cron heartbeat after run_once()."""

    def test_watchdog_writes_cron_heartbeat(self, tmp_path, monkeypatch):
        """After run_once, nexus_watchdog writes nexus_watchdog.last."""
        monkeypatch.setenv("BEZALEL_HEARTBEAT_DIR", str(tmp_path))

        # Load the watchdog script by file path under a test-only module name.
        module_name = "_watchdog_hb_test"
        spec = importlib.util.spec_from_file_location(
            module_name,
            PROJECT_ROOT / "bin" / "nexus_watchdog.py",
        )
        assert spec is not None and spec.loader is not None, (
            "could not build an import spec for bin/nexus_watchdog.py"
        )
        wd = importlib.util.module_from_spec(spec)
        # Registered through monkeypatch so the sys.modules entry is removed
        # at teardown and does not leak into other tests.
        monkeypatch.setitem(sys.modules, module_name, wd)

        # Patch out network calls. Module execution and run_once both happen
        # under the patches so any socket/subprocess use hits the mocks.
        with patch("socket.socket") as mock_sock, \
                patch("subprocess.run") as mock_run:
            mock_sock.return_value.connect_ex.return_value = 111  # port closed
            mock_run.return_value = MagicMock(returncode=1, stdout="")

            spec.loader.exec_module(wd)

            args = MagicMock()
            args.ws_host = "localhost"
            args.ws_port = 8765
            args.heartbeat_path = str(tmp_path / "nexus_heartbeat.json")
            args.stale_threshold = 300
            args.dry_run = True  # don't touch Gitea

            wd.run_once(args)

        hb_file = tmp_path / "nexus_watchdog.last"
        assert hb_file.exists(), "nexus_watchdog.last was not written"
        data = json.loads(hb_file.read_text())
        assert data["job"] == "nexus_watchdog"
        assert data["interval_seconds"] == 300