342 lines
12 KiB
Python
342 lines
12 KiB
Python
"""Tests for the poka-yoke cron heartbeat system.

Covers:
- nexus/cron_heartbeat.py (write utility)
- bin/check_cron_heartbeats.py (meta-heartbeat checker)

Refs: #1096
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import importlib.util
|
||
import json
|
||
import os
|
||
import sys
|
||
import time
|
||
from pathlib import Path
|
||
from unittest.mock import MagicMock, patch
|
||
|
||
import pytest
|
||
|
||
# ── Load modules under test ───────────────────────────────────────────
|
||
|
||
PROJECT_ROOT = Path(__file__).parent.parent


def _load_from_path(alias, source):
    """Import the file *source* under the module name *alias*.

    The module object is placed in ``sys.modules`` before its code is
    executed. Returns the ``(spec, module)`` pair.
    """
    spec = importlib.util.spec_from_file_location(alias, source)
    module = importlib.util.module_from_spec(spec)
    sys.modules[alias] = module
    spec.loader.exec_module(module)
    return spec, module


# Both files are loaded by path under private aliases rather than through
# a regular package import.
_hb_spec, _hb = _load_from_path(
    "_cron_heartbeat",
    PROJECT_ROOT / "nexus" / "cron_heartbeat.py",
)
_chk_spec, _chk = _load_from_path(
    "_check_cron_heartbeats",
    PROJECT_ROOT / "bin" / "check_cron_heartbeats.py",
)

# Names exercised from nexus/cron_heartbeat.py.
write_cron_heartbeat = _hb.write_cron_heartbeat
heartbeat_path = _hb.heartbeat_path

# Names exercised from bin/check_cron_heartbeats.py.
scan_heartbeats = _chk.scan_heartbeats
build_report = _chk.build_report
HeartbeatReport = _chk.HeartbeatReport
JobStatus = _chk.JobStatus
_read_job_status = _chk._read_job_status
_fmt_duration = _chk._fmt_duration
|
||
|
||
|
||
# ── nexus/cron_heartbeat.py ───────────────────────────────────────────
|
||
|
||
class TestWriteCronHeartbeat:
    """Expectations for ``write_cron_heartbeat`` and ``heartbeat_path``."""

    def test_creates_file(self, tmp_path):
        """The returned path is ``<job>.last`` in the given directory and exists."""
        written = write_cron_heartbeat(
            "my_job", interval_seconds=300, directory=tmp_path
        )
        assert written == tmp_path / "my_job.last"
        assert written.exists()

    def test_file_content(self, tmp_path):
        """The JSON payload carries job, interval, status, pid and timestamp."""
        write_cron_heartbeat(
            "my_job", interval_seconds=600, status="ok", directory=tmp_path
        )
        payload = json.loads((tmp_path / "my_job.last").read_text())
        expected = {
            "job": "my_job",
            "interval_seconds": 600,
            "status": "ok",
            "pid": os.getpid(),
        }
        for field, wanted in expected.items():
            assert payload[field] == wanted
        # The timestamp must be within two seconds of the current time.
        assert abs(payload["timestamp"] - time.time()) < 2

    def test_atomic_write_no_temp_files(self, tmp_path):
        """Only the heartbeat file itself is left in the directory."""
        write_cron_heartbeat("my_job", interval_seconds=300, directory=tmp_path)
        names = [entry.name for entry in tmp_path.iterdir()]
        assert len(names) == 1
        assert names[0] == "my_job.last"

    def test_overwrites_cleanly(self, tmp_path):
        """A second write replaces the first; the last status wins."""
        for status in ("ok", "warn"):
            write_cron_heartbeat(
                "j", interval_seconds=60, status=status, directory=tmp_path
            )
        latest = json.loads((tmp_path / "j.last").read_text())
        assert latest["status"] == "warn"

    def test_creates_parent_dirs(self, tmp_path):
        """Writing into a not-yet-existing nested directory succeeds."""
        nested = tmp_path / "a" / "b" / "c"
        write_cron_heartbeat("j", interval_seconds=60, directory=nested)
        target = nested / "j.last"
        assert target.exists()

    def test_heartbeat_path_helper(self, tmp_path):
        """``heartbeat_path`` yields the ``.last`` path and creates nothing."""
        target = heartbeat_path("myjob", directory=tmp_path)
        assert target == tmp_path / "myjob.last"
        assert not target.exists()

    def test_env_var_override(self, tmp_path, monkeypatch):
        """With no ``directory=``, BEZALEL_HEARTBEAT_DIR selects the location."""
        monkeypatch.setenv("BEZALEL_HEARTBEAT_DIR", str(tmp_path))
        # directory= is deliberately omitted so the env var decides.
        written = write_cron_heartbeat("env_job", interval_seconds=120)
        assert written.parent == tmp_path
|
||
|
||
|
||
# ── bin/check_cron_heartbeats.py ─────────────────────────────────────
|
||
|
||
class TestScanHeartbeats:
    """Expectations for ``scan_heartbeats`` and ``_read_job_status``."""

    @staticmethod
    def _write_beat(directory, job, *, age=0, pid=1):
        """Write ``<job>.last`` into *directory* with a beat *age* seconds old.

        The interval is fixed at 300 seconds and the status at ``"ok"``.
        """
        beat = {
            "job": job,
            "timestamp": time.time() - age,
            "interval_seconds": 300,
            "pid": pid,
            "status": "ok",
        }
        (directory / f"{job}.last").write_text(json.dumps(beat))

    def test_empty_dir(self, tmp_path):
        """A directory without .last files gives an empty list."""
        found = scan_heartbeats(tmp_path)
        assert found == []

    def test_nonexistent_dir(self, tmp_path):
        """A directory that does not exist gives an empty list, not an error."""
        absent_dir = tmp_path / "nope"
        assert scan_heartbeats(absent_dir) == []

    def test_healthy_job(self, tmp_path):
        """A heartbeat written just now is reported as healthy."""
        self._write_beat(tmp_path, "myjob")
        jobs = scan_heartbeats(tmp_path)
        assert len(jobs) == 1
        only = jobs[0]
        assert only.healthy is True
        assert only.job == "myjob"

    def test_stale_job(self, tmp_path):
        """A heartbeat older than 2x its interval is reported as stale."""
        # 700s of silence (about 11.7 minutes) against a 300s interval
        # gives a ratio of roughly 2.33.
        self._write_beat(tmp_path, "slow", age=700)
        jobs = scan_heartbeats(tmp_path)
        first = jobs[0]
        assert first.healthy is False
        assert first.staleness_ratio > 2.0

    def test_missing_heartbeat_file(self, tmp_path):
        """``_read_job_status`` copes with a path that has no file behind it."""
        absent = tmp_path / "ghost.last"  # never created
        result = _read_job_status("ghost", absent)
        assert result.healthy is False
        assert "missing" in result.raw_status

    def test_corrupt_heartbeat(self, tmp_path):
        """Unparseable JSON yields an unhealthy status mentioning 'corrupt'."""
        broken = tmp_path / "bad.last"
        broken.write_text("{not valid json")
        result = _read_job_status("bad", broken)
        assert result.healthy is False
        assert "corrupt" in result.raw_status

    def test_multiple_jobs(self, tmp_path):
        """Every .last file in the directory shows up in the scan."""
        names = ["alpha", "beta", "gamma"]
        for idx, name in enumerate(names):
            self._write_beat(tmp_path, name, age=idx * 10, pid=idx + 1)
        jobs = scan_heartbeats(tmp_path)
        assert len(jobs) == 3
        seen = {entry.job for entry in jobs}
        assert seen == set(names)

    def test_non_last_files_ignored(self, tmp_path):
        """Files with other extensions (.json, .txt) are not picked up."""
        for filename, text in (("other.json", "{}"), ("notes.txt", "hello")):
            (tmp_path / filename).write_text(text)
        assert scan_heartbeats(tmp_path) == []
|
||
|
||
|
||
class TestHeartbeatReport:
    """Checks for ``HeartbeatReport`` properties and its renderers."""

    @staticmethod
    def _report(jobs):
        """Build a ``HeartbeatReport`` for *jobs*, stamped with the current time."""
        return HeartbeatReport(
            timestamp=time.time(),
            heartbeat_dir=Path("/tmp"),
            jobs=jobs,
        )

    def _fresh_job(self, name="j"):
        """Return a healthy ``JobStatus`` whose last beat was 30 seconds ago."""
        return JobStatus(
            job=name,
            path=Path(f"/tmp/{name}.last"),
            healthy=True,
            age_seconds=30,
            interval_seconds=300,
            staleness_ratio=0.1,
            last_timestamp=time.time() - 30,
            pid=1,
            raw_status="ok",
            message="Last beat 30s ago (ratio 0.1x)",
        )

    def _stale_job(self, name="s"):
        """Return an unhealthy ``JobStatus`` that has been silent for 700 seconds."""
        return JobStatus(
            job=name,
            path=Path(f"/tmp/{name}.last"),
            healthy=False,
            age_seconds=700,
            interval_seconds=300,
            staleness_ratio=2.33,
            last_timestamp=time.time() - 700,
            pid=1,
            raw_status="stale",
            message="Silent for 11m 40s (2.3x interval of 5m 0s)",
        )

    def test_overall_healthy(self):
        """A report holding only a fresh job is healthy overall."""
        report = self._report([self._fresh_job()])
        assert report.overall_healthy is True

    def test_overall_unhealthy(self):
        """One stale job makes the report unhealthy and is listed as stale."""
        report = self._report([self._fresh_job(), self._stale_job()])
        assert report.overall_healthy is False
        assert len(report.stale_jobs) == 1

    def test_panel_markdown_contains_table(self):
        """The panel has a heading, one row per job and an ALERT summary."""
        report = self._report(
            [self._fresh_job("alpha"), self._stale_job("beta")]
        )
        panel = report.to_panel_markdown()
        expected_fragments = (
            "## Heartbeat Panel",
            "| `alpha` |",
            "| `beta` |",
            "STALE",
            "OK",
            "**Overall:** ALERT",
        )
        for fragment in expected_fragments:
            assert fragment in panel

    def test_panel_markdown_no_jobs(self):
        """An empty job list renders the 'no heartbeat files found' notice."""
        panel = self._report([]).to_panel_markdown()
        assert "no heartbeat files found" in panel

    def test_panel_overall_ok(self):
        """With only a fresh job the panel summary reads OK."""
        panel = self._report([self._fresh_job()]).to_panel_markdown()
        assert "**Overall:** OK" in panel

    def test_alert_body_lists_stale_jobs(self):
        """The alert names the stale job, flags staleness and mentions crontab."""
        body = self._report([self._stale_job("slow")]).to_alert_body()
        assert "slow" in body
        lowered = body.lower()
        assert "STALE" in body.upper() or "stale" in lowered or "silent" in lowered
        assert "crontab" in lowered

    def test_to_json(self):
        """``to_json`` exposes the health flag and one entry per job."""
        data = self._report([self._fresh_job()]).to_json()
        assert data["healthy"] is True
        job_entries = data["jobs"]
        assert len(job_entries) == 1
        assert job_entries[0]["job"] == "j"
|
||
|
||
|
||
class TestFmtDuration:
    """Spot checks of ``_fmt_duration`` for second, minute and hour inputs."""

    def test_seconds(self):
        """45 seconds renders as ``45s``."""
        rendered = _fmt_duration(45)
        assert rendered == "45s"

    def test_minutes(self):
        """90 seconds renders as ``1m 30s``."""
        rendered = _fmt_duration(90)
        assert rendered == "1m 30s"

    def test_hours(self):
        """3661 seconds renders as ``1h 1m``."""
        rendered = _fmt_duration(3661)
        assert rendered == "1h 1m"
|
||
|
||
|
||
class TestBuildReport:
    """Checks for ``build_report`` when given an explicit directory."""

    def test_build_report_with_dir(self, tmp_path):
        """A fresh heartbeat in the given directory yields one healthy job."""
        beat = {
            "job": "myjob",
            "timestamp": time.time(),
            "interval_seconds": 300,
            "pid": 1,
            "status": "ok",
        }
        (tmp_path / "myjob.last").write_text(json.dumps(beat))

        report = build_report(directory=tmp_path)

        assert len(report.jobs) == 1
        assert report.overall_healthy is True

    def test_build_report_empty_dir(self, tmp_path):
        """An empty directory yields no jobs and still counts as healthy."""
        report = build_report(directory=tmp_path)
        assert report.jobs == []
        # nothing stale = healthy
        assert report.overall_healthy is True
|
||
|
||
|
||
# ── Integration: nexus_watchdog writes its own heartbeat ─────────────
|
||
|
||
class TestWatchdogHeartbeatIntegration:
    """Verify nexus_watchdog.py writes a cron heartbeat after run_once()."""

    def test_watchdog_writes_cron_heartbeat(self, tmp_path, monkeypatch):
        """After run_once, nexus_watchdog writes nexus_watchdog.last.

        bin/nexus_watchdog.py is loaded by path under a private module
        name, ``socket.socket`` and ``subprocess.run`` are replaced with
        mocks, and ``run_once`` is called with a mocked argument namespace
        in dry-run mode. The test then checks that ``nexus_watchdog.last``
        appears in the directory named by BEZALEL_HEARTBEAT_DIR and that it
        records the job name and a 300 second interval.
        """
        monkeypatch.setenv("BEZALEL_HEARTBEAT_DIR", str(tmp_path))

        # Load watchdog module
        module_name = "_watchdog_hb_test"
        spec = importlib.util.spec_from_file_location(
            module_name,
            PROJECT_ROOT / "bin" / "nexus_watchdog.py",
        )
        wd = importlib.util.module_from_spec(spec)
        # Register through monkeypatch so the sys.modules entry is removed
        # again on teardown instead of outliving this test.
        monkeypatch.setitem(sys.modules, module_name, wd)

        # Patch out network calls
        with patch("socket.socket") as mock_sock, \
                patch("subprocess.run") as mock_run:
            mock_sock.return_value.connect_ex.return_value = 111  # port closed
            mock_run.return_value = MagicMock(returncode=1, stdout="")

            spec.loader.exec_module(wd)

            args = MagicMock()
            args.ws_host = "localhost"
            args.ws_port = 8765
            args.heartbeat_path = str(tmp_path / "nexus_heartbeat.json")
            args.stale_threshold = 300
            args.dry_run = True  # don't touch Gitea

            wd.run_once(args)

        hb_file = tmp_path / "nexus_watchdog.last"
        assert hb_file.exists(), "nexus_watchdog.last was not written"
        data = json.loads(hb_file.read_text())
        assert data["job"] == "nexus_watchdog"
        assert data["interval_seconds"] == 300
|