Files
the-nexus/tests/test_bezalel_heartbeat.py
2026-04-07 14:44:05 +00:00

335 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Tests for Bezalel Cron Heartbeat system (poka-yoke #1096).
Validates:
- check_cron_heartbeats() with healthy and stale jobs
- Empty heartbeat dir (no .last files) returns safely
- Corrupt JSON in a .last file is handled gracefully
- Mixed healthy/stale jobs
- Alert file writing (write_alert)
- The 2× interval staleness threshold is applied correctly
Uses importlib to load bin/bezalel_heartbeat_check.py without __init__.py,
following the same pattern as test_nexus_watchdog.py.
Refs: https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/1096
"""
from __future__ import annotations
import importlib.util
import json
import sys
import time
from pathlib import Path
from unittest.mock import patch
import pytest
# ── Load module under test ────────────────────────────────────────────────────
# The script under bin/ is loaded directly from its file path via importlib
# (see the module docstring: there is no __init__.py to import it through).
# PROJECT_ROOT is the parent of the directory that contains this test file.
PROJECT_ROOT = Path(__file__).parent.parent
_hb_spec = importlib.util.spec_from_file_location(
"bezalel_heartbeat_check_test",
PROJECT_ROOT / "bin" / "bezalel_heartbeat_check.py",
)
_hb_mod = importlib.util.module_from_spec(_hb_spec)
# The module object is registered in sys.modules under the spec's name
# before its body is executed.
sys.modules["bezalel_heartbeat_check_test"] = _hb_mod
_hb_spec.loader.exec_module(_hb_mod)
# Module-level aliases for the two functions the tests below exercise.
check_cron_heartbeats = _hb_mod.check_cron_heartbeats
write_alert = _hb_mod.write_alert
# ── Helpers ───────────────────────────────────────────────────────────────────
def write_heartbeat_file(
    directory: Path,
    job: str,
    timestamp: float,
    interval: int = 3600,
    pid: int = 12345,
) -> Path:
    """Create ``<directory>/<job>.last`` holding a well-formed heartbeat record.

    The directory (including missing parents) is created on demand. The
    record is a JSON object with the keys ``job``, ``timestamp``,
    ``interval`` and ``pid``, written as UTF-8.

    Returns the path of the file that was written.
    """
    target = directory / f"{job}.last"
    serialized = json.dumps(
        {
            "job": job,
            "timestamp": timestamp,
            "interval": interval,
            "pid": pid,
        }
    )
    # The directory must exist before the write; an existing one is fine.
    directory.mkdir(parents=True, exist_ok=True)
    target.write_text(serialized, encoding="utf-8")
    return target
# ── Tests ─────────────────────────────────────────────────────────────────────
class TestCheckCronHeartbeats:
    """Tests for check_cron_heartbeats() driven by .last files in a temp dir."""

    def test_healthy_job(self, tmp_path: Path) -> None:
        """A heartbeat 100 seconds old on a one-hour interval is healthy."""
        written_at = time.time() - 100
        write_heartbeat_file(tmp_path, "morning-report", timestamp=written_at, interval=3600)

        report = check_cron_heartbeats(str(tmp_path))

        assert report["stale_count"] == 0
        assert report["healthy_count"] == 1
        assert len(report["jobs"]) == 1
        entry = report["jobs"][0]
        assert entry["job"] == "morning-report"
        assert entry["healthy"] is True
        # Real clock is used here, so allow a few seconds of drift.
        assert entry["age_secs"] == pytest.approx(100, abs=5)
        assert "OK" in entry["message"]

    def test_stale_job(self, tmp_path: Path) -> None:
        """Silence longer than 2× the interval is reported as stale."""
        # 3 hours of silence against a 1-hour interval → beyond the 2× limit.
        three_hours_ago = time.time() - 10800
        write_heartbeat_file(tmp_path, "hourly-sync", timestamp=three_hours_ago, interval=3600)

        report = check_cron_heartbeats(str(tmp_path))

        assert report["stale_count"] == 1
        assert report["healthy_count"] == 0
        entry = report["jobs"][0]
        assert entry["job"] == "hourly-sync"
        assert entry["healthy"] is False
        for fragment in ("STALE", "exceeds 2x threshold"):
            assert fragment in entry["message"]

    def test_just_within_threshold(self, tmp_path: Path) -> None:
        """Age equal to exactly 2× interval is not stale (comparison is strict)."""
        frozen_clock = 1700000000.0
        # age 7200 == threshold 2 * 3600, so "age > threshold" does not hold.
        write_heartbeat_file(
            tmp_path, "edge-job", timestamp=frozen_clock - 7200, interval=3600
        )

        with patch("time.time", return_value=frozen_clock):
            report = check_cron_heartbeats(str(tmp_path))

        assert report["stale_count"] == 0

    def test_stale_threshold_just_over(self, tmp_path: Path) -> None:
        """One second past 2× interval tips the job into stale."""
        # age 7201 against a threshold of 7200.
        written_at = time.time() - 7201
        write_heartbeat_file(tmp_path, "edge-job", timestamp=written_at, interval=3600)

        report = check_cron_heartbeats(str(tmp_path))

        assert report["stale_count"] == 1

    def test_empty_dir_returns_safely(self, tmp_path: Path) -> None:
        """A directory with no heartbeat files yields an empty, error-free result."""
        report = check_cron_heartbeats(str(tmp_path))

        assert report["stale_count"] == 0
        assert report["healthy_count"] == 0
        assert report["jobs"] == []
        assert "checked_at" in report

    def test_nonexistent_dir_returns_safely(self, tmp_path: Path) -> None:
        """A heartbeat directory that does not exist yields an empty result."""
        absent_dir = tmp_path / "does-not-exist"

        report = check_cron_heartbeats(str(absent_dir))

        assert report["stale_count"] == 0
        assert report["healthy_count"] == 0
        assert report["jobs"] == []

    def test_corrupt_json_handled_gracefully(self, tmp_path: Path) -> None:
        """An unparseable .last file is counted as stale and flagged CORRUPT."""
        corrupt_path = tmp_path / "broken-job.last"
        corrupt_path.write_text("{this is not valid json!}", encoding="utf-8")

        report = check_cron_heartbeats(str(tmp_path))

        assert report["stale_count"] == 1
        assert report["healthy_count"] == 0
        entry = report["jobs"][0]
        assert entry["job"] == "broken-job"
        assert entry["healthy"] is False
        assert "CORRUPT" in entry["message"]
        assert entry["last_seen"] is None

    def test_multiple_jobs_mixed(self, tmp_path: Path) -> None:
        """Healthy and stale jobs in one directory are tallied separately."""
        current = time.time()
        # First three are recent (healthy); the last two exceed 2 × 3600s.
        ages_by_job = {
            "job-a": 60,
            "job-b": 1800,
            "job-c": 3599,
            "job-d": 10000,
            "job-e": 86400,
        }
        for name, age in ages_by_job.items():
            write_heartbeat_file(tmp_path, name, timestamp=current - age, interval=3600)

        report = check_cron_heartbeats(str(tmp_path))

        assert report["stale_count"] == 2
        assert report["healthy_count"] == 3
        entries = report["jobs"]
        assert len(entries) == 5
        stale_names = {e["job"] for e in entries if not e["healthy"]}
        healthy_names = {e["job"] for e in entries if e["healthy"]}
        assert stale_names == {"job-d", "job-e"}
        assert healthy_names == {"job-a", "job-b", "job-c"}

    def test_result_contains_required_keys(self, tmp_path: Path) -> None:
        """Both the top-level result and each job entry expose the expected keys."""
        written_at = time.time() - 100
        write_heartbeat_file(tmp_path, "test-job", timestamp=written_at, interval=3600)

        report = check_cron_heartbeats(str(tmp_path))

        for top_key in ("checked_at", "jobs", "stale_count", "healthy_count"):
            assert top_key in report
        entry = report["jobs"][0]
        for job_key in ("job", "healthy", "age_secs", "interval", "last_seen", "message"):
            assert job_key in entry

    def test_job_last_seen_is_iso_timestamp(self, tmp_path: Path) -> None:
        """The last_seen value of a job parses as an ISO 8601 timestamp."""
        from datetime import datetime

        written_at = time.time() - 100
        write_heartbeat_file(tmp_path, "ts-job", timestamp=written_at, interval=3600)

        report = check_cron_heartbeats(str(tmp_path))

        last_seen = report["jobs"][0]["last_seen"]
        assert last_seen is not None
        # fromisoformat raises on malformed input, which would fail the test.
        parsed = datetime.fromisoformat(last_seen)
        assert parsed is not None

    def test_checked_at_is_iso_timestamp(self, tmp_path: Path) -> None:
        """The checked_at value of the result parses as an ISO 8601 timestamp."""
        from datetime import datetime

        report = check_cron_heartbeats(str(tmp_path))

        parsed = datetime.fromisoformat(report["checked_at"])
        assert parsed is not None

    def test_custom_interval_applied(self, tmp_path: Path) -> None:
        """Staleness is judged against the job's own interval, not a fixed one."""
        # 25 hours of silence, 12-hour interval → 25 > 2 × 12 → stale.
        written_at = time.time() - 90000
        write_heartbeat_file(tmp_path, "daily-job", timestamp=written_at, interval=43200)

        report = check_cron_heartbeats(str(tmp_path))

        assert report["stale_count"] == 1
        entry = report["jobs"][0]
        assert entry["interval"] == 43200
        assert not entry["healthy"]

    def test_custom_interval_healthy(self, tmp_path: Path) -> None:
        """A job still inside 2× its custom interval is healthy."""
        # 23 hours of silence, 12-hour interval → 23 < 2 × 12 → healthy.
        written_at = time.time() - 82800
        write_heartbeat_file(tmp_path, "daily-job", timestamp=written_at, interval=43200)

        report = check_cron_heartbeats(str(tmp_path))

        assert report["stale_count"] == 0
        assert report["healthy_count"] == 1

    def test_deterministic_with_mocked_time(self, tmp_path: Path) -> None:
        """With time.time() frozen, the reported age is exact."""
        frozen_clock = 1700000000.0
        write_heartbeat_file(
            tmp_path, "frozen-job", timestamp=frozen_clock - 500, interval=3600
        )

        with patch("time.time", return_value=frozen_clock):
            report = check_cron_heartbeats(str(tmp_path))

        entry = report["jobs"][0]
        # Exactly 500s old, well under the 7200s threshold.
        assert entry["age_secs"] == pytest.approx(500.0, abs=0.01)
        assert entry["healthy"] is True

    def test_stale_with_mocked_time(self, tmp_path: Path) -> None:
        """With time.time() frozen, an 8000s-old heartbeat is exactly stale."""
        frozen_clock = 1700000000.0
        # 8000s of silence on a 3600s interval → 8000 > 7200 → stale.
        write_heartbeat_file(
            tmp_path, "frozen-stale", timestamp=frozen_clock - 8000, interval=3600
        )

        with patch("time.time", return_value=frozen_clock):
            report = check_cron_heartbeats(str(tmp_path))

        entry = report["jobs"][0]
        assert entry["age_secs"] == pytest.approx(8000.0, abs=0.01)
        assert entry["healthy"] is False
class TestWriteAlert:
    """Tests for write_alert(), which records a stale job as an alert file."""

    def test_alert_file_created(self, tmp_path: Path) -> None:
        """write_alert creates ``alerts/<job>.alert`` under the given directory."""
        job_info = {
            "job": "test-job",
            "healthy": False,
            "age_secs": 8000.0,
            "interval": 3600,
            "last_seen": "2024-01-01T00:00:00+00:00",
            "message": "STALE (last 8000s ago, interval 3600s — exceeds 2x threshold of 7200s)",
        }

        write_alert(str(tmp_path), job_info)

        alert_file = tmp_path / "alerts" / "test-job.alert"
        assert alert_file.exists()

    def test_alert_file_content(self, tmp_path: Path) -> None:
        """The alert file is JSON carrying the job's fields plus alert metadata."""
        job_info = {
            "job": "my-job",
            "healthy": False,
            "age_secs": 9000.0,
            "interval": 3600,
            "last_seen": "2024-06-01T12:00:00+00:00",
            "message": "STALE",
        }

        write_alert(str(tmp_path), job_info)

        alert_file = tmp_path / "alerts" / "my-job.alert"
        # Pin the encoding: a bare read_text() decodes with the platform's
        # locale encoding, unlike every other file operation in this module.
        data = json.loads(alert_file.read_text(encoding="utf-8"))
        assert data["alert_level"] == "P1"
        assert data["job"] == "my-job"
        assert data["age_secs"] == 9000.0
        assert data["interval"] == 3600
        assert "detected_at" in data

    def test_alert_no_partial_files_left(self, tmp_path: Path) -> None:
        """After a successful write, only the final .alert file remains."""
        job_info = {
            "job": "clean-job",
            "healthy": False,
            "age_secs": 8000.0,
            "interval": 3600,
            "last_seen": None,
            "message": "STALE",
        }

        write_alert(str(tmp_path), job_info)

        alerts_dir = tmp_path / "alerts"
        # Only the .alert file should exist — no leftover temp files.
        files = list(alerts_dir.iterdir())
        assert len(files) == 1
        assert files[0].suffix == ".alert"