Files
the-nexus/tests/test_bezalel_heartbeat.py

335 lines
12 KiB
Python
Raw Normal View History

"""
Tests for Bezalel Cron Heartbeat system (poka-yoke #1096).
Validates:
- check_cron_heartbeats() with healthy and stale jobs
- Empty heartbeat dir (no .last files) returns safely
- Corrupt JSON in a .last file is handled gracefully
- Mixed healthy/stale jobs
- Alert file writing (write_alert)
- The 2× interval staleness threshold is applied correctly
Uses importlib to load bin/bezalel_heartbeat_check.py without __init__.py,
following the same pattern as test_nexus_watchdog.py.
Refs: https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/1096
"""
from __future__ import annotations
import importlib.util
import json
import sys
import time
from pathlib import Path
from unittest.mock import patch
import pytest
# ── Load module under test ────────────────────────────────────────────────────
# Grandparent directory of this test file; bin/ is resolved relative to it below.
PROJECT_ROOT = Path(__file__).parent.parent
# Load the script directly from its file path (it is not importable as a
# package — see the module docstring). The module name passed here is the key
# later used for sys.modules.
_hb_spec = importlib.util.spec_from_file_location(
"bezalel_heartbeat_check_test",
PROJECT_ROOT / "bin" / "bezalel_heartbeat_check.py",
)
_hb_mod = importlib.util.module_from_spec(_hb_spec)
# Register the module object before its body is executed.
# NOTE(review): presumably so code that looks the module up by name during
# execution can find it — confirm against the script's needs.
sys.modules["bezalel_heartbeat_check_test"] = _hb_mod
_hb_spec.loader.exec_module(_hb_mod)
# Module-level aliases for the two functions exercised by the tests below.
check_cron_heartbeats = _hb_mod.check_cron_heartbeats
write_alert = _hb_mod.write_alert
# ── Helpers ───────────────────────────────────────────────────────────────────
def write_heartbeat_file(
    directory: Path,
    job: str,
    timestamp: float,
    interval: int = 3600,
    pid: int = 12345,
) -> Path:
    """Create ``<directory>/<job>.last`` containing a JSON heartbeat record.

    The target directory (including missing parents) is created if needed.
    The record stores ``job``, ``timestamp``, ``interval`` and ``pid`` exactly
    as passed in.

    Returns:
        Path of the heartbeat file that was written.
    """
    directory.mkdir(parents=True, exist_ok=True)
    # Keyword order fixes the key order of the serialised JSON object.
    record = dict(job=job, timestamp=timestamp, interval=interval, pid=pid)
    heartbeat_path = directory / f"{job}.last"
    heartbeat_path.write_text(json.dumps(record), encoding="utf-8")
    return heartbeat_path
# ── Tests ─────────────────────────────────────────────────────────────────────
class TestCheckCronHeartbeats:
    """Tests for ``check_cron_heartbeats``.

    Each test populates pytest's ``tmp_path`` with ``.last`` heartbeat files
    and inspects the returned summary dict (``checked_at``, ``jobs``,
    ``stale_count``, ``healthy_count``). Tests that pin the exact staleness
    boundary patch ``time.time`` so wall-clock drift cannot influence them.
    """

    def test_healthy_job(self, tmp_path: Path) -> None:
        """A job with a recent timestamp is reported as healthy."""
        now = time.time()
        write_heartbeat_file(tmp_path, "morning-report", timestamp=now - 100, interval=3600)
        result = check_cron_heartbeats(str(tmp_path))
        assert result["stale_count"] == 0
        assert result["healthy_count"] == 1
        assert len(result["jobs"]) == 1
        job = result["jobs"][0]
        assert job["job"] == "morning-report"
        assert job["healthy"] is True
        # Real clock is used here, so allow a few seconds of slack.
        assert job["age_secs"] == pytest.approx(100, abs=5)
        assert "OK" in job["message"]

    def test_stale_job(self, tmp_path: Path) -> None:
        """A job silent for > 2× its interval is reported as stale."""
        now = time.time()
        # 3 hours ago with 1-hour interval → 3 > 2×1 → stale
        write_heartbeat_file(tmp_path, "hourly-sync", timestamp=now - 10800, interval=3600)
        result = check_cron_heartbeats(str(tmp_path))
        assert result["stale_count"] == 1
        assert result["healthy_count"] == 0
        job = result["jobs"][0]
        assert job["job"] == "hourly-sync"
        assert job["healthy"] is False
        assert "STALE" in job["message"]
        assert "exceeds 2x threshold" in job["message"]

    def test_just_within_threshold(self, tmp_path: Path) -> None:
        """A job at exactly 2× interval is NOT stale (threshold is strictly >)."""
        fake_now = 1700000000.0
        # age = 7200, threshold = 2 * 3600 = 7200 — NOT stale (not strictly greater)
        write_heartbeat_file(tmp_path, "edge-job", timestamp=fake_now - 7200, interval=3600)
        with patch("time.time", return_value=fake_now):
            result = check_cron_heartbeats(str(tmp_path))
        # age_secs == 7200 and threshold = 7200, so not stale (age > threshold is False)
        assert result["stale_count"] == 0
        assert result["healthy_count"] == 1

    def test_stale_threshold_just_over(self, tmp_path: Path) -> None:
        """A job silent for 2× interval + 1 second is stale."""
        fake_now = 1700000000.0
        # age = 7201, threshold = 7200 — IS stale
        write_heartbeat_file(tmp_path, "edge-job", timestamp=fake_now - 7201, interval=3600)
        # Freeze the clock so the age is exactly one second past the threshold;
        # with the real clock the age would be 7201s plus however long the test ran.
        with patch("time.time", return_value=fake_now):
            result = check_cron_heartbeats(str(tmp_path))
        assert result["stale_count"] == 1
        assert result["healthy_count"] == 0

    def test_empty_dir_returns_safely(self, tmp_path: Path) -> None:
        """Empty heartbeat directory returns zero jobs without error."""
        result = check_cron_heartbeats(str(tmp_path))
        assert result["stale_count"] == 0
        assert result["healthy_count"] == 0
        assert result["jobs"] == []
        assert "checked_at" in result

    def test_nonexistent_dir_returns_safely(self, tmp_path: Path) -> None:
        """Non-existent heartbeat dir returns empty result without error."""
        missing = str(tmp_path / "does-not-exist")
        result = check_cron_heartbeats(missing)
        assert result["stale_count"] == 0
        assert result["healthy_count"] == 0
        assert result["jobs"] == []

    def test_corrupt_json_handled_gracefully(self, tmp_path: Path) -> None:
        """Corrupt JSON in a .last file is reported as stale with an error message."""
        bad_file = tmp_path / "broken-job.last"
        bad_file.write_text("{this is not valid json!}", encoding="utf-8")
        result = check_cron_heartbeats(str(tmp_path))
        assert result["stale_count"] == 1
        assert result["healthy_count"] == 0
        job = result["jobs"][0]
        # The job name is expected to come from the file name, since the
        # payload itself is unreadable.
        assert job["job"] == "broken-job"
        assert job["healthy"] is False
        assert "CORRUPT" in job["message"]
        assert job["last_seen"] is None

    def test_multiple_jobs_mixed(self, tmp_path: Path) -> None:
        """Mixed healthy and stale jobs are correctly counted."""
        now = time.time()
        # 3 healthy jobs (recent)
        write_heartbeat_file(tmp_path, "job-a", timestamp=now - 60, interval=3600)
        write_heartbeat_file(tmp_path, "job-b", timestamp=now - 1800, interval=3600)
        write_heartbeat_file(tmp_path, "job-c", timestamp=now - 3599, interval=3600)
        # 2 stale jobs
        write_heartbeat_file(tmp_path, "job-d", timestamp=now - 10000, interval=3600)
        write_heartbeat_file(tmp_path, "job-e", timestamp=now - 86400, interval=3600)
        result = check_cron_heartbeats(str(tmp_path))
        assert result["stale_count"] == 2
        assert result["healthy_count"] == 3
        assert len(result["jobs"]) == 5
        # Compare as sets: the order of result["jobs"] is not asserted here.
        stale_jobs = {j["job"] for j in result["jobs"] if not j["healthy"]}
        healthy_jobs = {j["job"] for j in result["jobs"] if j["healthy"]}
        assert stale_jobs == {"job-d", "job-e"}
        assert healthy_jobs == {"job-a", "job-b", "job-c"}

    def test_result_contains_required_keys(self, tmp_path: Path) -> None:
        """Result dict contains all required keys."""
        now = time.time()
        write_heartbeat_file(tmp_path, "test-job", timestamp=now - 100, interval=3600)
        result = check_cron_heartbeats(str(tmp_path))
        assert "checked_at" in result
        assert "jobs" in result
        assert "stale_count" in result
        assert "healthy_count" in result
        job = result["jobs"][0]
        assert "job" in job
        assert "healthy" in job
        assert "age_secs" in job
        assert "interval" in job
        assert "last_seen" in job
        assert "message" in job

    def test_job_last_seen_is_iso_timestamp(self, tmp_path: Path) -> None:
        """last_seen field is a valid ISO 8601 timestamp string."""
        from datetime import datetime

        now = time.time()
        write_heartbeat_file(tmp_path, "ts-job", timestamp=now - 100, interval=3600)
        result = check_cron_heartbeats(str(tmp_path))
        job = result["jobs"][0]
        assert job["last_seen"] is not None
        # fromisoformat raises ValueError on a malformed string, which is the
        # real check here; the isinstance assert just makes the intent explicit.
        dt = datetime.fromisoformat(job["last_seen"])
        assert isinstance(dt, datetime)

    def test_checked_at_is_iso_timestamp(self, tmp_path: Path) -> None:
        """checked_at is a valid ISO 8601 timestamp string."""
        from datetime import datetime

        result = check_cron_heartbeats(str(tmp_path))
        # Parsing is the validation: a malformed value raises ValueError.
        dt = datetime.fromisoformat(result["checked_at"])
        assert isinstance(dt, datetime)

    def test_custom_interval_applied(self, tmp_path: Path) -> None:
        """Custom interval (e.g. daily) is respected for stale detection."""
        now = time.time()
        # 25 hours ago with 12-hour interval → 25 > 2×12 = 24 → stale
        write_heartbeat_file(tmp_path, "daily-job", timestamp=now - 90000, interval=43200)
        result = check_cron_heartbeats(str(tmp_path))
        assert result["stale_count"] == 1
        job = result["jobs"][0]
        assert job["interval"] == 43200
        assert not job["healthy"]

    def test_custom_interval_healthy(self, tmp_path: Path) -> None:
        """Job within 2× custom interval is healthy."""
        now = time.time()
        # 23 hours ago with 12-hour interval → 23 < 2×12 = 24 → healthy
        write_heartbeat_file(tmp_path, "daily-job", timestamp=now - 82800, interval=43200)
        result = check_cron_heartbeats(str(tmp_path))
        assert result["stale_count"] == 0
        assert result["healthy_count"] == 1

    def test_deterministic_with_mocked_time(self, tmp_path: Path) -> None:
        """Test with mocked time.time() for fully deterministic assertion."""
        fake_now = 1700000000.0
        write_heartbeat_file(tmp_path, "frozen-job", timestamp=fake_now - 500, interval=3600)
        with patch("time.time", return_value=fake_now):
            result = check_cron_heartbeats(str(tmp_path))
        job = result["jobs"][0]
        # age should be exactly 500s
        assert job["age_secs"] == pytest.approx(500.0, abs=0.01)
        assert job["healthy"] is True  # 500 < 7200

    def test_stale_with_mocked_time(self, tmp_path: Path) -> None:
        """Stale detection with mocked time is exact."""
        fake_now = 1700000000.0
        # 8000s ago with 3600s interval → 8000 > 7200 → stale
        write_heartbeat_file(tmp_path, "frozen-stale", timestamp=fake_now - 8000, interval=3600)
        with patch("time.time", return_value=fake_now):
            result = check_cron_heartbeats(str(tmp_path))
        job = result["jobs"][0]
        assert job["age_secs"] == pytest.approx(8000.0, abs=0.01)
        assert job["healthy"] is False
class TestWriteAlert:
    """Tests for ``write_alert``.

    Each test passes a job-info dict (same keys as the per-job entries
    returned by ``check_cron_heartbeats``) and checks the file produced at
    ``<dir>/alerts/<job>.alert``.
    """

    def test_alert_file_created(self, tmp_path: Path) -> None:
        """write_alert creates an alert file in the alerts subdirectory."""
        job_info = {
            "job": "test-job",
            "healthy": False,
            "age_secs": 8000.0,
            "interval": 3600,
            "last_seen": "2024-01-01T00:00:00+00:00",
            "message": "STALE (last 8000s ago, interval 3600s — exceeds 2x threshold of 7200s)",
        }
        write_alert(str(tmp_path), job_info)
        alert_file = tmp_path / "alerts" / "test-job.alert"
        assert alert_file.exists()

    def test_alert_file_content(self, tmp_path: Path) -> None:
        """Alert file contains correct JSON fields."""
        job_info = {
            "job": "my-job",
            "healthy": False,
            "age_secs": 9000.0,
            "interval": 3600,
            "last_seen": "2024-06-01T12:00:00+00:00",
            "message": "STALE",
        }
        write_alert(str(tmp_path), job_info)
        alert_file = tmp_path / "alerts" / "my-job.alert"
        # Decode explicitly so the test does not depend on the platform's
        # locale encoding.
        data = json.loads(alert_file.read_text(encoding="utf-8"))
        assert data["alert_level"] == "P1"
        assert data["job"] == "my-job"
        assert data["age_secs"] == 9000.0
        assert data["interval"] == 3600
        assert "detected_at" in data

    def test_alert_no_partial_files_left(self, tmp_path: Path) -> None:
        """No temp files remain after a successful write."""
        job_info = {
            "job": "clean-job",
            "healthy": False,
            "age_secs": 8000.0,
            "interval": 3600,
            "last_seen": None,
            "message": "STALE",
        }
        write_alert(str(tmp_path), job_info)
        alerts_dir = tmp_path / "alerts"
        # Only the .alert file should exist — no .tmp files
        files = list(alerts_dir.iterdir())
        assert len(files) == 1
        assert files[0].suffix == ".alert"
        # Pin the full name too, so a stray file that merely ends in .alert
        # cannot satisfy the check.
        assert files[0].name == "clean-job.alert"