""" Tests for Bezalel Cron Heartbeat system (poka-yoke #1096). Validates: - check_cron_heartbeats() with healthy and stale jobs - Empty heartbeat dir (no .last files) returns safely - Corrupt JSON in a .last file is handled gracefully - Mixed healthy/stale jobs - Alert file writing (write_alert) - The 2× interval staleness threshold is applied correctly Uses importlib to load bin/bezalel_heartbeat_check.py without __init__.py, following the same pattern as test_nexus_watchdog.py. Refs: https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/1096 """ from __future__ import annotations import importlib.util import json import sys import time from pathlib import Path from unittest.mock import patch import pytest # ── Load module under test ──────────────────────────────────────────────────── PROJECT_ROOT = Path(__file__).parent.parent _hb_spec = importlib.util.spec_from_file_location( "bezalel_heartbeat_check_test", PROJECT_ROOT / "bin" / "bezalel_heartbeat_check.py", ) _hb_mod = importlib.util.module_from_spec(_hb_spec) sys.modules["bezalel_heartbeat_check_test"] = _hb_mod _hb_spec.loader.exec_module(_hb_mod) check_cron_heartbeats = _hb_mod.check_cron_heartbeats write_alert = _hb_mod.write_alert # ── Helpers ─────────────────────────────────────────────────────────────────── def write_heartbeat_file( directory: Path, job: str, timestamp: float, interval: int = 3600, pid: int = 12345, ) -> Path: """Write a valid .last heartbeat file for testing.""" directory.mkdir(parents=True, exist_ok=True) payload = { "job": job, "timestamp": timestamp, "interval": interval, "pid": pid, } path = directory / f"{job}.last" path.write_text(json.dumps(payload), encoding="utf-8") return path # ── Tests ───────────────────────────────────────────────────────────────────── class TestCheckCronHeartbeats: def test_healthy_job(self, tmp_path: Path) -> None: """A job with a recent timestamp is reported as healthy.""" now = time.time() write_heartbeat_file(tmp_path, "morning-report", timestamp=now - 100, interval=3600) result = check_cron_heartbeats(str(tmp_path)) assert result["stale_count"] == 0 assert result["healthy_count"] == 1 assert len(result["jobs"]) == 1 job = result["jobs"][0] assert job["job"] == "morning-report" assert job["healthy"] is True assert job["age_secs"] == pytest.approx(100, abs=5) assert "OK" in job["message"] def test_stale_job(self, tmp_path: Path) -> None: """A job silent for > 2× its interval is reported as stale.""" now = time.time() # 3 hours ago with 1-hour interval → 3 > 2×1 → stale write_heartbeat_file(tmp_path, "hourly-sync", timestamp=now - 10800, interval=3600) result = check_cron_heartbeats(str(tmp_path)) assert result["stale_count"] == 1 assert result["healthy_count"] == 0 job = result["jobs"][0] assert job["job"] == "hourly-sync" assert job["healthy"] is False assert "STALE" in job["message"] assert "exceeds 2x threshold" in job["message"] def test_just_within_threshold(self, tmp_path: Path) -> None: """A job at exactly 2× interval is NOT stale (threshold is strictly >).""" fake_now = 1700000000.0 # age = 7200, threshold = 2 * 3600 = 7200 — NOT stale (not strictly greater) write_heartbeat_file(tmp_path, "edge-job", timestamp=fake_now - 7200, interval=3600) with patch("time.time", return_value=fake_now): result = check_cron_heartbeats(str(tmp_path)) # age_secs == 7200 and threshold = 7200, so not stale (age > threshold is False) assert result["stale_count"] == 0 def test_stale_threshold_just_over(self, tmp_path: Path) -> None: """A job silent for 2× interval + 1 second is stale.""" now = time.time() # age = 7201, threshold = 7200 — IS stale write_heartbeat_file(tmp_path, "edge-job", timestamp=now - 7201, interval=3600) result = check_cron_heartbeats(str(tmp_path)) assert result["stale_count"] == 1 def test_empty_dir_returns_safely(self, tmp_path: Path) -> None: """Empty heartbeat directory returns zero jobs without error.""" result = check_cron_heartbeats(str(tmp_path)) assert result["stale_count"] == 0 assert result["healthy_count"] == 0 assert result["jobs"] == [] assert "checked_at" in result def test_nonexistent_dir_returns_safely(self, tmp_path: Path) -> None: """Non-existent heartbeat dir returns empty result without error.""" missing = str(tmp_path / "does-not-exist") result = check_cron_heartbeats(missing) assert result["stale_count"] == 0 assert result["healthy_count"] == 0 assert result["jobs"] == [] def test_corrupt_json_handled_gracefully(self, tmp_path: Path) -> None: """Corrupt JSON in a .last file is reported as stale with an error message.""" bad_file = tmp_path / "broken-job.last" bad_file.write_text("{this is not valid json!}", encoding="utf-8") result = check_cron_heartbeats(str(tmp_path)) assert result["stale_count"] == 1 assert result["healthy_count"] == 0 job = result["jobs"][0] assert job["job"] == "broken-job" assert job["healthy"] is False assert "CORRUPT" in job["message"] assert job["last_seen"] is None def test_multiple_jobs_mixed(self, tmp_path: Path) -> None: """Mixed healthy and stale jobs are correctly counted.""" now = time.time() # 3 healthy jobs (recent) write_heartbeat_file(tmp_path, "job-a", timestamp=now - 60, interval=3600) write_heartbeat_file(tmp_path, "job-b", timestamp=now - 1800, interval=3600) write_heartbeat_file(tmp_path, "job-c", timestamp=now - 3599, interval=3600) # 2 stale jobs write_heartbeat_file(tmp_path, "job-d", timestamp=now - 10000, interval=3600) write_heartbeat_file(tmp_path, "job-e", timestamp=now - 86400, interval=3600) result = check_cron_heartbeats(str(tmp_path)) assert result["stale_count"] == 2 assert result["healthy_count"] == 3 assert len(result["jobs"]) == 5 stale_jobs = {j["job"] for j in result["jobs"] if not j["healthy"]} healthy_jobs = {j["job"] for j in result["jobs"] if j["healthy"]} assert stale_jobs == {"job-d", "job-e"} assert healthy_jobs == {"job-a", "job-b", "job-c"} def test_result_contains_required_keys(self, tmp_path: Path) -> None: """Result dict contains all required keys.""" now = time.time() write_heartbeat_file(tmp_path, "test-job", timestamp=now - 100, interval=3600) result = check_cron_heartbeats(str(tmp_path)) assert "checked_at" in result assert "jobs" in result assert "stale_count" in result assert "healthy_count" in result job = result["jobs"][0] assert "job" in job assert "healthy" in job assert "age_secs" in job assert "interval" in job assert "last_seen" in job assert "message" in job def test_job_last_seen_is_iso_timestamp(self, tmp_path: Path) -> None: """last_seen field is a valid ISO 8601 timestamp string.""" from datetime import datetime now = time.time() write_heartbeat_file(tmp_path, "ts-job", timestamp=now - 100, interval=3600) result = check_cron_heartbeats(str(tmp_path)) job = result["jobs"][0] # Should be parseable as an ISO timestamp assert job["last_seen"] is not None dt = datetime.fromisoformat(job["last_seen"]) assert dt is not None def test_checked_at_is_iso_timestamp(self, tmp_path: Path) -> None: """checked_at is a valid ISO 8601 timestamp string.""" from datetime import datetime result = check_cron_heartbeats(str(tmp_path)) dt = datetime.fromisoformat(result["checked_at"]) assert dt is not None def test_custom_interval_applied(self, tmp_path: Path) -> None: """Custom interval (e.g. daily) is respected for stale detection.""" now = time.time() # 25 hours ago with 12-hour interval → 25 > 2×12 = 24 → stale write_heartbeat_file(tmp_path, "daily-job", timestamp=now - 90000, interval=43200) result = check_cron_heartbeats(str(tmp_path)) assert result["stale_count"] == 1 job = result["jobs"][0] assert job["interval"] == 43200 assert not job["healthy"] def test_custom_interval_healthy(self, tmp_path: Path) -> None: """Job within 2× custom interval is healthy.""" now = time.time() # 23 hours ago with 12-hour interval → 23 < 2×12 = 24 → healthy write_heartbeat_file(tmp_path, "daily-job", timestamp=now - 82800, interval=43200) result = check_cron_heartbeats(str(tmp_path)) assert result["stale_count"] == 0 assert result["healthy_count"] == 1 def test_deterministic_with_mocked_time(self, tmp_path: Path) -> None: """Test with mocked time.time() for fully deterministic assertion.""" fake_now = 1700000000.0 write_heartbeat_file(tmp_path, "frozen-job", timestamp=fake_now - 500, interval=3600) with patch("time.time", return_value=fake_now): result = check_cron_heartbeats(str(tmp_path)) job = result["jobs"][0] # age should be exactly 500s assert job["age_secs"] == pytest.approx(500.0, abs=0.01) assert job["healthy"] is True # 500 < 7200 def test_stale_with_mocked_time(self, tmp_path: Path) -> None: """Stale detection with mocked time is exact.""" fake_now = 1700000000.0 # 8000s ago with 3600s interval → 8000 > 7200 → stale write_heartbeat_file(tmp_path, "frozen-stale", timestamp=fake_now - 8000, interval=3600) with patch("time.time", return_value=fake_now): result = check_cron_heartbeats(str(tmp_path)) job = result["jobs"][0] assert job["age_secs"] == pytest.approx(8000.0, abs=0.01) assert job["healthy"] is False class TestWriteAlert: def test_alert_file_created(self, tmp_path: Path) -> None: """write_alert creates an alert file in the alerts subdirectory.""" job_info = { "job": "test-job", "healthy": False, "age_secs": 8000.0, "interval": 3600, "last_seen": "2024-01-01T00:00:00+00:00", "message": "STALE (last 8000s ago, interval 3600s — exceeds 2x threshold of 7200s)", } write_alert(str(tmp_path), job_info) alert_file = tmp_path / "alerts" / "test-job.alert" assert alert_file.exists() def test_alert_file_content(self, tmp_path: Path) -> None: """Alert file contains correct JSON fields.""" job_info = { "job": "my-job", "healthy": False, "age_secs": 9000.0, "interval": 3600, "last_seen": "2024-06-01T12:00:00+00:00", "message": "STALE", } write_alert(str(tmp_path), job_info) alert_file = tmp_path / "alerts" / "my-job.alert" data = json.loads(alert_file.read_text()) assert data["alert_level"] == "P1" assert data["job"] == "my-job" assert data["age_secs"] == 9000.0 assert data["interval"] == 3600 assert "detected_at" in data def test_alert_no_partial_files_left(self, tmp_path: Path) -> None: """No temp files remain after a successful write.""" job_info = { "job": "clean-job", "healthy": False, "age_secs": 8000.0, "interval": 3600, "last_seen": None, "message": "STALE", } write_alert(str(tmp_path), job_info) alerts_dir = tmp_path / "alerts" # Only the .alert file should exist — no .tmp files files = list(alerts_dir.iterdir()) assert len(files) == 1 assert files[0].suffix == ".alert"