"""
Tests for Bezalel Cron Heartbeat system (poka-yoke #1096).

Validates:
- check_cron_heartbeats() with healthy and stale jobs
- Empty heartbeat dir (no .last files) returns safely
- Corrupt JSON in a .last file is handled gracefully
- Mixed healthy/stale jobs
- Alert file writing (write_alert)
- The 2× interval staleness threshold is applied correctly

Uses importlib to load bin/bezalel_heartbeat_check.py without __init__.py,
following the same pattern as test_nexus_watchdog.py.

Refs: https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/1096
"""

from __future__ import annotations

import importlib.util
import json
import sys
import time
from datetime import datetime
from pathlib import Path
from unittest.mock import patch

import pytest

# ── Load module under test ────────────────────────────────────────────────────

# The checker lives in bin/ as a standalone script (no __init__.py), so it is
# loaded directly from its file path instead of being imported by name.
PROJECT_ROOT = Path(__file__).parent.parent

_hb_spec = importlib.util.spec_from_file_location(
    "bezalel_heartbeat_check_test",
    PROJECT_ROOT / "bin" / "bezalel_heartbeat_check.py",
)
# Fail with a clear message rather than an AttributeError on None below.
if _hb_spec is None or _hb_spec.loader is None:
    raise ImportError("cannot build an import spec for bin/bezalel_heartbeat_check.py")

_hb_mod = importlib.util.module_from_spec(_hb_spec)
# Registered under a test-specific name before the module body is executed.
sys.modules["bezalel_heartbeat_check_test"] = _hb_mod
_hb_spec.loader.exec_module(_hb_mod)

# Module-level aliases for the two functions exercised by the tests below.
check_cron_heartbeats = _hb_mod.check_cron_heartbeats
write_alert = _hb_mod.write_alert


# ── Helpers ───────────────────────────────────────────────────────────────────

def write_heartbeat_file(
    directory: Path,
    job: str,
    timestamp: float,
    interval: int = 3600,
    pid: int = 12345,
) -> Path:
    """Write a valid .last heartbeat file for testing.

    Args:
        directory: Directory that receives the file; created (with parents)
            when it does not exist yet.
        job: Job name, stored in the payload and used as the file stem.
        timestamp: Value stored under the ``timestamp`` key.
        interval: Value stored under the ``interval`` key.
        pid: Value stored under the ``pid`` key.

    Returns:
        Path of the written ``<job>.last`` file.
    """
    directory.mkdir(parents=True, exist_ok=True)
    payload = {
        "job": job,
        "timestamp": timestamp,
        "interval": interval,
        "pid": pid,
    }
    path = directory / f"{job}.last"
    path.write_text(json.dumps(payload), encoding="utf-8")
    return path


# ── Tests ─────────────────────────────────────────────────────────────────────

class TestCheckCronHeartbeats:
    """Tests for check_cron_heartbeats(): healthy, stale, boundary and broken inputs."""

    def test_healthy_job(self, tmp_path: Path) -> None:
        """A job with a recent timestamp is reported as healthy."""
        now = time.time()
        write_heartbeat_file(tmp_path, "morning-report", timestamp=now - 100, interval=3600)

        result = check_cron_heartbeats(str(tmp_path))

        assert result["stale_count"] == 0
        assert result["healthy_count"] == 1
        assert len(result["jobs"]) == 1

        job = result["jobs"][0]
        assert job["job"] == "morning-report"
        assert job["healthy"] is True
        # Real clock in use, so allow a few seconds of slack.
        assert job["age_secs"] == pytest.approx(100, abs=5)
        assert "OK" in job["message"]

    def test_stale_job(self, tmp_path: Path) -> None:
        """A job silent for > 2× its interval is reported as stale."""
        now = time.time()
        # 3 hours ago with 1-hour interval → 3 > 2×1 → stale
        write_heartbeat_file(tmp_path, "hourly-sync", timestamp=now - 10800, interval=3600)

        result = check_cron_heartbeats(str(tmp_path))

        assert result["stale_count"] == 1
        assert result["healthy_count"] == 0

        job = result["jobs"][0]
        assert job["job"] == "hourly-sync"
        assert job["healthy"] is False
        assert "STALE" in job["message"]
        assert "exceeds 2x threshold" in job["message"]

    def test_just_within_threshold(self, tmp_path: Path) -> None:
        """A job at exactly 2× interval is NOT stale (threshold is strictly >)."""
        fake_now = 1700000000.0
        # age = 7200, threshold = 2 * 3600 = 7200 — NOT stale (not strictly greater)
        write_heartbeat_file(tmp_path, "edge-job", timestamp=fake_now - 7200, interval=3600)

        with patch("time.time", return_value=fake_now):
            result = check_cron_heartbeats(str(tmp_path))

        # age_secs == 7200 and threshold = 7200, so not stale (age > threshold is False)
        assert result["stale_count"] == 0

    def test_stale_threshold_just_over(self, tmp_path: Path) -> None:
        """A job silent for 2× interval + 1 second is stale."""
        # Clock is pinned, as in the sibling boundary test, so the age is
        # exactly 7201s rather than "7201s plus however long the test took".
        fake_now = 1700000000.0
        # age = 7201, threshold = 7200 — IS stale
        write_heartbeat_file(tmp_path, "edge-job", timestamp=fake_now - 7201, interval=3600)

        with patch("time.time", return_value=fake_now):
            result = check_cron_heartbeats(str(tmp_path))

        assert result["stale_count"] == 1

    def test_empty_dir_returns_safely(self, tmp_path: Path) -> None:
        """Empty heartbeat directory returns zero jobs without error."""
        result = check_cron_heartbeats(str(tmp_path))

        assert result["stale_count"] == 0
        assert result["healthy_count"] == 0
        assert result["jobs"] == []
        assert "checked_at" in result

    def test_nonexistent_dir_returns_safely(self, tmp_path: Path) -> None:
        """Non-existent heartbeat dir returns empty result without error."""
        missing = str(tmp_path / "does-not-exist")
        result = check_cron_heartbeats(missing)

        assert result["stale_count"] == 0
        assert result["healthy_count"] == 0
        assert result["jobs"] == []

    def test_corrupt_json_handled_gracefully(self, tmp_path: Path) -> None:
        """Corrupt JSON in a .last file is reported as stale with an error message."""
        bad_file = tmp_path / "broken-job.last"
        bad_file.write_text("{this is not valid json!}", encoding="utf-8")

        result = check_cron_heartbeats(str(tmp_path))

        assert result["stale_count"] == 1
        assert result["healthy_count"] == 0

        job = result["jobs"][0]
        assert job["job"] == "broken-job"
        assert job["healthy"] is False
        assert "CORRUPT" in job["message"]
        assert job["last_seen"] is None

    def test_multiple_jobs_mixed(self, tmp_path: Path) -> None:
        """Mixed healthy and stale jobs are correctly counted."""
        now = time.time()

        # 3 healthy jobs (recent)
        write_heartbeat_file(tmp_path, "job-a", timestamp=now - 60, interval=3600)
        write_heartbeat_file(tmp_path, "job-b", timestamp=now - 1800, interval=3600)
        write_heartbeat_file(tmp_path, "job-c", timestamp=now - 3599, interval=3600)

        # 2 stale jobs
        write_heartbeat_file(tmp_path, "job-d", timestamp=now - 10000, interval=3600)
        write_heartbeat_file(tmp_path, "job-e", timestamp=now - 86400, interval=3600)

        result = check_cron_heartbeats(str(tmp_path))

        assert result["stale_count"] == 2
        assert result["healthy_count"] == 3
        assert len(result["jobs"]) == 5

        stale_jobs = {j["job"] for j in result["jobs"] if not j["healthy"]}
        healthy_jobs = {j["job"] for j in result["jobs"] if j["healthy"]}
        assert stale_jobs == {"job-d", "job-e"}
        assert healthy_jobs == {"job-a", "job-b", "job-c"}

    def test_result_contains_required_keys(self, tmp_path: Path) -> None:
        """Result dict contains all required keys."""
        now = time.time()
        write_heartbeat_file(tmp_path, "test-job", timestamp=now - 100, interval=3600)

        result = check_cron_heartbeats(str(tmp_path))

        assert "checked_at" in result
        assert "jobs" in result
        assert "stale_count" in result
        assert "healthy_count" in result

        job = result["jobs"][0]
        assert "job" in job
        assert "healthy" in job
        assert "age_secs" in job
        assert "interval" in job
        assert "last_seen" in job
        assert "message" in job

    def test_job_last_seen_is_iso_timestamp(self, tmp_path: Path) -> None:
        """last_seen field is a valid ISO 8601 timestamp string."""
        now = time.time()
        write_heartbeat_file(tmp_path, "ts-job", timestamp=now - 100, interval=3600)

        result = check_cron_heartbeats(str(tmp_path))
        job = result["jobs"][0]

        assert job["last_seen"] is not None
        # fromisoformat() raises ValueError on a malformed string, which fails
        # the test; the isinstance check documents the expected outcome.
        assert isinstance(datetime.fromisoformat(job["last_seen"]), datetime)

    def test_checked_at_is_iso_timestamp(self, tmp_path: Path) -> None:
        """checked_at is a valid ISO 8601 timestamp string."""
        result = check_cron_heartbeats(str(tmp_path))

        # A malformed value makes fromisoformat() raise and fail the test.
        assert isinstance(datetime.fromisoformat(result["checked_at"]), datetime)

    def test_custom_interval_applied(self, tmp_path: Path) -> None:
        """Custom interval (e.g. daily) is respected for stale detection."""
        now = time.time()
        # 25 hours ago with 12-hour interval → 25 > 2×12 = 24 → stale
        write_heartbeat_file(tmp_path, "daily-job", timestamp=now - 90000, interval=43200)

        result = check_cron_heartbeats(str(tmp_path))

        assert result["stale_count"] == 1
        job = result["jobs"][0]
        assert job["interval"] == 43200
        assert not job["healthy"]

    def test_custom_interval_healthy(self, tmp_path: Path) -> None:
        """Job within 2× custom interval is healthy."""
        now = time.time()
        # 23 hours ago with 12-hour interval → 23 < 2×12 = 24 → healthy
        write_heartbeat_file(tmp_path, "daily-job", timestamp=now - 82800, interval=43200)

        result = check_cron_heartbeats(str(tmp_path))

        assert result["stale_count"] == 0
        assert result["healthy_count"] == 1

    def test_deterministic_with_mocked_time(self, tmp_path: Path) -> None:
        """Test with mocked time.time() for fully deterministic assertion."""
        fake_now = 1700000000.0

        write_heartbeat_file(tmp_path, "frozen-job", timestamp=fake_now - 500, interval=3600)

        with patch("time.time", return_value=fake_now):
            result = check_cron_heartbeats(str(tmp_path))

        job = result["jobs"][0]
        # age should be exactly 500s
        assert job["age_secs"] == pytest.approx(500.0, abs=0.01)
        assert job["healthy"] is True  # 500 < 7200

    def test_stale_with_mocked_time(self, tmp_path: Path) -> None:
        """Stale detection with mocked time is exact."""
        fake_now = 1700000000.0

        # 8000s ago with 3600s interval → 8000 > 7200 → stale
        write_heartbeat_file(tmp_path, "frozen-stale", timestamp=fake_now - 8000, interval=3600)

        with patch("time.time", return_value=fake_now):
            result = check_cron_heartbeats(str(tmp_path))

        job = result["jobs"][0]
        assert job["age_secs"] == pytest.approx(8000.0, abs=0.01)
        assert job["healthy"] is False


class TestWriteAlert:
    """Tests for write_alert(): file location, JSON content and leftover files."""

    def test_alert_file_created(self, tmp_path: Path) -> None:
        """write_alert creates an alert file in the alerts subdirectory."""
        job_info = {
            "job": "test-job",
            "healthy": False,
            "age_secs": 8000.0,
            "interval": 3600,
            "last_seen": "2024-01-01T00:00:00+00:00",
            "message": "STALE (last 8000s ago, interval 3600s — exceeds 2x threshold of 7200s)",
        }
        write_alert(str(tmp_path), job_info)

        alert_file = tmp_path / "alerts" / "test-job.alert"
        assert alert_file.exists()

    def test_alert_file_content(self, tmp_path: Path) -> None:
        """Alert file contains correct JSON fields."""
        job_info = {
            "job": "my-job",
            "healthy": False,
            "age_secs": 9000.0,
            "interval": 3600,
            "last_seen": "2024-06-01T12:00:00+00:00",
            "message": "STALE",
        }
        write_alert(str(tmp_path), job_info)

        alert_file = tmp_path / "alerts" / "my-job.alert"
        # Explicit encoding keeps the read independent of the platform default.
        data = json.loads(alert_file.read_text(encoding="utf-8"))

        assert data["alert_level"] == "P1"
        assert data["job"] == "my-job"
        assert data["age_secs"] == 9000.0
        assert data["interval"] == 3600
        assert "detected_at" in data

    def test_alert_no_partial_files_left(self, tmp_path: Path) -> None:
        """No temp files remain after a successful write."""
        job_info = {
            "job": "clean-job",
            "healthy": False,
            "age_secs": 8000.0,
            "interval": 3600,
            "last_seen": None,
            "message": "STALE",
        }
        write_alert(str(tmp_path), job_info)

        alerts_dir = tmp_path / "alerts"
        # Only the .alert file should exist — no .tmp files. Comparing the
        # full listing names any stray file in the failure output.
        leftover = [entry.name for entry in alerts_dir.iterdir()]
        assert leftover == ["clean-job.alert"]