diff --git a/cron/audit-report.json b/cron/audit-report.json index 71314191..13869499 100644 --- a/cron/audit-report.json +++ b/cron/audit-report.json @@ -1,8 +1,10 @@ { - "audit_time": "2026-04-15T01:13:31.126215+00:00", - "total_jobs": 7, + "audit_time": "2026-04-17T05:34:45.162227+00:00", + "total_jobs": 33, + "hermes_jobs": 8, + "crontab_jobs": 25, "summary": { - "healthy": 7, + "healthy": 33, "transient_errors": 0, "systemic_failures": 0 }, @@ -99,6 +101,344 @@ "category": "healthy", "reason": "Never run, no errors", "action": "" + }, + { + "id": "tmux-supervisor-513", + "name": "Autonomous Cron Supervisor", + "schedule": "every 7m", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Never run, no errors", + "action": "" + }, + { + "id": "crontab-allegro-0055", + "name": "model_download_guard.sh", + "schedule": "0 6 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:allegro) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-allegro-0e85", + "name": "heartbeat_daemon.py", + "schedule": "*/15 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:allegro) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-allegro-0e26", + "name": "Allegro Morning Report at 0600", + "schedule": "0 6 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:allegro) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-allegro-1928", + "name": "1 (with retry)", + "schedule": "0,30 * * * *", + "state": "scheduled", + "enabled": true, + 
"last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:allegro) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-allegro-208e", + "name": "2 (with retry)", + "schedule": "15,45 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:allegro) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-allegro-214d", + "name": "dead_man_monitor.sh", + "schedule": "*/2 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:allegro) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-allegro-20be", + "name": "config-deadman.sh", + "schedule": "*/2 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:allegro) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-bezalel-0c52", + "name": "run_nightly_watch.sh", + "schedule": "0 2 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:bezalel) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-bezalel-0860", + "name": "mempalace_nightly.sh", + "schedule": "0 3 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:bezalel) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-bezalel-08a6", + "name": "&)", + "schedule": "*/10 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": 
null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:bezalel) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-bezalel-09b8", + "name": "backup_databases.sh", + "schedule": "30 3 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:bezalel) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-bezalel-0437", + "name": "meta_heartbeat.sh", + "schedule": "*/15 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:bezalel) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-bezalel-09dd", + "name": "secret_guard.sh", + "schedule": "0 4 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:bezalel) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-bezalel-1c54", + "name": "backup_pipeline.sh", + "schedule": "0 4 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:bezalel) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-bezalel-079b", + "name": "ultraplan.py", + "schedule": "0 6 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:bezalel) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-bezalel-2083", + "name": "emacs-daemon-start.sh", + "schedule": "@reboot", + "state": "scheduled", + "enabled": true, + "last_status": null, + 
"last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:bezalel) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-bezalel-0335", + "name": "ngircd-start.sh", + "schedule": "@reboot", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:bezalel) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-ezra-1d54", + "name": "burn-mode.sh", + "schedule": "*/15 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:ezra) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-ezra-1cbf", + "name": "gitea_monitor.py", + "schedule": "*/5 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:ezra) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-ezra-0890", + "name": "awareness_loop.py", + "schedule": "*/5 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:ezra) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-ezra-0273", + "name": "cron_health_monitor.py", + "schedule": "*/10 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:ezra) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-ezra-1b31", + "name": "morning_kt_compiler.py", + "schedule": "0 6 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": 
null, + "category": "healthy", + "reason": "Crontab (crontab:ezra) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-ezra-1721", + "name": "burndown_generator.py", + "schedule": "5 6 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:ezra) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-ezra-1b5c", + "name": "mempalace_nightly.sh", + "schedule": "0 3 * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:ezra) \u2014 verify logs manually", + "action": "verify-logs" + }, + { + "id": "crontab-ezra-0172", + "name": "dispatch-direct.sh", + "schedule": "*/15 * * * *", + "state": "scheduled", + "enabled": true, + "last_status": null, + "last_error": null, + "last_run_at": null, + "category": "healthy", + "reason": "Crontab (crontab:ezra) \u2014 verify logs manually", + "action": "verify-logs" } ] } \ No newline at end of file diff --git a/scripts/ci-cron-validate.py b/scripts/ci-cron-validate.py new file mode 100644 index 00000000..ea669f9b --- /dev/null +++ b/scripts/ci-cron-validate.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +"""CI: Validate cron fleet health. 
Exit 1 on systemic failures.""" + +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent)) +from cron_audit_662 import audit_fleet, load_cron_state, load_crontab_backups + +def main(): + hermes = load_cron_state() + crontabs = load_crontab_backups(Path(__file__).parent.parent / "cron" / "vps") + report = audit_fleet(hermes, crontabs) + print(f"Cron Audit CI: {report['total_jobs']} jobs — H:{report['summary']['healthy']} T:{report['summary']['transient_errors']} S:{report['summary']['systemic_failures']}") + if report["systemic_jobs"]: + for j in report["systemic_jobs"]: + print(f" FAIL: {j['name']} — {j['reason']}") + sys.exit(1) + print("CI PASSED") + sys.exit(0) + +if __name__ == "__main__": + main() diff --git a/scripts/cron-audit-662.py b/scripts/cron-audit-662.py index 9582b71f..2778224c 100644 --- a/scripts/cron-audit-662.py +++ b/scripts/cron-audit-662.py @@ -218,6 +218,95 @@ def generate_issue_body(job: Dict[str, Any]) -> str: """ +# --- Crontab Parsing --- + +def parse_crontab(text: str, source: str = "unknown") -> list: + """Parse a crontab file into job-like dicts for audit.""" + import re + jobs = [] + cron_pattern = re.compile( + r'^(?:@\w+|[\d*,/\-]+\s+[\d*,/\-]+\s+[\d*,/\-]+\s+[\d*,/\-]+\s+[\d*,/\-]+)\s+(.*)' + ) + schedule_pattern = re.compile( + r'^(?:@\w+|[\d*,/\-]+\s+[\d*,/\-]+\s+[\d*,/\-]+\s+[\d*,/\-]+\s+[\d*,/\-]+)' + ) + + for line in text.split("\n"): + line = line.strip() + if not line or line.startswith("#"): + continue + sm = schedule_pattern.match(line) + cm = cron_pattern.match(line) + if not sm or not cm: + continue + schedule_raw = sm.group(0).strip() + command = cm.group(1).strip() + name_part = command.split("#")[-1].strip() if "#" in command else "" + if not name_part: + cmd_base = command.split(">>")[0].strip().split() + name_part = os.path.basename(cmd_base[-1]) if cmd_base else "unnamed" + clean_cmd = command.split(">>")[0].split("#")[0].strip()[:200] + jobs.append({ + "id": 
# --- Crontab Parsing ---

def parse_crontab(text: str, source: str = "unknown") -> list:
    """Parse crontab text into job-like dicts for the fleet audit.

    Blank lines, comment lines and anything that does not start with a cron
    schedule (for example ``MAILTO=...`` assignments) are skipped.

    Args:
        text: Full contents of a crontab file.
        source: Label of the host/user the crontab came from; it is embedded
            in each job ``id`` and in ``_source`` as ``crontab:<source>``.

    Returns:
        One dict per job line with the keys ``id``, ``name``,
        ``schedule_display``, ``schedule``, ``state``, ``enabled``,
        ``last_status``, ``last_error``, ``last_run_at``, ``_source`` and
        ``_command``. The run-state fields are always ``None`` because a
        crontab file carries no execution history.
    """
    import hashlib
    import os
    import re

    # One schedule field: numbers, list/range/step punctuation, and the
    # month/day names crontab accepts. Names are matched case-insensitively.
    names = "jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|sun|mon|tue|wed|thu|fri|sat"
    field = rf"(?:[\d*,/\-]|{names})+"
    entry_re = re.compile(rf"^(@\w+|{field}(?:\s+{field}){{4}})\s+(.*)", re.IGNORECASE)
    # A trailing comment starts at the first "#" that begins a word.
    comment_re = re.compile(r"(?:^|\s)#(.*)$")
    # First shell redirection (">", ">>", "2>", "<", ...) after the command words.
    redirect_re = re.compile(r"\s\d*[<>]")

    jobs = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        entry = entry_re.match(line)
        if entry is None:
            continue

        schedule = entry.group(1)
        command = entry.group(2).strip()

        comment = comment_re.search(command)
        runnable = command[:comment.start()] if comment else command
        runnable = runnable.split(">>")[0].strip()

        # Prefer the human-written trailing comment as the display name. Using
        # the FIRST comment marker keeps labels such as "Burn #1 (with retry)"
        # intact instead of truncating them to "1 (with retry)".
        name = comment.group(1).strip() if comment else ""
        if not name:
            # Otherwise use the basename of the last real word of the command,
            # ignoring redirections and pure shell punctuation such as "&)".
            name = "unnamed"
            words = redirect_re.split(runnable, 1)[0].split()
            for word in reversed(words):
                candidate = os.path.basename(word.strip("()&;|"))
                if candidate:
                    name = candidate
                    break

        # The built-in hash() of a str is salted per interpreter process, so it
        # would produce a different id on every run. A content digest keeps the
        # id stable across runs while preserving the 4-hex-digit shape.
        digest = hashlib.sha256(command.encode("utf-8")).hexdigest()

        jobs.append({
            "id": f"crontab-{source}-{digest[:4]}",
            "name": name,
            "schedule_display": schedule,
            "schedule": schedule,
            "state": "scheduled",
            "enabled": True,
            "last_status": None,
            "last_error": None,
            "last_run_at": None,
            "_source": f"crontab:{source}",
            "_command": runnable[:200],
        })
    return jobs


def load_crontab_backups(backup_dir) -> list:
    """Load cron jobs from every ``*-crontab-backup.txt`` file in a directory.

    Args:
        backup_dir: Directory (``str`` or ``Path``) holding the backup files.
            The part of each file name before ``-crontab-backup.txt`` is used
            as the job ``source``.

    Returns:
        The parsed jobs of all backup files, in sorted file-name order. An
        empty list if the directory does not exist.
    """
    from pathlib import Path

    suffix = "-crontab-backup.txt"
    root = Path(backup_dir)
    if not root.is_dir():
        return []

    jobs = []
    for path in sorted(root.glob(f"*{suffix}")):
        if not path.is_file():
            continue
        host = path.name[: -len(suffix)]
        # Decode explicitly: the platform default encoding varies by machine,
        # and one undecodable byte should not abort the whole audit.
        text = path.read_text(encoding="utf-8", errors="replace")
        jobs.extend(parse_crontab(text, source=host))
    return jobs


def audit_fleet(hermes_jobs: list, crontab_jobs: list) -> dict:
    """Run the full fleet audit over hermes cron jobs plus VPS crontab jobs.

    Hermes jobs are classified with ``categorize_job``. Crontab jobs have no
    recorded run history, so each is reported as ``healthy`` with the action
    ``verify-logs``; the ``healthy`` count therefore includes crontab entries
    that have not actually been verified.

    Args:
        hermes_jobs: Job dicts from the hermes cron state.
        crontab_jobs: Job dicts as produced by ``parse_crontab``.

    Returns:
        A report dict with ``audit_time`` (ISO-8601, UTC), ``total_jobs``,
        ``hermes_jobs``, ``crontab_jobs``, ``summary``, ``systemic_jobs``,
        ``transient_jobs`` and ``all_jobs``.
    """
    now = datetime.now(timezone.utc)

    hermes_results = [categorize_job(job, now) for job in hermes_jobs]
    crontab_results = [
        {
            "id": job["id"],
            "name": job["name"],
            "schedule": job.get("schedule_display", ""),
            "state": "scheduled",
            "enabled": True,
            "last_status": None,
            "last_error": None,
            "last_run_at": None,
            "category": "healthy",
            "reason": f"Crontab ({job.get('_source', '?')}) — verify logs manually",
            "action": "verify-logs",
        }
        for job in crontab_jobs
    ]

    combined = hermes_results + crontab_results
    buckets = {"healthy": [], "transient": [], "systemic": []}
    for entry in combined:
        bucket = buckets.get(entry["category"])
        if bucket is not None:
            bucket.append(entry)

    return {
        "audit_time": now.isoformat(),
        "total_jobs": len(combined),
        "hermes_jobs": len(hermes_results),
        "crontab_jobs": len(crontab_results),
        "summary": {
            "healthy": len(buckets["healthy"]),
            "transient_errors": len(buckets["transient"]),
            "systemic_failures": len(buckets["systemic"]),
        },
        "systemic_jobs": [
            {"id": e["id"], "name": e["name"], "reason": e["reason"], "last_error": e.get("last_error", "")}
            for e in buckets["systemic"]
        ],
        "transient_jobs": [
            {"id": e["id"], "name": e["name"], "reason": e["reason"]}
            for e in buckets["transient"]
        ],
        "all_jobs": combined,
    }
#!/usr/bin/env python3
"""Tests for cron-audit-662.py — Cron Fleet Audit."""

import json
import re
import tempfile
from datetime import datetime, timezone, timedelta
from pathlib import Path
import pytest
import sys
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))

from cron_audit_662 import (
    audit_fleet,
    audit_jobs,
    categorize_job,
    load_crontab_backups,
    parse_crontab,
    parse_timestamp,
)

# Pinned "current time" for categorize_job, which takes `now` as an argument.
FIXED_NOW = datetime(2026, 4, 14, 20, 0, 0, tzinfo=timezone.utc)


def _error_job(name, age, now):
    """Build a job whose last run failed `age` before `now`."""
    return {
        "name": name,
        "last_status": "error",
        "last_error": "fail",
        "last_run_at": (now - age).isoformat(),
        "enabled": True,
        "state": "scheduled",
    }


class TestCrontabParsing:
    """parse_crontab: schedule, name, id and source extraction."""

    def test_standard_schedule(self):
        jobs = parse_crontab("*/15 * * * * /root/heartbeat.sh", source="test")
        assert len(jobs) == 1
        assert jobs[0]["schedule"] == "*/15 * * * *"
        assert jobs[0]["enabled"] is True

    def test_name_falls_back_to_script_basename(self):
        jobs = parse_crontab("*/15 * * * * /root/heartbeat.sh", source="test")
        assert jobs[0]["name"] == "heartbeat.sh"

    def test_comment_name(self):
        jobs = parse_crontab("0 6 * * * /bin/backup.sh # Morning Backup", source="test")
        assert jobs[0]["name"] == "Morning Backup"

    def test_reboot_entry(self):
        jobs = parse_crontab("@reboot /root/start.sh", source="test")
        assert len(jobs) == 1
        assert jobs[0]["schedule"] == "@reboot"

    def test_skips_comments(self):
        jobs = parse_crontab("# comment\n0 * * * * /bin/real.sh", source="test")
        assert len(jobs) == 1

    def test_multiple(self):
        jobs = parse_crontab("*/5 * * * * /bin/a.sh\n0 6 * * * /bin/b.sh # B\n@reboot /bin/c.sh", source="vps")
        assert len(jobs) == 3

    def test_source_tagged(self):
        jobs = parse_crontab("0 * * * * /bin/x.sh", source="allegro")
        assert "allegro" in jobs[0]["_source"]

    def test_id_shape(self):
        jobs = parse_crontab("0 * * * * /bin/x.sh", source="allegro")
        assert re.fullmatch(r"crontab-allegro-[0-9a-f]{4}", jobs[0]["id"])


class TestCategorizeJob:
    """categorize_job: healthy / transient / systemic classification."""

    def test_ok_is_healthy(self):
        r = categorize_job({"name": "t", "last_status": "ok", "enabled": True, "state": "scheduled"}, FIXED_NOW)
        assert r["category"] == "healthy"

    def test_never_run_is_healthy(self):
        r = categorize_job({"id": "a2", "name": "Never", "last_status": None, "last_error": None}, FIXED_NOW)
        assert r["category"] == "healthy"

    def test_paused_healthy(self):
        r = categorize_job({"name": "t", "state": "paused", "enabled": False}, FIXED_NOW)
        assert r["category"] == "healthy"

    def test_completed_is_healthy(self):
        r = categorize_job({"id": "a4", "name": "Done", "state": "completed"}, FIXED_NOW)
        assert r["category"] == "healthy"

    def test_recent_error_transient(self):
        r = categorize_job(_error_job("t", timedelta(hours=2), FIXED_NOW), FIXED_NOW)
        assert r["category"] == "transient"

    def test_old_error_systemic(self):
        r = categorize_job(_error_job("t", timedelta(hours=72), FIXED_NOW), FIXED_NOW)
        assert r["category"] == "systemic"
        assert r["action"] == "disable"

    def test_systemic_boundary(self):
        """48.1 hours should be systemic."""
        r = categorize_job(_error_job("t", timedelta(hours=48, minutes=6), FIXED_NOW), FIXED_NOW)
        assert r["category"] == "systemic"


class TestAuditJobs:
    """audit_jobs: the hermes-only report still used when no crontabs exist."""

    def test_empty(self):
        report = audit_jobs([])
        assert report["total_jobs"] == 0
        assert report["summary"]["healthy"] == 0

    def test_mixed_report(self):
        # audit_jobs is called without a clock argument here, so ages are
        # built relative to the real current time rather than FIXED_NOW.
        now = datetime.now(timezone.utc)
        jobs = [
            {"id": "h1", "name": "Healthy", "last_status": "ok", "enabled": True, "state": "scheduled"},
            dict(_error_job("Transient", timedelta(hours=1), now), id="t1"),
            dict(_error_job("Systemic", timedelta(hours=72), now), id="s1"),
            {"id": "p1", "name": "Paused", "state": "paused", "paused_reason": "frozen"},
        ]
        report = audit_jobs(jobs)
        assert report["summary"]["healthy"] == 2
        assert report["summary"]["transient_errors"] == 1
        assert report["summary"]["systemic_failures"] == 1
        assert len(report["systemic_jobs"]) == 1
        assert report["systemic_jobs"][0]["name"] == "Systemic"


class TestAuditFleet:
    """audit_fleet: hermes jobs and crontab jobs combined into one report."""

    def test_empty(self):
        r = audit_fleet([], [])
        assert r["total_jobs"] == 0

    def test_mixed(self):
        now = datetime.now(timezone.utc)
        hermes = [
            {"name": "good", "last_status": "ok", "enabled": True, "state": "scheduled"},
            {"name": "bad", "last_status": "error", "last_error": "fail",
             "last_run_at": (now - timedelta(hours=72)).isoformat()},
        ]
        crontab = parse_crontab("0 * * * * /bin/x.sh", source="vps")
        r = audit_fleet(hermes, crontab)
        assert r["total_jobs"] == 3
        assert r["hermes_jobs"] == 2
        assert r["crontab_jobs"] == 1
        assert len(r["systemic_jobs"]) == 1
        # Crontab entries are always reported as healthy, so they add to that count.
        assert r["summary"]["healthy"] == 2
        assert r["summary"]["transient_errors"] == 0
        assert r["summary"]["systemic_failures"] == 1

    def test_crontab_report_is_json_serializable(self):
        crontab = parse_crontab("0 * * * * /bin/x.sh", source="vps")
        r = audit_fleet([], crontab)
        assert json.loads(json.dumps(r))["crontab_jobs"] == 1


class TestCrontabBackupLoading:
    """load_crontab_backups: directory scanning."""

    def test_loads_directory(self, tmp_path):
        (tmp_path / "allegro-crontab-backup.txt").write_text("*/15 * * * * /root/hb.sh # HB\n")
        (tmp_path / "ezra-crontab-backup.txt").write_text("0 6 * * * /root/rpt.sh\n")
        jobs = load_crontab_backups(tmp_path)
        assert len(jobs) == 2

    def test_ignores_other_files(self, tmp_path):
        (tmp_path / "notes.txt").write_text("0 6 * * * /root/rpt.sh\n")
        assert load_crontab_backups(tmp_path) == []

    def test_empty_dir(self, tmp_path):
        assert load_crontab_backups(tmp_path) == []

    def test_missing_dir(self, tmp_path):
        assert load_crontab_backups(tmp_path / "does-not-exist") == []


class TestTimestampParsing:
    """parse_timestamp: ISO strings and empty values."""

    def test_iso_with_tz(self):
        assert parse_timestamp("2026-04-14T15:30:00+00:00") is not None

    @pytest.mark.parametrize("value", ["", None])
    def test_empty(self, value):
        assert parse_timestamp(value) is None