280 lines
9.1 KiB
Python
280 lines
9.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Cron Audit & Cleanup — find and remove dead/stale cron jobs.
|
|
|
|
Identifies jobs that waste scheduler cycles:
|
|
- Dead jobs: zero completions despite running for days
|
|
- Stale jobs: no successful run in N days
|
|
- Error jobs: high error ratio
|
|
|
|
Usage:
|
|
# Show dead jobs (zero completions)
|
|
python3 scripts/cron_audit.py
|
|
|
|
# Show stale jobs (no runs in 7 days)
|
|
python3 scripts/cron_audit.py --older-than 7
|
|
|
|
# Show all jobs with status
|
|
python3 scripts/cron_audit.py --all
|
|
|
|
# Disable dead jobs (sets enabled=False, state=paused)
|
|
python3 scripts/cron_audit.py --disable
|
|
|
|
# Delete dead jobs permanently
|
|
python3 scripts/cron_audit.py --delete
|
|
|
|
# Custom threshold: dead = 0 completions after N days
|
|
python3 scripts/cron_audit.py --min-age 3
|
|
|
|
# JSON output
|
|
python3 scripts/cron_audit.py --json
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
# The hermes ``cron`` package is a sibling of this script's directory, so put
# the parent of the directory containing this file on the import path first.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

try:
    from cron.jobs import (
        list_jobs,
        load_jobs,
        save_jobs,
        update_job,
        remove_job,
    )
except ImportError:
    # Fallback: direct file access, used only when ``cron.jobs`` cannot be
    # imported. The functions below mirror the five imported names.
    JOBS_FILE = Path.home() / ".hermes" / "cron" / "jobs.json"

    def load_jobs() -> list:
        """Return the parsed contents of JOBS_FILE, or ``[]`` if it is missing."""
        if JOBS_FILE.exists():
            # Explicit encoding so the result does not depend on the locale.
            return json.loads(JOBS_FILE.read_text(encoding="utf-8"))
        return []

    def save_jobs(jobs: list):
        """Write *jobs* to JOBS_FILE as indented JSON, creating parent dirs."""
        JOBS_FILE.parent.mkdir(parents=True, exist_ok=True)
        JOBS_FILE.write_text(json.dumps(jobs, indent=2), encoding="utf-8")

    def list_jobs(include_disabled=False):
        """Return stored jobs; disabled ones are dropped unless requested.

        A job without an ``enabled`` key counts as enabled.
        """
        jobs = load_jobs()
        if not include_disabled:
            jobs = [j for j in jobs if j.get("enabled", True)]
        return jobs

    def update_job(job_id, updates):
        """Merge *updates* into the first job whose id equals *job_id*.

        Returns the updated job dict after saving, or ``None`` when no job
        matches.
        """
        jobs = load_jobs()
        for job in jobs:
            # .get(): a stored entry without an "id" must not abort the scan
            # with KeyError before the real target is reached.
            if job.get("id") == job_id:
                job.update(updates)
                save_jobs(jobs)
                return job
        return None

    def remove_job(job_id):
        """Delete every job whose id equals *job_id*.

        Returns ``True`` if something was removed (and saved), else ``False``.
        """
        jobs = load_jobs()
        remaining = [j for j in jobs if j.get("id") != job_id]
        if len(remaining) < len(jobs):
            save_jobs(remaining)
            return True
        return False
|
|
|
|
|
|
# ── Analysis ──────────────────────────────────────────────────────────────
|
|
|
|
def parse_schedule_display(job: dict) -> str:
    """Return a human-readable schedule string for *job*.

    Lookup order:

    1. ``job["schedule"]`` itself, stringified, when it is present and is not
       a dict (e.g. a raw schedule expression).
    2. ``job["schedule"]["display"]`` when ``schedule`` is a dict.
    3. The top-level ``job["schedule_display"]``.
    4. The placeholder ``"?"``.

    The result is always a ``str``: null values fall through to the next
    candidate instead of being returned, because callers pad the result with
    a width format spec, which ``None`` does not support.
    """
    sched = job.get("schedule")
    if sched is not None and not isinstance(sched, dict):
        return str(sched)

    display = sched.get("display") if sched else None
    if display is None:
        display = job.get("schedule_display")
    return "?" if display is None else str(display)
|
|
|
|
|
|
def get_last_run_age(job: dict) -> Optional[timedelta]:
    """Return the time elapsed since ``job["last_run_at"]``.

    ``last_run_at`` is expected to be an ISO-8601 string; a trailing ``Z`` is
    accepted as UTC.

    Returns ``None`` when the field is missing, empty, not a string, or
    cannot be parsed.
    """
    last = job.get("last_run_at")
    if not last or not isinstance(last, str):
        return None
    try:
        # Rewrite "Z" so older interpreters' fromisoformat() accepts it.
        last_dt = datetime.fromisoformat(last.replace("Z", "+00:00"))
        if last_dt.utcoffset() is None:
            # Subtracting a naive value from an aware "now" raises TypeError,
            # which used to be swallowed and made the job look never-run.
            # NOTE(review): naive timestamps are read as system-local wall
            # time (what astimezone() presumes) — confirm against the writer.
            last_dt = last_dt.astimezone()
    except (ValueError, OverflowError, OSError):
        return None
    return datetime.now(timezone.utc) - last_dt
|
|
|
|
|
|
def _age_in_days(created: Any, now: datetime) -> int:
    """Return whole days between ISO timestamp *created* and *now* (0 if unknown)."""
    if not created or not isinstance(created, str):
        return 0
    try:
        created_dt = datetime.fromisoformat(created.replace("Z", "+00:00"))
        if created_dt.utcoffset() is None:
            # NOTE(review): naive timestamps are read as system-local wall
            # time (what astimezone() presumes) — confirm against the writer.
            created_dt = created_dt.astimezone()
    except (ValueError, OverflowError, OSError):
        return 0
    return (now - created_dt).days


def analyze_jobs(min_age_days: int = 0, stale_days: int = 0) -> Dict[str, List[dict]]:
    """Analyze all jobs and categorize them.

    Every job returned by ``list_jobs(include_disabled=True)`` is summarized
    and placed in exactly one bucket, checked in this order:

    - ``completed``: ``state == "completed"``.
    - ``disabled``: ``enabled`` is falsy.
    - ``dead``: zero recorded completions and at least *min_age_days* old.
    - ``stale``: *stale_days* > 0 and the last run is at least that many
      days ago.
    - ``healthy``: everything else.

    Args:
        min_age_days: Minimum age (from ``created_at``) before a job with no
            completions counts as dead. An unknown age is treated as 0.
        stale_days: Staleness threshold in days; 0 disables the stale check.

    Returns:
        Dict with keys: dead, stale, healthy, disabled, completed. Each value
        is a list of summary dicts with keys ``id``, ``name``, ``schedule``,
        ``enabled``, ``state``, ``completed``, ``last_run_at``,
        ``last_status``, ``last_error``, ``age_days``, ``last_run_age_days``.
    """
    all_jobs = list_jobs(include_disabled=True)
    now = datetime.now(timezone.utc)

    result: Dict[str, List[dict]] = {
        "dead": [],
        "stale": [],
        "healthy": [],
        "disabled": [],
        "completed": [],
    }

    for job in all_jobs:
        job_id = job.get("id", "?")
        enabled = job.get("enabled", True)
        state = job.get("state", "scheduled")

        # "repeat" may be absent or null; either way that means no completions.
        repeat = job.get("repeat")
        completed = (repeat.get("completed") if isinstance(repeat, dict) else None) or 0

        age_days = _age_in_days(job.get("created_at"), now)
        last_age = get_last_run_age(job)

        entry = {
            "id": job_id,
            # A null name falls back to the id so the entry stays printable.
            "name": job.get("name") or job_id,
            "schedule": parse_schedule_display(job),
            "enabled": enabled,
            "state": state,
            "completed": completed,
            "last_run_at": job.get("last_run_at"),
            "last_status": job.get("last_status", "never"),
            "last_error": job.get("last_error"),
            "age_days": age_days,
            # "is not None": a zero timedelta is falsy but is still a real run.
            "last_run_age_days": last_age.days if last_age is not None else None,
        }

        if state == "completed":
            # Previously these were filed under "disabled", leaving the
            # "completed" bucket permanently empty.
            result["completed"].append(entry)
        elif not enabled:
            result["disabled"].append(entry)
        elif completed == 0 and age_days >= min_age_days:
            result["dead"].append(entry)
        elif stale_days > 0 and last_age is not None and last_age.days >= stale_days:
            result["stale"].append(entry)
        else:
            result["healthy"].append(entry)

    return result
|
|
|
|
|
|
# ── Actions ───────────────────────────────────────────────────────────────
|
|
|
|
def disable_jobs(jobs: List[dict]) -> int:
    """Pause each job in *jobs* and report how many were updated.

    For every entry, ``update_job`` is called with ``enabled=False`` and
    ``state="paused"``. Entries for which it returns a falsy value are
    skipped silently; each success is echoed to stdout.

    Returns:
        The number of jobs that were successfully disabled.
    """
    disabled = 0
    for entry in jobs:
        updated = update_job(entry["id"], {"enabled": False, "state": "paused"})
        if not updated:
            continue
        disabled += 1
        print(f" DISABLED: {entry['name']} ({entry['schedule']})")
    return disabled
|
|
|
|
|
|
def delete_jobs(jobs: List[dict]) -> int:
    """Remove each job in *jobs* for good and report how many were removed.

    ``remove_job`` is called once per entry; entries for which it returns a
    falsy value are skipped silently, and each success is echoed to stdout.

    Returns:
        The number of jobs that were actually deleted.
    """
    removed = 0
    for entry in jobs:
        if not remove_job(entry["id"]):
            continue
        removed += 1
        print(f" DELETED: {entry['name']} ({entry['schedule']})")
    return removed
|
|
|
|
|
|
# ── Report ────────────────────────────────────────────────────────────────
|
|
|
|
def print_table(jobs: List[dict], title: str):
    """Print a table of jobs.

    Prints nothing when *jobs* is empty. Otherwise prints a blank line, a
    ``"<title> (<count>):"`` heading, a column header, a rule, and one row
    per job.

    Args:
        jobs: Summary dicts with the keys ``name``, ``schedule``,
            ``completed``, ``last_run_at``, ``last_run_age_days`` and
            ``last_status`` (``id`` is used as a fallback label if present).
        title: Heading shown above the table.
    """
    if not jobs:
        return

    print(f"\n{title} ({len(jobs)}):")
    print(f" {'Name':<35} {'Schedule':<15} {'Completed':<10} {'Last Run':<15} {'Status'}")
    print(f" {'-'*35} {'-'*15} {'-'*10} {'-'*15} {'-'*10}")

    for j in jobs:
        age_days = j["last_run_age_days"]
        if age_days is not None:
            last_run = f"{age_days}d ago"
        elif j["last_run_at"]:
            # Reached only when the timestamp could not be turned into an
            # age, so it may not be a string at all; stringify before slicing.
            last_run = str(j["last_run_at"])[:10]
        else:
            last_run = "never"

        # Width specs such as ":<35" raise TypeError on None, so every cell
        # is coerced to str with a placeholder for missing values.
        name = str(j["name"] if j["name"] is not None else j.get("id", "?"))
        schedule = str(j["schedule"] if j["schedule"] is not None else "?")
        completed = str(j["completed"] if j["completed"] is not None else 0)
        status = j["last_status"] or "never"

        print(f" {name:<35} {schedule:<15} {completed:<10} {last_run:<15} {status}")
|
|
|
|
|
|
# ── CLI ───────────────────────────────────────────────────────────────────
|
|
|
|
def main():
    """Command-line entry point: report on cron jobs and optionally clean up.

    Flow:

    1. Parse arguments and categorize jobs with ``analyze_jobs``.
    2. With ``--json``, dump the categorized jobs as JSON and stop; no
       cleanup is performed in this mode.
    3. Otherwise print a summary, the dead and stale tables, and the healthy
       table when ``--all`` is given.
    4. With ``--delete`` (after an interactive ``y`` confirmation) or
       ``--disable``, act on the dead *and* stale jobs. ``--delete`` takes
       precedence if both flags are given.
    """
    parser = argparse.ArgumentParser(description="Cron audit & cleanup")
    parser.add_argument("--all", action="store_true", help="Show all jobs including healthy")
    parser.add_argument("--older-than", type=int, default=0,
                        help="Stale threshold in days (jobs with no runs in N days)")
    parser.add_argument("--min-age", type=int, default=0,
                        help="Minimum job age in days to be considered dead")
    # The help text names stale jobs too, because the cleanup targets below
    # are dead + stale, not dead alone.
    parser.add_argument("--disable", action="store_true",
                        help="Disable dead jobs (and stale jobs when --older-than is set)")
    parser.add_argument("--delete", action="store_true",
                        help="Delete dead jobs (and stale jobs when --older-than is set) permanently")
    parser.add_argument("--json", dest="json_output", action="store_true", help="JSON output")
    args = parser.parse_args()

    analysis = analyze_jobs(min_age_days=args.min_age, stale_days=args.older_than)

    if args.json_output:
        print(json.dumps(analysis, indent=2))
        return

    # Summary
    total = sum(len(v) for v in analysis.values())
    print(f"Cron Audit — {total} total jobs")
    print(f" Healthy: {len(analysis['healthy'])}")
    print(f" Dead: {len(analysis['dead'])}")
    print(f" Stale: {len(analysis['stale'])}")
    print(f" Disabled: {len(analysis['disabled'])}")
    print(f" Completed: {len(analysis['completed'])}")

    if args.all:
        print_table(analysis["healthy"], "HEALTHY")
    print_table(analysis["dead"], "DEAD (zero completions)")
    print_table(analysis["stale"], "STALE (no recent runs)")

    if not args.disable and not args.delete:
        if analysis["dead"] or analysis["stale"]:
            print("\nTo clean up: --disable (pause) or --delete (permanent)")
        return

    targets = analysis["dead"] + analysis["stale"]
    if not targets:
        print("\nNothing to clean up.")
        return

    if args.delete:
        try:
            confirm = input(f"\nDelete {len(targets)} jobs permanently? [y/N] ")
        except EOFError:
            # No stdin (e.g. run from a pipe or scheduler): treat as "no"
            # rather than crashing, so nothing is deleted unattended.
            confirm = ""
        if confirm.strip().lower() != "y":
            print("Aborted.")
            return
        count = delete_jobs(targets)
        print(f"\nDeleted {count} jobs.")
    elif args.disable:
        count = disable_jobs(targets)
        print(f"\nDisabled {count} jobs.")


if __name__ == "__main__":
    main()
|