Files
hermes-agent/scripts/cron_audit.py
Alexander Whitestone 6b0fca6944 feat: cron job audit and cleanup script
Closes #890

Finds dead cron jobs (zero completions, stale) and provides
--disable and --delete actions to clean them up.
2026-04-17 05:00:06 +00:00

182 lines
5.6 KiB
Python

#!/usr/bin/env python3
"""
cron-audit — Audit and clean up dead cron jobs.
Finds jobs whose completion count is at or below a threshold (default 0) and, with --older-than, jobs that have not run recently.
Can disable or delete dead jobs.
Usage:
python scripts/cron_audit.py # Show dead jobs
python scripts/cron_audit.py --disable # Disable dead jobs
python scripts/cron_audit.py --delete # Delete dead jobs
python scripts/cron_audit.py --threshold 0 # Jobs with 0 completions
python scripts/cron_audit.py --older-than 7d # Jobs with no runs in 7 days
"""
import argparse
import json
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
# Per-user Hermes directory, located under the current user's home.
HERMES_HOME = Path.home().joinpath(".hermes")
# JSON file this script reads cron jobs from and writes them back to.
JOBS_FILE = HERMES_HOME.joinpath("cron", "jobs.json")
def load_jobs(path: Optional[Path] = None) -> List[Dict[str, Any]]:
    """Load cron jobs from a jobs file.

    Args:
        path: JSON file to read. When omitted, the module-level
            ``JOBS_FILE`` is used, so existing ``load_jobs()`` callers
            are unaffected.

    Returns:
        The value stored under the top-level ``"jobs"`` key. An empty
        list is returned (after printing an error) when the file does not
        exist or its top level is not a JSON object, and also when the
        key is missing or null.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    jobs_file = JOBS_FILE if path is None else path

    # Open directly instead of exists()-then-open so a file that vanishes
    # between the check and the read is still reported cleanly.
    try:
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(jobs_file, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: {jobs_file} not found")
        return []

    if not isinstance(data, dict):
        print(f"Error: {jobs_file} does not contain a JSON object")
        return []

    # ``or []`` normalizes an explicit null so callers always get a list.
    return data.get("jobs") or []
def save_jobs(jobs: List[Dict[str, Any]], path: Optional[Path] = None) -> None:
    """Write *jobs* back to a jobs file, keeping its other top-level keys.

    Args:
        jobs: Job list to store under the top-level ``"jobs"`` key.
        path: JSON file to update. When omitted, the module-level
            ``JOBS_FILE`` is used, so existing ``save_jobs(jobs)`` callers
            are unaffected.

    Raises:
        json.JSONDecodeError: If the existing file is not valid JSON.
        TypeError: If the existing file's top level is not a JSON object.
    """
    jobs_file = JOBS_FILE if path is None else path
    jobs_file.parent.mkdir(parents=True, exist_ok=True)

    # Re-read the file so top-level keys other than "jobs" are preserved.
    # A missing file starts from an empty document rather than failing
    # right after its directory was created.
    try:
        with open(jobs_file, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        data = {}
    data["jobs"] = jobs

    # Write to a sibling temp file and swap it in, so a failure while
    # writing cannot leave the live jobs file truncated.
    tmp_file = jobs_file.with_name(jobs_file.name + ".tmp")
    with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    tmp_file.replace(jobs_file)
def find_dead_jobs(
    jobs: List[Dict[str, Any]],
    completion_threshold: int = 0,
) -> List[Dict[str, Any]]:
    """Find jobs whose completion count is at or below a threshold.

    Args:
        jobs: Job dicts; the count is read from ``job["repeat"]["completed"]``.
        completion_threshold: Highest completion count still treated as dead.

    Returns:
        The matching job dicts (the same objects, in input order). A job
        with a missing or null ``repeat`` / ``completed`` counts as having
        zero completions.
    """

    def completions(job: Dict[str, Any]) -> int:
        # ``or`` also covers explicit nulls, which ``.get(key, default)``
        # would pass through and then fail on.
        repeat = job.get("repeat") or {}
        return repeat.get("completed") or 0

    return [job for job in jobs if completions(job) <= completion_threshold]
def _hours_since(timestamp: Any, now: float) -> float:
    """Return the hours between ISO-8601 *timestamp* and epoch time *now*.

    Raises whatever parsing raises for a value that is not a usable
    ISO-8601 string (e.g. ``ValueError``, ``AttributeError``).
    """
    from datetime import datetime

    # A trailing "Z" is rewritten to an explicit offset before parsing.
    parsed = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
    # NOTE(review): a timestamp without an offset is interpreted as local
    # time by ``timestamp()`` — confirm what the job writer emits.
    return (now - parsed.timestamp()) / 3600


def find_stale_jobs(
    jobs: List[Dict[str, Any]],
    max_age_hours: float = 168,  # 7 days
) -> List[Dict[str, Any]]:
    """Find jobs that have not run within *max_age_hours*.

    A job is stale when:
      * its ``last_run_at`` is older than *max_age_hours*; or
      * it has no ``last_run_at`` and its ``created_at`` is older than
        *max_age_hours*, is missing, or cannot be parsed.

    A job that has a ``last_run_at`` which cannot be parsed is not
    reported.

    Args:
        jobs: Job dicts carrying ISO-8601 ``last_run_at`` / ``created_at``.
        max_age_hours: Age limit in hours.

    Returns:
        The stale job dicts (the same objects, in input order).
    """
    import time

    # Errors a malformed or non-string timestamp can produce; anything
    # else is a real bug and should surface.
    unparseable = (AttributeError, TypeError, ValueError, OverflowError, OSError)

    now = time.time()
    stale = []
    for job in jobs:
        last_run = job.get("last_run_at")
        if last_run:
            try:
                age_hours = _hours_since(last_run, now)
            except unparseable:
                # The job has run but we cannot tell when: leave it alone
                # rather than flag it for cleanup.
                continue
            if age_hours > max_age_hours:
                stale.append(job)
            continue

        # Never ran: fall back to the creation time.
        created = job.get("created_at")
        if not created:
            stale.append(job)
            continue
        try:
            age_hours = _hours_since(created, now)
        except unparseable:
            stale.append(job)
            continue
        if age_hours > max_age_hours:
            stale.append(job)
    return stale
def format_job(job: Dict[str, Any]) -> str:
    """Format a job as one aligned, pipe-separated display line.

    Columns: name (falling back to the id, then ``?``), schedule display,
    completed/total runs (``∞`` when no total is set), and status.
    Missing or null fields fall back to placeholders instead of raising.

    Args:
        job: Job dict to render.

    Returns:
        The formatted line, without a trailing newline.
    """
    # str() + ``or`` so null or non-string values cannot break the
    # width-padded ``:40s`` / ``:20s`` format specs below.
    name = str(job.get("name") or job.get("id") or "?")
    schedule = str(job.get("schedule_display") or "?")

    repeat = job.get("repeat") or {}
    completed = repeat.get("completed") or 0
    times = repeat.get("times")
    repeat_str = f"{completed}/{times}" if times else f"{completed}/∞"

    # An explicit "paused" state takes precedence over the enabled flag.
    if job.get("state", "unknown") == "paused":
        status = "paused"
    else:
        status = "enabled" if job.get("enabled", True) else "disabled"

    return f" {name:40s} | {schedule:20s} | done: {repeat_str:8s} | {status}"
def _parse_days(value: str) -> int:
    """Parse an ``--older-than`` value such as ``7d`` (or ``7``) into days.

    Raises ``argparse.ArgumentTypeError`` so argparse reports a usage
    error instead of the script dying with a traceback.
    """
    text = value.strip().lower()
    if text.endswith("d"):
        text = text[:-1]
    try:
        days = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid age {value!r}: expected a number of days such as 7d"
        ) from None
    if days < 0:
        raise argparse.ArgumentTypeError(f"invalid age {value!r}: must not be negative")
    return days


def main():
    """Command-line entry point: report dead jobs and optionally act on them.

    Dead jobs are those at or below ``--threshold`` completions, plus
    (with ``--older-than``) those that have not run recently. ``--disable``
    and ``--delete`` are mutually exclusive; ``--dry-run`` reports what
    either would do without writing the jobs file.
    """
    parser = argparse.ArgumentParser(description="Audit and clean up dead cron jobs")
    # Only one action can be applied per run; reject the ambiguous
    # combination instead of silently ignoring --delete.
    action = parser.add_mutually_exclusive_group()
    action.add_argument("--disable", action="store_true", help="Disable dead jobs")
    action.add_argument("--delete", action="store_true", help="Delete dead jobs")
    parser.add_argument("--threshold", type=int, default=0, help="Completion threshold (default: 0)")
    parser.add_argument("--older-than", type=_parse_days, metavar="Nd",
                        help="Find jobs with no runs in N days (e.g., 7d)")
    parser.add_argument("--dry-run", action="store_true", help="Show what would change")
    args = parser.parse_args()

    jobs = load_jobs()
    if not jobs:
        print("No jobs found.")
        return
    print(f"Total jobs: {len(jobs)}")

    # Find dead jobs
    dead = find_dead_jobs(jobs, args.threshold)
    print(f"Jobs with <= {args.threshold} completions: {len(dead)}")

    # ``is not None`` so that ``--older-than 0d`` is still honoured.
    if args.older_than is not None:
        days = args.older_than
        stale = find_stale_jobs(jobs, max_age_hours=days * 24)
        print(f"Jobs with no runs in {days} days: {len(stale)}")
        # Both finders return the dicts from ``jobs`` themselves, so
        # de-duplicating by object identity works even for jobs that lack
        # an "id" key (which format_job tolerates) or share one.
        dead = list({id(j): j for j in dead + stale}.values())

    if not dead:
        print("No dead jobs found.")
        return

    print(f"\nDead jobs ({len(dead)}):")
    for job in dead:
        print(format_job(job))

    if not (args.disable or args.delete):
        print("\nUse --disable or --delete to take action. Add --dry-run to preview.")
        return

    if args.dry_run:
        verb = "disable" if args.disable else "delete"
        print(f"\nDRY RUN: Would {verb} {len(dead)} jobs")
        return

    dead_keys = {id(j) for j in dead}
    if args.disable:
        for job in jobs:
            if id(job) in dead_keys:
                job["enabled"] = False
                job["state"] = "disabled"
        save_jobs(jobs)
        print(f"\nDisabled {len(dead)} jobs.")
    else:
        save_jobs([j for j in jobs if id(j) not in dead_keys])
        print(f"\nDeleted {len(dead)} jobs.")
# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()