Compare commits


1 Commit

Commit: 49afa786b7
Message: feat: PR triage automation script (#659)
Date: 2026-04-16 00:55:33 +00:00

Some checks failed:
- Architecture Lint / Lint Repository (pull_request): Blocked by required conditions
- Validate Config / Python Test Suite (pull_request): Blocked by required conditions
- Architecture Lint / Linter Tests (pull_request): Successful in 35s
- Smoke Test / smoke (pull_request): Failing after 16s
- Validate Config / YAML Lint (pull_request): Failing after 17s
- Validate Config / JSON Validate (pull_request): Successful in 11s
- Validate Config / Python Syntax & Import Check (pull_request): Failing after 34s
- Validate Config / Shell Script Lint (pull_request): Failing after 22s
- Validate Config / Cron Syntax Check (pull_request): Successful in 5s
- Validate Config / Deploy Script Dry Run (pull_request): Successful in 10s
- Validate Config / Playbook Schema Validation (pull_request): Successful in 17s
- PR Checklist / pr-checklist (pull_request): Failing after 11m3s
2 changed files with 160 additions and 196 deletions

View File: scripts/cron_audit.py

@@ -1,196 +0,0 @@
#!/usr/bin/env python3
"""
Cron Job Audit — Identify erroring jobs, categorize health, recommend actions.
Usage:
python scripts/cron_audit.py # Full audit
python scripts/cron_audit.py --disable-stale 48 # Disable jobs erroring 48+ hours
python scripts/cron_audit.py --json # JSON output
"""
import json
import os
import subprocess
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any
def get_cron_jobs() -> list[dict]:
"""Get all cron jobs from hermes."""
try:
result = subprocess.run(
["hermes", "cron", "list", "--all"],
capture_output=True, text=True, timeout=30,
)
# Parse hermes cron list output
jobs = []
# hermes cron list outputs JSON via the tool
try:
data = json.loads(result.stdout)
if isinstance(data, dict) and "jobs" in data:
jobs = data["jobs"]
elif isinstance(data, list):
jobs = data
except json.JSONDecodeError:
# Fall back to parsing the jobs file directly
jobs_file = Path.home() / ".hermes" / "cron" / "jobs.json"
if jobs_file.exists():
data = json.loads(jobs_file.read_text())
jobs = data.get("jobs", [])
return jobs
except Exception as e:
print(f"Error fetching jobs: {e}", file=sys.stderr)
return []
def categorize_job(job: dict) -> dict:
"""Categorize a job as healthy, transient error, or systemic error."""
last_status = job.get("last_status", "unknown")
last_run = job.get("last_run_at", "")
last_error = job.get("last_error", "")
enabled = job.get("enabled", True)
name = job.get("name", job.get("id", "?"))
if not enabled:
return {"name": name, "id": job.get("id"), "health": "disabled", "action": "none"}
if last_status == "ok" or last_status is None:
return {"name": name, "id": job.get("id"), "health": "healthy", "action": "none"}
if last_status == "error":
# Check if transient (network) or systemic (bad config)
error_lower = (last_error or "").lower()
transient_signals = ["timeout", "connection", "network", "temporary", "rate limit", "429", "503"]
systemic_signals = ["not found", "import", "module", "attribute", "syntax", "permission", "404", "401"]
is_transient = any(s in error_lower for s in transient_signals)
is_systemic = any(s in error_lower for s in systemic_signals)
# Check staleness
staleness_hours = 0
if last_run:
try:
last_dt = datetime.fromisoformat(last_run.replace("Z", "+00:00"))
staleness_hours = (datetime.now(timezone.utc) - last_dt).total_seconds() / 3600
except Exception:
pass
if is_systemic or staleness_hours > 48:
return {
"name": name, "id": job.get("id"), "health": "systemic",
"action": "disable", "staleness_hours": round(staleness_hours, 1),
"error": last_error[:200] if last_error else "unknown",
}
elif is_transient:
return {
"name": name, "id": job.get("id"), "health": "transient",
"action": "monitor", "staleness_hours": round(staleness_hours, 1),
"error": last_error[:200] if last_error else "unknown",
}
else:
return {
"name": name, "id": job.get("id"), "health": "unknown_error",
"action": "investigate", "staleness_hours": round(staleness_hours, 1),
"error": last_error[:200] if last_error else "unknown",
}
return {"name": name, "id": job.get("id"), "health": "unknown", "action": "investigate"}
def run_audit() -> dict:
"""Run full cron audit."""
jobs = get_cron_jobs()
if not jobs:
return {"total": 0, "categories": {}, "jobs": [], "recommendations": ["No jobs found or hermes not available"]}
categorized = [categorize_job(j) for j in jobs]
categories = {}
for c in categorized:
health = c["health"]
categories.setdefault(health, []).append(c)
recommendations = []
if categories.get("systemic"):
recommendations.append(f"DISABLE {len(categories['systemic'])} systemic error jobs (erroring 48+ hours)")
if categories.get("transient"):
recommendations.append(f"MONITOR {len(categories['transient'])} transient error jobs (network/timeout)")
if categories.get("unknown_error"):
recommendations.append(f"INVESTIGATE {len(categories['unknown_error'])} jobs with unclassified errors")
return {
"generated_at": datetime.now(timezone.utc).isoformat(),
"total": len(jobs),
"healthy": len(categories.get("healthy", [])),
"transient_errors": len(categories.get("transient", [])),
"systemic_errors": len(categories.get("systemic", [])),
"disabled": len(categories.get("disabled", [])),
"unknown": len(categories.get("unknown_error", [])) + len(categories.get("unknown", [])),
"categories": {k: len(v) for k, v in categories.items()},
"jobs": categorized,
"recommendations": recommendations,
}
def to_markdown(audit: dict) -> str:
lines = [
"# Cron Job Audit Report",
"",
f"Generated: {audit['generated_at'][:16]}",
"",
"## Summary",
"",
f"| Health | Count |",
f"|--------|-------|",
f"| Healthy | {audit['healthy']} |",
f"| Transient errors | {audit['transient_errors']} |",
f"| Systemic errors | {audit['systemic_errors']} |",
f"| Disabled | {audit['disabled']} |",
f"| Unknown | {audit['unknown']} |",
f"| **Total** | **{audit['total']}** |",
"",
]
if audit["recommendations"]:
lines.extend(["## Recommendations", ""])
for r in audit["recommendations"]:
lines.append(f"- {r}")
lines.append("")
if audit.get("systemic_errors", 0) > 0:
lines.extend(["## Systemic Errors (Recommend Disable)", ""])
for j in audit["jobs"]:
if j["health"] == "systemic":
lines.append(f"- `{j['id']}`: {j['name']} (stale {j.get('staleness_hours', '?')}h)")
lines.append(f" Error: {j.get('error', 'unknown')}")
if audit.get("transient_errors", 0) > 0:
lines.extend(["", "## Transient Errors (Monitor)", ""])
for j in audit["jobs"]:
if j["health"] == "transient":
lines.append(f"- `{j['id']}`: {j['name']}{j.get('error', 'unknown')[:100]}")
return "
".join(lines)
def main():
import argparse
parser = argparse.ArgumentParser(description="Cron job audit")
parser.add_argument("--json", action="store_true")
parser.add_argument("--disable-stale", type=int, default=0, help="Disable jobs stale N+ hours")
args = parser.parse_args()
audit = run_audit()
if args.json:
print(json.dumps(audit, indent=2))
else:
print(to_markdown(audit))
if __name__ == "__main__":
main()
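
A minimal usage sketch of the audit script's classifier, assuming scripts/cron_audit.py is importable as cron_audit (run from the repo root); the job dict below is hypothetical:

import sys
sys.path.insert(0, "scripts")  # assumption: repo root is the working directory
from cron_audit import categorize_job

job = {
    "id": "job-42",             # hypothetical job id
    "name": "nightly-sync",     # hypothetical job name
    "enabled": True,
    "last_status": "error",
    "last_error": "connection timeout after 30s",
    "last_run_at": "2026-04-15T22:00:00Z",
}
print(categorize_job(job))
# -> {'name': 'nightly-sync', 'id': 'job-42', 'health': 'transient',
#     'action': 'monitor', ...} while the last run is under 48h stale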

View File: scripts/pr-backlog-triage.py

@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""
pr-backlog-triage.py — Analyze and triage open PR backlog.
Identifies duplicate PRs (same issue number), stale PRs (old with no activity),
and generates a triage report.
Usage:
python3 scripts/pr-backlog-triage.py --report # Print report
python3 scripts/pr-backlog-triage.py --close-dupes # Close duplicate PRs (keep newest)
    python3 scripts/pr-backlog-triage.py --close-dupes --dry-run  # Show what would be closed
"""
import argparse
import json
import re
import sys
import urllib.request
from collections import defaultdict
from datetime import datetime, timezone
GITEA_URL = "https://forge.alexanderwhitestone.com"
TOKEN_PATH = "/Users/apayne/.config/gitea/token"
REPO = "Timmy_Foundation/timmy-config"
def load_token():
with open(TOKEN_PATH) as f:
return f.read().strip()
def api_get(path, token):
req = urllib.request.Request(
f"{GITEA_URL}/api/v1/repos/{REPO}{path}",
headers={"Authorization": f"token {token}"}
)
return json.loads(urllib.request.urlopen(req, timeout=30).read())
def api_patch(path, token, data):
req = urllib.request.Request(
f"{GITEA_URL}/api/v1/repos/{REPO}{path}",
data=json.dumps(data).encode(),
headers={"Authorization": f"token {token}", "Content-Type": "application/json"},
method="PATCH"
)
return json.loads(urllib.request.urlopen(req, timeout=15).read())
def api_post(path, token, data):
req = urllib.request.Request(
f"{GITEA_URL}/api/v1/repos/{REPO}{path}",
data=json.dumps(data).encode(),
headers={"Authorization": f"token {token}", "Content-Type": "application/json"},
method="POST"
)
return json.loads(urllib.request.urlopen(req, timeout=15).read())
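# These helpers wrap the stock Gitea REST API under /api/v1/repos/{owner}/{repo};
# e.g. api_get("/pulls?state=open&limit=100", token) issues
# GET {GITEA_URL}/api/v1/repos/{REPO}/pulls?state=open&limit=100.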
def extract_issue_refs(title, body):
"""Extract issue numbers referenced in title or body."""
text = f"{title} {body or ''}"
    # Match issue refs like "#123", "fixes #123", "closes #123" (2-5 digit numbers)
refs = set(int(m) for m in re.findall(r'#(\d{2,5})', text))
return refs
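# Example (illustrative): extract_issue_refs("fix: retry logic (#612)", "closes #612")
# returns {612}; single-digit refs such as "#7" fall outside the 2-5 digit range.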
def main():
parser = argparse.ArgumentParser(description="Triage open PR backlog")
parser.add_argument("--report", action="store_true", help="Print triage report")
parser.add_argument("--close-dupes", action="store_true", help="Close duplicate PRs (keep newest)")
parser.add_argument("--dry-run", action="store_true", help="Show what would be closed")
args = parser.parse_args()
if not args.report and not args.close_dupes:
args.report = True
token = load_token()
prs = api_get("/pulls?state=open&limit=100", token)
print(f"Found {len(prs)} open PRs\n")
# Build issue → PR mapping
issue_to_prs = defaultdict(list)
for pr in prs:
refs = extract_issue_refs(pr["title"], pr.get("body", ""))
for ref in refs:
issue_to_prs[ref].append(pr)
# Find duplicates (same issue referenced by multiple PRs)
duplicates = {}
for issue_num, pr_list in issue_to_prs.items():
if len(pr_list) > 1:
# Sort by number (newest first)
sorted_prs = sorted(pr_list, key=lambda p: -p["number"])
duplicates[issue_num] = sorted_prs
if args.report:
print(f"{'='*60}")
print(f"DUPLICATE PRs ({len(duplicates)} issues with multiple PRs)")
print(f"{'='*60}")
for issue_num, pr_list in sorted(duplicates.items()):
print(f"\nIssue #{issue_num}: {len(pr_list)} PRs")
for i, pr in enumerate(pr_list):
marker = "KEEP" if i == 0 else "CLOSE"
print(f" [{marker}] PR #{pr['number']}: {pr['title'][:70]}")
print(f" branch={pr['head']['ref']} created={pr['created_at'][:10]}")
total_dupes = sum(len(v) - 1 for v in duplicates.values())
print(f"\nTotal duplicate PRs that could be closed: {total_dupes}")
# Check for PRs referencing closed issues
print(f"\n{'='*60}")
print("PRs referencing CLOSED issues:")
print(f"{'='*60}")
closed_issue_prs = []
for issue_num in issue_to_prs:
try:
issue = api_get(f"/../../issues/{issue_num}", token)
if issue.get("state") == "closed":
for pr in issue_to_prs[issue_num]:
closed_issue_prs.append((issue_num, pr))
except Exception:
pass
for issue_num, pr in sorted(closed_issue_prs, key=lambda x: -x[1]["number"]):
print(f" PR #{pr['number']}: {pr['title'][:70]} (issue #{issue_num} is CLOSED)")
if args.close_dupes:
closed = 0
for issue_num, pr_list in duplicates.items():
# Keep the newest (first in list), close the rest
keep = pr_list[0]
close_list = pr_list[1:]
for pr in close_list:
if args.dry_run:
print(f"DRY RUN: Would close PR #{pr['number']} (duplicate of #{keep['number']} for issue #{issue_num})")
else:
# Add comment
try:
api_post(f"/issues/{pr['number']}/comments", token, {
"body": f"Closing as duplicate. PR #{keep['number']} is newer and addresses the same issue (#{issue_num})."
})
except Exception:
pass
# Close the PR
try:
api_patch(f"/pulls/{pr['number']}", token, {"state": "closed"})
print(f"Closed PR #{pr['number']} (duplicate of #{keep['number']})")
closed += 1
except Exception as e:
print(f"Error closing PR #{pr['number']}: {e}")
print(f"\nClosed {closed} duplicate PRs")
if __name__ == "__main__":
main()
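
A minimal sketch of the duplicate-grouping pass on fabricated PR data (field names mirror the Gitea API shape used above; both PRs are hypothetical):

import re
from collections import defaultdict

def extract_issue_refs(title, body):
    text = f"{title} {body or ''}"
    return set(int(m) for m in re.findall(r'#(\d{2,5})', text))

prs = [
    {"number": 640, "title": "fix: retry logic (#612)", "body": ""},
    {"number": 655, "title": "fix: retry logic, take 2", "body": "closes #612"},
]
issue_to_prs = defaultdict(list)
for pr in prs:
    for ref in extract_issue_refs(pr["title"], pr.get("body", "")):
        issue_to_prs[ref].append(pr)
for issue, group in issue_to_prs.items():
    if len(group) > 1:
        keep, *close = sorted(group, key=lambda p: -p["number"])  # newest first
        print(f"issue #{issue}: keep PR #{keep['number']}, close {[p['number'] for p in close]}")
# -> issue #612: keep PR #655, close [640]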