#!/usr/bin/env python3
"""
pr_triage.py — Automated PR triage with optional auto-merge (Issue #659).

Fetches open PRs, categorizes, detects duplicates/stale refs, generates
report, and optionally auto-merges safe training-data PRs.

Usage:
    python3 scripts/pr_triage.py Timmy_Foundation/timmy-config
    python3 scripts/pr_triage.py Timmy_Foundation/timmy-config --auto-merge
    python3 scripts/pr_triage.py Timmy_Foundation/hermes-agent --json
    python3 scripts/pr_triage.py --org Timmy_Foundation --auto-merge
    python3 scripts/pr_triage.py --file-as-issue Timmy_Foundation/timmy-config
"""
import argparse
import json
import os
import re
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.error import HTTPError
from urllib.parse import urlencode
from urllib.request import Request, urlopen

GITEA_URL = "https://forge.alexanderwhitestone.com"
ISSUE_RE = re.compile(r"#(\d+)")

# Auto-merge: only these categories are "safe"
SAFE_MERGE_CATEGORIES = {"training_data", "docs"}

# Keyword buckets for title-based categorization.  Dict order is significant:
# the FIRST category with a matching keyword wins (see categorize()).
CATEGORY_KEYWORDS = {
    "training_data": ["500", "pairs", "scene description", "lyrics", "prompt",
                      "training data", "corpus"],
    "bug_fix": ["fix", "bug", "patch", "hotfix", "resolve", "repair"],
    "feature": ["feat", "add", "implement", "feature", "new"],
    "docs": ["doc", "readme", "changelog", "guide"],
    "ops": ["ops", "deploy", "ci", "cd", "pipeline", "ansible"],
    "security": ["security", "xss", "injection", "auth", "vulnerability"],
}


# ─── API helpers ──────────────────────────────────────────────────────

def get_token() -> str:
    """Return the Gitea API token from ~/.config/gitea/token or $GITEA_TOKEN.

    Exits the process with status 1 if neither source yields a token.
    """
    p = Path(os.path.expanduser("~/.config/gitea/token"))
    if p.exists():
        return p.read_text().strip()
    t = os.environ.get("GITEA_TOKEN", "")
    if not t:
        print("ERROR: No token. ~/.config/gitea/token or GITEA_TOKEN",
              file=sys.stderr)
        sys.exit(1)
    return t


def api(method: str, path: str, token: str,
        data: Optional[dict] = None,
        params: Optional[dict] = None) -> Any:
    """Call the Gitea REST API and return the parsed JSON response.

    Args:
        method: HTTP verb ("GET", "POST", ...).
        path:   API path below /api/v1, e.g. "/repos/org/repo/pulls".
        token:  Gitea access token.
        data:   Optional JSON request body.
        params: Optional query-string parameters.

    Returns:
        Parsed JSON on success, or {"_error": <status>, "_body": <text>}
        on HTTP error so callers can detect failure without exceptions.
    """
    url = f"{GITEA_URL}/api/v1{path}"
    if params:
        # urlencode() percent-escapes values; the old manual join broke on
        # any value containing '&', '=', spaces, etc.
        url += "?" + urlencode(params)
    body = json.dumps(data).encode() if data else None
    req = Request(url, data=body, headers={
        "Authorization": f"token {token}",
        "Content-Type": "application/json",
    }, method=method)
    try:
        # Context manager ensures the HTTP response is closed.
        with urlopen(req, timeout=30) as resp:
            return json.loads(resp.read())
    except HTTPError as e:
        err_body = e.read().decode() if e.fp else ""
        return {"_error": e.code, "_body": err_body[:300]}


# ─── Triage logic ─────────────────────────────────────────────────────

def categorize(title: str) -> str:
    """Return the first category in CATEGORY_KEYWORDS whose keyword list
    has a (case-insensitive) substring match in *title*, else "other"."""
    t = (title or "").lower()
    for cat, kws in CATEGORY_KEYWORDS.items():
        if any(k in t for k in kws):
            return cat
    return "other"


def refs(pr: dict) -> List[int]:
    """Return the sorted, de-duplicated issue numbers (#N) referenced in a
    PR's title and body."""
    text = ((pr.get("title") or "") + " " + (pr.get("body") or ""))
    return sorted(set(int(n) for n in ISSUE_RE.findall(text)))


def find_dupes(prs: List[dict]) -> Dict[int, List[int]]:
    """Map issue number -> PR numbers, keeping only issues referenced by
    more than one PR (i.e. likely duplicate work)."""
    m: Dict[int, List[int]] = {}
    for pr in prs:
        for r in refs(pr):
            m.setdefault(r, []).append(pr["number"])
    return {k: v for k, v in m.items() if len(v) > 1}


def find_stale(prs: List[dict], closed: set) -> List[dict]:
    """Return PRs that reference at least one already-closed issue.

    Each entry is {"pr": number, "title": str, "stale_refs": [issue, ...]}.
    """
    out = []
    for pr in prs:
        stale = [r for r in refs(pr) if r in closed]
        if stale:
            out.append({"pr": pr["number"],
                        "title": pr.get("title", ""),
                        "stale_refs": stale})
    return out


def get_mergeability(repo: str, token: str, pr_num: int) -> Any:
    """Check whether a PR is mergeable.

    Returns the API's "mergeable" field (a boolean in Gitea responses),
    or the string "unknown" if the PR could not be fetched.  The original
    `-> str` annotation was wrong for the boolean case.
    """
    pr = api("GET", f"/repos/{repo}/pulls/{pr_num}", token)
    if isinstance(pr, dict) and "_error" in pr:
        return "unknown"
    return pr.get("mergeable", "unknown")


def auto_merge_safe(repo: str, token: str, prs: List[dict],
                    dry_run: bool = True) -> List[dict]:
    """Auto-merge safe PRs (training data, docs) if mergeable.

    Args:
        repo:    "org/repo" path.
        token:   Gitea token.
        prs:     Open PR objects as returned by the pulls endpoint.
        dry_run: When True, only report what WOULD be merged.

    Returns:
        A list of action records: {"pr": n, "action": "merged" |
        "would_merge" | "skipped" | "merge_failed", ...}.
    """
    merged = []
    for pr in prs:
        cat = categorize(pr.get("title", ""))
        if cat not in SAFE_MERGE_CATEGORIES:
            continue
        pr_num = pr["number"]
        mergeable = get_mergeability(repo, token, pr_num)
        # Only an explicit False blocks the merge; "unknown" proceeds.
        if mergeable is False:
            merged.append({"pr": pr_num, "action": "skipped",
                           "reason": "not mergeable"})
            continue
        if dry_run:
            merged.append({"pr": pr_num, "action": "would_merge",
                           "category": cat})
            continue
        # Attempt merge.  "Do" is the Gitea merge-style field; the extra
        # GitLab-ism "merge_when_pipeline_succeeds" is ignored by Gitea.
        result = api("POST", f"/repos/{repo}/pulls/{pr_num}/merge", token, {
            "Do": "merge",
            "merge_when_pipeline_succeeds": False,
        })
        if isinstance(result, dict) and "_error" in result:
            merged.append({"pr": pr_num, "action": "merge_failed",
                           "error": result.get("_body", "")[:200]})
        else:
            merged.append({"pr": pr_num, "action": "merged",
                           "category": cat})
    return merged


# ─── Reporting ────────────────────────────────────────────────────────

def analyze(repo: str, token: str) -> dict:
    """Fetch open PRs and closed issues for *repo* and build the full
    triage analysis dict consumed by to_markdown()/to_json().

    Returns {"error": ...} if the PR list cannot be fetched.
    """
    prs = api("GET", f"/repos/{repo}/pulls", token,
              params={"state": "open", "limit": "100"})
    if not isinstance(prs, list):
        return {"error": f"API error: {prs}"}

    closed = api("GET", f"/repos/{repo}/issues", token,
                 params={"state": "closed", "limit": "200"})
    closed_nums = set()
    if isinstance(closed, list):
        # Gitea's issues endpoint also returns PRs; those carry a
        # "pull_request" field and are excluded here.
        closed_nums = {i["number"] for i in closed if not i.get("pull_request")}

    cats: Dict[str, List[dict]] = {}
    for pr in prs:
        c = categorize(pr.get("title", ""))
        cats.setdefault(c, []).append({
            "number": pr["number"],
            "title": pr.get("title", ""),
            "refs": refs(pr),
            "head": pr.get("head", {}).get("ref", ""),
            "files": pr.get("changed_files", 0),
            # Guard against a null created_at (None[:10] would crash).
            "created": (pr.get("created_at") or "")[:10],
        })

    dupes = find_dupes(prs)
    stale = find_stale(prs, closed_nums)

    # Stats
    total_files = sum(pr.get("changed_files", 0) for pr in prs)
    total_add = sum(pr.get("additions", 0) for pr in prs)
    total_del = sum(pr.get("deletions", 0) for pr in prs)

    return {
        "repo": repo,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "total_open": len(prs),
        "total_files_changed": total_files,
        "total_additions": total_add,
        "total_deletions": total_del,
        "categories": {k: len(v) for k, v in cats.items()},
        "category_details": cats,
        "duplicates": dupes,
        "stale_prs": stale,
        "closed_issues_checked": len(closed_nums),
        "safe_merge_candidates": len(
            [p for p in prs
             if categorize(p.get("title", "")) in SAFE_MERGE_CATEGORIES]),
    }


def to_markdown(a: dict) -> str:
    """Generate markdown report suitable for filing as a Gitea issue."""
    ts = a.get("timestamp", "")[:16].replace("T", " ")
    lines = [
        f"## PR Triage Report — {a['repo']}",
        f"**Generated:** {ts}",
        "",
        "### Summary",
        "",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Open PRs | {a['total_open']} |",
        f"| Files changed | {a['total_files_changed']} |",
        f"| Lines added | +{a['total_additions']} |",
        f"| Lines deleted | -{a['total_deletions']} |",
        f"| Safe merge candidates | {a.get('safe_merge_candidates', 0)} |",
        "",
        "### Categories",
        "",
        "| Category | Count |",
        "|----------|-------|",
    ]
    for cat, n in sorted(a["categories"].items()):
        lines.append(f"| {cat} | {n} |")
    if a["duplicates"]:
        lines += ["", "### Duplicate PRs", ""]
        for issue, prs in a["duplicates"].items():
            lines.append(f"- Issue #{issue} referenced by PRs: "
                         f"{', '.join(f'#{p}' for p in prs)}")
    if a["stale_prs"]:
        lines += ["", "### Stale PRs (reference closed issues)", ""]
        for s in a["stale_prs"]:
            refs_str = ", ".join(f"#{r}" for r in s["stale_refs"])
            lines.append(f"- #{s['pr']}: {s['title'][:60]} — closed refs: {refs_str}")
    for cat, items in a.get("category_details", {}).items():
        if not items:
            continue
        lines += ["", f"### {cat.replace('_', ' ').title()} ({len(items)})", ""]
        for pr in items:
            r = (f" (refs: {', '.join(f'#{x}' for x in pr['refs'])})"
                 if pr["refs"] else "")
            lines.append(f"- #{pr['number']}: {pr['title'][:70]}{r}")
    lines += ["", "---", "*Generated by pr_triage.py*"]
    return "\n".join(lines)


def to_json(a: dict) -> str:
    """Serialize the analysis dict as pretty-printed JSON."""
    return json.dumps(a, indent=2, default=str)


# ─── File as issue ────────────────────────────────────────────────────

def file_as_issue(repo: str, token: str, analysis: dict) -> Optional[int]:
    """File the triage report as a new Gitea issue.

    Returns the new issue number, or None on failure.
    """
    body = to_markdown(analysis)
    ts = analysis.get("timestamp", "")[:10]
    result = api("POST", f"/repos/{repo}/issues", token, {
        "title": f"[ops] PR Triage Report — {ts}",
        "body": body,
    })
    if isinstance(result, dict) and "number" in result:
        return result["number"]
    return None


# ─── CLI ──────────────────────────────────────────────────────────────
def main():
    """Parse CLI arguments, triage each requested repo, and emit the report.

    Exits with status 1 when any stale or duplicate PRs were detected,
    0 otherwise.
    """
    p = argparse.ArgumentParser(description="PR triage automation")
    p.add_argument("repo", nargs="?", help="Org/Repo path")
    p.add_argument("--org", help="Triage all repos in org")
    p.add_argument("--auto-merge", action="store_true", help="Auto-merge safe PRs")
    # BUG FIX: this was `action="store_true", default=True`, so the flag was
    # ALWAYS True, and it was then passed inverted (`dry_run=not args.dry_run`)
    # — meaning --auto-merge always performed real merges and --dry-run could
    # never prevent one.  It is now a normal opt-in flag passed through as-is.
    p.add_argument("--dry-run", action="store_true", help="Don't merge/close")
    p.add_argument("--json", action="store_true", help="JSON output")
    p.add_argument("--file-as-issue", action="store_true",
                   help="File report as issue")
    p.add_argument("--output", help="Write report to file")
    p.add_argument("--token", help="Override token")
    args = p.parse_args()

    token = args.token or get_token()

    # Resolve the repo list: either every repo in --org, or the single
    # positional REPO argument.
    repos = []
    if args.org:
        org_repos = api("GET", f"/orgs/{args.org}/repos", token,
                        params={"limit": "50"})
        if isinstance(org_repos, list):
            repos = [r["full_name"] for r in org_repos]
    elif args.repo:
        repos = [args.repo]
    else:
        p.error("Provide REPO or --org")

    results = []
    for repo in repos:
        a = analyze(repo, token)
        if "error" in a:
            print(f"SKIP: {a['error']}", file=sys.stderr)
            continue

        # Auto-merge
        if args.auto_merge and a["safe_merge_candidates"] > 0:
            prs = api("GET", f"/repos/{repo}/pulls", token,
                      params={"state": "open", "limit": "100"})
            if isinstance(prs, list):
                merge_results = auto_merge_safe(repo, token, prs,
                                                dry_run=args.dry_run)
                a["merge_actions"] = merge_results

        # File as issue
        if args.file_as_issue:
            issue_num = file_as_issue(repo, token, a)
            if issue_num:
                a["filed_issue"] = issue_num
                print(f"Filed triage report as issue #{issue_num}")

        results.append(a)

    # Output
    if args.json:
        out = to_json(results[0] if len(results) == 1 else results)
    else:
        out = "\n\n---\n\n".join(to_markdown(a) for a in results)

    if args.output:
        Path(args.output).write_text(out, encoding="utf-8")
        print(f"Written to {args.output}")
    else:
        print(out)

    # Exit 1 if stale/duplicates found
    total_stale = sum(len(a.get("stale_prs", [])) for a in results)
    total_dupes = sum(len(a.get("duplicates", {})) for a in results)
    if total_stale + total_dupes > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()