Files
timmy-config/scripts/pr_triage.py

335 lines
12 KiB
Python

#!/usr/bin/env python3
"""
pr_triage.py — Automated PR triage with optional auto-merge (Issue #659).
Fetches open PRs, categorizes, detects duplicates/stale refs, generates
report, and optionally auto-merges safe training-data PRs.
Usage:
python3 scripts/pr_triage.py Timmy_Foundation/timmy-config
python3 scripts/pr_triage.py Timmy_Foundation/timmy-config --auto-merge
python3 scripts/pr_triage.py Timmy_Foundation/hermes-agent --json
python3 scripts/pr_triage.py --org Timmy_Foundation --auto-merge
python3 scripts/pr_triage.py --file-as-issue Timmy_Foundation/timmy-config
"""
import argparse
import json
import os
import re
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.error import HTTPError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
# Base URL of the Gitea instance all API calls target.
GITEA_URL = "https://forge.alexanderwhitestone.com"

# Matches issue references like "#123" in PR titles/bodies.
ISSUE_RE = re.compile(r"#(\d+)")

# Auto-merge: only these categories are "safe" to merge without human review.
SAFE_MERGE_CATEGORIES = {"training_data", "docs"}

# Keyword lists used by categorize(); categories are tried in insertion
# order and the first match wins.  (Fixed: "pairs" was listed twice in
# training_data — harmless for matching but a maintenance trap.)
CATEGORY_KEYWORDS = {
    "training_data": ["500", "pairs", "scene description", "lyrics", "prompt",
                      "training data", "corpus"],
    "bug_fix": ["fix", "bug", "patch", "hotfix", "resolve", "repair"],
    "feature": ["feat", "add", "implement", "feature", "new"],
    "docs": ["doc", "readme", "changelog", "guide"],
    "ops": ["ops", "deploy", "ci", "cd", "pipeline", "ansible"],
    "security": ["security", "xss", "injection", "auth", "vulnerability"],
}
# ─── API helpers ──────────────────────────────────────────────────────
def get_token() -> str:
    """Return the Gitea API token.

    Checks ~/.config/gitea/token first, then the GITEA_TOKEN environment
    variable.  Exits the process with status 1 if neither yields a token.
    """
    token_file = Path(os.path.expanduser("~/.config/gitea/token"))
    if token_file.exists():
        return token_file.read_text().strip()
    token = os.environ.get("GITEA_TOKEN", "")
    if token:
        return token
    print("ERROR: No token. ~/.config/gitea/token or GITEA_TOKEN", file=sys.stderr)
    sys.exit(1)
def api(method: str, path: str, token: str,
        data: Optional[dict] = None, params: Optional[dict] = None) -> Any:
    """Call the Gitea REST API and return the decoded JSON response.

    On HTTPError the exception is swallowed and a sentinel dict
    {"_error": <status code>, "_body": <first 300 chars>} is returned so
    callers can inspect failures inline instead of catching.
    """
    url = f"{GITEA_URL}/api/v1{path}"
    if params:
        # urlencode percent-escapes keys/values; the original joined raw
        # strings, which produced invalid URLs for values with spaces or
        # reserved characters.
        url += "?" + urlencode(params)
    # "is not None" so an explicit empty-dict payload still sends a body.
    body = json.dumps(data).encode() if data is not None else None
    req = Request(url, data=body, headers={
        "Authorization": f"token {token}",
        "Content-Type": "application/json",
    }, method=method)
    try:
        return json.loads(urlopen(req, timeout=30).read())
    except HTTPError as e:
        err_body = e.read().decode() if e.fp else ""
        return {"_error": e.code, "_body": err_body[:300]}
# ─── Triage logic ─────────────────────────────────────────────────────
def categorize(title: str) -> str:
    """Return the first category whose keyword list matches *title*, else "other"."""
    lowered = (title or "").lower()
    return next(
        (cat for cat, keywords in CATEGORY_KEYWORDS.items()
         if any(kw in lowered for kw in keywords)),
        "other",
    )
def refs(pr: dict) -> List[int]:
    """Return sorted, de-duplicated issue numbers referenced in a PR's title/body."""
    haystack = " ".join([pr.get("title") or "", pr.get("body") or ""])
    return sorted({int(num) for num in ISSUE_RE.findall(haystack)})
def find_dupes(prs: List[dict]) -> Dict[int, List[int]]:
    """Map each issue number referenced by two or more PRs to those PR numbers."""
    referenced: Dict[int, List[int]] = {}
    for pr in prs:
        for issue in refs(pr):
            referenced.setdefault(issue, []).append(pr["number"])
    # Only issues with multiple referring PRs count as duplicates.
    return {issue: numbers for issue, numbers in referenced.items()
            if len(numbers) > 1}
def find_stale(prs: List[dict], closed: set) -> List[dict]:
    """List open PRs that reference issue numbers already in *closed*."""
    stale_entries = []
    for pr in prs:
        closed_refs = [num for num in refs(pr) if num in closed]
        if not closed_refs:
            continue
        stale_entries.append({
            "pr": pr["number"],
            "title": pr.get("title", ""),
            "stale_refs": closed_refs,
        })
    return stale_entries
def get_mergeability(repo: str, token: str, pr_num: int) -> Any:
    """Return the PR's "mergeable" field, or "unknown" on API failure.

    Note the mixed return type: Gitea reports mergeability as a boolean,
    while failures and missing fields yield the string "unknown".  The
    original annotated this as -> str, which contradicted the caller's
    `mergeable is False` check; `Any` reflects reality.
    """
    pr = api("GET", f"/repos/{repo}/pulls/{pr_num}", token)
    if isinstance(pr, dict) and "_error" in pr:
        return "unknown"
    return pr.get("mergeable", "unknown")
def auto_merge_safe(repo: str, token: str, prs: List[dict],
                    dry_run: bool = True) -> List[dict]:
    """Auto-merge safe PRs (training data, docs) if mergeable.

    Returns one action record per safe-category PR: skipped / would_merge /
    merged / merge_failed.  PRs outside SAFE_MERGE_CATEGORIES are ignored.
    """
    actions: List[dict] = []
    for pr in prs:
        category = categorize(pr.get("title", ""))
        if category not in SAFE_MERGE_CATEGORIES:
            continue
        number = pr["number"]
        # Explicit `is False`: "unknown" (API failure) still falls through
        # to a merge attempt, matching the original behavior.
        if get_mergeability(repo, token, number) is False:
            actions.append({"pr": number, "action": "skipped",
                            "reason": "not mergeable"})
        elif dry_run:
            actions.append({"pr": number, "action": "would_merge",
                            "category": category})
        else:
            outcome = api("POST", f"/repos/{repo}/pulls/{number}/merge", token, {
                "Do": "merge",
                "merge_when_pipeline_succeeds": False,
            })
            if isinstance(outcome, dict) and "_error" in outcome:
                actions.append({"pr": number, "action": "merge_failed",
                                "error": outcome.get("_body", "")[:200]})
            else:
                actions.append({"pr": number, "action": "merged",
                                "category": category})
    return actions
# ─── Reporting ────────────────────────────────────────────────────────
def analyze(repo: str, token: str) -> dict:
    """Fetch open PRs for *repo* and build the full triage analysis dict.

    Returns {"error": ...} if the PR listing fails.  Closed-issue lookup is
    best-effort (limit 200); on failure staleness checks see an empty set.
    """
    prs = api("GET", f"/repos/{repo}/pulls", token,
              params={"state": "open", "limit": "100"})
    if not isinstance(prs, list):
        return {"error": f"API error: {prs}"}

    closed = api("GET", f"/repos/{repo}/issues", token,
                 params={"state": "closed", "limit": "200"})
    # Keep real issues only — entries carrying "pull_request" are PRs.
    closed_nums: set = set()
    if isinstance(closed, list):
        closed_nums = {item["number"] for item in closed
                       if not item.get("pull_request")}

    # Group PR summaries by category.
    cats: Dict[str, List[dict]] = {}
    for pr in prs:
        cats.setdefault(categorize(pr.get("title", "")), []).append({
            "number": pr["number"],
            "title": pr.get("title", ""),
            "refs": refs(pr),
            "head": pr.get("head", {}).get("ref", ""),
            "files": pr.get("changed_files", 0),
            "created": pr.get("created_at", "")[:10],
        })

    safe_candidates = [p for p in prs
                       if categorize(p.get("title", "")) in SAFE_MERGE_CATEGORIES]
    return {
        "repo": repo,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "total_open": len(prs),
        "total_files_changed": sum(pr.get("changed_files", 0) for pr in prs),
        "total_additions": sum(pr.get("additions", 0) for pr in prs),
        "total_deletions": sum(pr.get("deletions", 0) for pr in prs),
        "categories": {name: len(items) for name, items in cats.items()},
        "category_details": cats,
        "duplicates": find_dupes(prs),
        "stale_prs": find_stale(prs, closed_nums),
        "closed_issues_checked": len(closed_nums),
        "safe_merge_candidates": len(safe_candidates),
    }
def to_markdown(a: dict) -> str:
    """Generate markdown report suitable for filing as a Gitea issue."""
    # Timestamp truncated to minutes, ISO "T" separator replaced for display.
    generated = a.get("timestamp", "")[:16].replace("T", " ")
    out: List[str] = []
    out.append(f"## PR Triage Report — {a['repo']}")
    out.append(f"**Generated:** {generated}")
    out.extend([
        "",
        "### Summary",
        "",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Open PRs | {a['total_open']} |",
        f"| Files changed | {a['total_files_changed']} |",
        f"| Lines added | +{a['total_additions']} |",
        f"| Lines deleted | -{a['total_deletions']} |",
        f"| Safe merge candidates | {a.get('safe_merge_candidates', 0)} |",
        "",
        "### Categories",
        "",
        "| Category | Count |",
        "|----------|-------|",
    ])
    for name, count in sorted(a["categories"].items()):
        out.append(f"| {name} | {count} |")

    if a["duplicates"]:
        out.extend(["", "### Duplicate PRs", ""])
        for issue, pr_nums in a["duplicates"].items():
            joined = ", ".join(f"#{p}" for p in pr_nums)
            out.append(f"- Issue #{issue} referenced by PRs: {joined}")

    if a["stale_prs"]:
        out.extend(["", "### Stale PRs (reference closed issues)", ""])
        for entry in a["stale_prs"]:
            closed_refs = ", ".join(f"#{r}" for r in entry["stale_refs"])
            out.append(f"- #{entry['pr']}: {entry['title'][:60]} — closed refs: {closed_refs}")

    # One section per non-empty category, titles truncated to 70 chars.
    for name, items in a.get("category_details", {}).items():
        if not items:
            continue
        out.extend(["", f"### {name.replace('_', ' ').title()} ({len(items)})", ""])
        for pr in items:
            suffix = ""
            if pr["refs"]:
                suffix = f" (refs: {', '.join(f'#{x}' for x in pr['refs'])})"
            out.append(f"- #{pr['number']}: {pr['title'][:70]}{suffix}")

    out.extend(["", "---", "*Generated by pr_triage.py*"])
    return "\n".join(out)
def to_json(a: dict) -> str:
    """Serialize the analysis as pretty-printed JSON, str-coercing unknown types."""
    rendered = json.dumps(a, default=str, indent=2)
    return rendered
# ─── File as issue ────────────────────────────────────────────────────
def file_as_issue(repo: str, token: str, analysis: dict) -> Optional[int]:
    """File the triage report as a new Gitea issue.

    Returns the new issue number, or None if creation failed.
    """
    report_date = analysis.get("timestamp", "")[:10]
    created = api("POST", f"/repos/{repo}/issues", token, {
        "title": f"[ops] PR Triage Report — {report_date}",
        "body": to_markdown(analysis),
    })
    if isinstance(created, dict) and "number" in created:
        return created["number"]
    return None
# ─── CLI ──────────────────────────────────────────────────────────────
def main():
    """CLI entry point: triage one repo (or every repo in an org) and report.

    Exits 1 when any stale or duplicate PRs are found so CI can flag them.
    """
    p = argparse.ArgumentParser(description="PR triage automation")
    p.add_argument("repo", nargs="?", help="Org/Repo path")
    p.add_argument("--org", help="Triage all repos in org")
    p.add_argument("--auto-merge", action="store_true", help="Auto-merge safe PRs")
    # BUG FIX: this was `action="store_true", default=True` (always True, the
    # flag could never be turned off) and the call site inverted it with
    # `dry_run=not args.dry_run` — so --auto-merge performed REAL merges by
    # default while claiming to be a dry run.  BooleanOptionalAction (Python
    # 3.9+) keeps --dry-run working and adds --no-dry-run to opt in to
    # actual merges.
    p.add_argument("--dry-run", action=argparse.BooleanOptionalAction,
                   default=True, help="Don't merge/close")
    p.add_argument("--json", action="store_true", help="JSON output")
    p.add_argument("--file-as-issue", action="store_true", help="File report as issue")
    p.add_argument("--output", help="Write report to file")
    p.add_argument("--token", help="Override token")
    args = p.parse_args()

    token = args.token or get_token()

    # Resolve the list of repos to triage.
    repos: List[str] = []
    if args.org:
        org_repos = api("GET", f"/orgs/{args.org}/repos", token, params={"limit": "50"})
        if isinstance(org_repos, list):
            repos = [r["full_name"] for r in org_repos]
    elif args.repo:
        repos = [args.repo]
    else:
        p.error("Provide REPO or --org")

    results = []
    for repo in repos:
        a = analyze(repo, token)
        if "error" in a:
            print(f"SKIP: {a['error']}", file=sys.stderr)
            continue
        # Auto-merge safe PRs (dry run unless --no-dry-run was given).
        if args.auto_merge and a["safe_merge_candidates"] > 0:
            prs = api("GET", f"/repos/{repo}/pulls", token,
                      params={"state": "open", "limit": "100"})
            if isinstance(prs, list):
                a["merge_actions"] = auto_merge_safe(repo, token, prs,
                                                     dry_run=args.dry_run)
        # Optionally file the report as a Gitea issue.
        if args.file_as_issue:
            issue_num = file_as_issue(repo, token, a)
            if issue_num:
                a["filed_issue"] = issue_num
                print(f"Filed triage report as issue #{issue_num}")
        results.append(a)

    # Render: single-repo JSON is the bare object, multi-repo is a list.
    if args.json:
        out = to_json(results[0] if len(results) == 1 else results)
    else:
        out = "\n\n---\n\n".join(to_markdown(a) for a in results)
    if args.output:
        Path(args.output).write_text(out, encoding="utf-8")
        print(f"Written to {args.output}")
    else:
        print(out)

    # Non-zero exit when stale refs or duplicate PRs need attention.
    total_stale = sum(len(a.get("stale_prs", [])) for a in results)
    total_dupes = sum(len(a.get("duplicates", {})) for a in results)
    if total_stale + total_dupes > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()