#!/usr/bin/env python3
"""
pr_triage.py — Automated PR triage with optional auto-merge (Issue #659).

Fetches open PRs, categorizes, detects duplicates/stale refs, generates a
report, and optionally auto-merges safe training-data PRs.

Usage:
    python3 scripts/pr_triage.py Timmy_Foundation/timmy-config
    python3 scripts/pr_triage.py Timmy_Foundation/timmy-config --auto-merge
    python3 scripts/pr_triage.py Timmy_Foundation/hermes-agent --json
    python3 scripts/pr_triage.py --org Timmy_Foundation --auto-merge
    python3 scripts/pr_triage.py --file-as-issue Timmy_Foundation/timmy-config
"""
import argparse
import json
import os
import re
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.error import HTTPError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
# Base URL of the Gitea instance this script talks to.
GITEA_URL = "https://forge.alexanderwhitestone.com"

# Matches issue references like "#123" in PR titles/bodies.
ISSUE_RE = re.compile(r"#(\d+)")

# Auto-merge: only these categories are "safe" to merge automatically.
SAFE_MERGE_CATEGORIES = {"training_data", "docs"}

# Keyword lists used to bucket a PR by its title; the first category
# (in insertion order) with a matching keyword wins.
# Fix: "pairs" was listed twice in training_data.
CATEGORY_KEYWORDS = {
    "training_data": ["500", "pairs", "scene description", "lyrics", "prompt",
                      "training data", "corpus"],
    "bug_fix": ["fix", "bug", "patch", "hotfix", "resolve", "repair"],
    "feature": ["feat", "add", "implement", "feature", "new"],
    "docs": ["doc", "readme", "changelog", "guide"],
    "ops": ["ops", "deploy", "ci", "cd", "pipeline", "ansible"],
    "security": ["security", "xss", "injection", "auth", "vulnerability"],
}
# ─── API helpers ──────────────────────────────────────────────────────
def get_token() -> str:
    """Resolve the Gitea API token.

    Order of precedence: the ~/.config/gitea/token file, then the
    GITEA_TOKEN environment variable. Exits with status 1 when neither
    source yields a token.
    """
    token_file = Path(os.path.expanduser("~/.config/gitea/token"))
    if token_file.exists():
        return token_file.read_text().strip()
    token = os.environ.get("GITEA_TOKEN", "")
    if token:
        return token
    print("ERROR: No token. ~/.config/gitea/token or GITEA_TOKEN", file=sys.stderr)
    sys.exit(1)
def api(method: str, path: str, token: str, data: Optional[dict] = None,
        params: Optional[dict] = None) -> Any:
    """Call the Gitea REST API and return the decoded JSON response.

    Args:
        method: HTTP verb ("GET", "POST", ...).
        path: API path under /api/v1, e.g. "/repos/org/name/pulls".
        token: Gitea API token sent in the Authorization header.
        data: Optional dict to send as a JSON request body.
        params: Optional query-string parameters.

    Returns:
        The parsed JSON payload on success, or a dict of the form
        {"_error": <status>, "_body": <first 300 chars>} on HTTP errors
        so callers can degrade gracefully instead of crashing.
    """
    url = f"{GITEA_URL}/api/v1{path}"
    if params:
        # urlencode() percent-escapes values; the previous raw f-string
        # interpolation broke on spaces or special characters.
        url += "?" + urlencode(params)
    body = json.dumps(data).encode() if data else None
    req = Request(url, data=body, headers={
        "Authorization": f"token {token}",
        "Content-Type": "application/json",
    }, method=method)
    try:
        return json.loads(urlopen(req, timeout=30).read())
    except HTTPError as e:
        err_body = e.read().decode() if e.fp else ""
        return {"_error": e.code, "_body": err_body[:300]}
# ─── Triage logic ─────────────────────────────────────────────────────
def categorize(title: str) -> str:
    """Bucket a PR title into one of the CATEGORY_KEYWORDS categories.

    The first category (in dict insertion order) whose keyword list has
    any entry contained in the lowercased title wins; a title matching
    nothing falls back to "other". A None title is treated as empty.
    """
    lowered = (title or "").lower()
    for category, keywords in CATEGORY_KEYWORDS.items():
        for keyword in keywords:
            if keyword in lowered:
                return category
    return "other"
def refs(pr: dict) -> List[int]:
    """Return the sorted, de-duplicated issue numbers a PR references.

    Scans both the PR title and body for "#NNN" patterns; missing
    title/body fields are treated as empty strings.
    """
    haystack = "{} {}".format(pr.get("title") or "", pr.get("body") or "")
    return sorted({int(num) for num in ISSUE_RE.findall(haystack)})
def find_dupes(prs: List[dict]) -> Dict[int, List[int]]:
    """Map issue number -> PR numbers for issues referenced by 2+ PRs.

    Issues referenced by only a single PR are dropped; the remainder
    are likely-duplicate work that needs human review.
    """
    referenced: Dict[int, List[int]] = {}
    for pr in prs:
        pr_number = pr["number"]
        for issue in refs(pr):
            referenced.setdefault(issue, []).append(pr_number)
    return {issue: nums for issue, nums in referenced.items() if len(nums) > 1}
def find_stale(prs: List[dict], closed: set) -> List[dict]:
    """List open PRs whose referenced issues are already closed.

    Each entry records the PR number, its title, and which of its
    referenced issue numbers appear in the *closed* set.
    """
    stale_entries = []
    for pr in prs:
        closed_refs = [issue for issue in refs(pr) if issue in closed]
        if not closed_refs:
            continue
        stale_entries.append({
            "pr": pr["number"],
            "title": pr.get("title", ""),
            "stale_refs": closed_refs,
        })
    return stale_entries
def get_mergeability(repo: str, token: str, pr_num: int) -> Any:
    """Return a PR's "mergeable" field, or "unknown" on API failure.

    Fix: the previous ``-> str`` annotation was wrong — Gitea reports
    "mergeable" as a boolean (the caller tests ``is False``), so this
    returns True/False normally and the string "unknown" when the API
    call errors or the field is absent.
    """
    pr = api("GET", f"/repos/{repo}/pulls/{pr_num}", token)
    if isinstance(pr, dict) and "_error" in pr:
        return "unknown"
    return pr.get("mergeable", "unknown")
def auto_merge_safe(repo: str, token: str, prs: List[dict],
                    dry_run: bool = True) -> List[dict]:
    """Auto-merge safe PRs (training data, docs) if mergeable.

    Returns one action record per candidate PR: "would_merge" in
    dry-run mode, otherwise "merged", "merge_failed", or "skipped"
    (when Gitea reports the PR as not mergeable).
    """
    actions: List[dict] = []
    for pr in prs:
        category = categorize(pr.get("title", ""))
        if category not in SAFE_MERGE_CATEGORIES:
            # Only training-data/docs PRs are ever auto-merged.
            continue
        number = pr["number"]
        if get_mergeability(repo, token, number) is False:
            actions.append({"pr": number, "action": "skipped",
                            "reason": "not mergeable"})
            continue
        if dry_run:
            actions.append({"pr": number, "action": "would_merge",
                            "category": category})
            continue
        # Attempt the merge via Gitea's merge endpoint.
        outcome = api("POST", f"/repos/{repo}/pulls/{number}/merge", token, {
            "Do": "merge",
            "merge_when_pipeline_succeeds": False,
        })
        if isinstance(outcome, dict) and "_error" in outcome:
            actions.append({"pr": number, "action": "merge_failed",
                            "error": outcome.get("_body", "")[:200]})
        else:
            actions.append({"pr": number, "action": "merged",
                            "category": category})
    return actions
# ─── Reporting ────────────────────────────────────────────────────────
def analyze(repo: str, token: str) -> dict:
    """Fetch open PRs for *repo* and build the triage analysis dict.

    The result includes per-category PR details, duplicate-reference and
    stale-reference detection, and aggregate file/line statistics.
    Returns {"error": ...} when the PR listing itself fails.
    """
    prs = api("GET", f"/repos/{repo}/pulls", token,
              params={"state": "open", "limit": "100"})
    if not isinstance(prs, list):
        return {"error": f"API error: {prs}"}

    # Closed issues (excluding PRs) feed the stale-reference check.
    closed = api("GET", f"/repos/{repo}/issues", token,
                 params={"state": "closed", "limit": "200"})
    closed_nums: set = set()
    if isinstance(closed, list):
        closed_nums = {i["number"] for i in closed if not i.get("pull_request")}

    # Group a compact summary of each PR under its category.
    cats: Dict[str, List[dict]] = {}
    for pr in prs:
        title = pr.get("title", "")
        cats.setdefault(categorize(title), []).append({
            "number": pr["number"],
            "title": title,
            "refs": refs(pr),
            "head": pr.get("head", {}).get("ref", ""),
            "files": pr.get("changed_files", 0),
            "created": pr.get("created_at", "")[:10],
        })

    safe_candidates = [p for p in prs
                       if categorize(p.get("title", "")) in SAFE_MERGE_CATEGORIES]

    return {
        "repo": repo,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "total_open": len(prs),
        "total_files_changed": sum(pr.get("changed_files", 0) for pr in prs),
        "total_additions": sum(pr.get("additions", 0) for pr in prs),
        "total_deletions": sum(pr.get("deletions", 0) for pr in prs),
        "categories": {k: len(v) for k, v in cats.items()},
        "category_details": cats,
        "duplicates": find_dupes(prs),
        "stale_prs": find_stale(prs, closed_nums),
        "closed_issues_checked": len(closed_nums),
        "safe_merge_candidates": len(safe_candidates),
    }
def to_markdown(a: dict) -> str:
    """Render the analysis dict as a markdown report (Gitea-issue ready).

    Sections: summary table, category counts, optional duplicate and
    stale-PR lists, and a per-category detail listing.
    """
    generated = a.get("timestamp", "")[:16].replace("T", " ")
    out: List[str] = []
    add = out.append

    add(f"## PR Triage Report — {a['repo']}")
    add(f"**Generated:** {generated}")
    out.extend(["", "### Summary", "",
                "| Metric | Value |", "|--------|-------|"])
    add(f"| Open PRs | {a['total_open']} |")
    add(f"| Files changed | {a['total_files_changed']} |")
    add(f"| Lines added | +{a['total_additions']} |")
    add(f"| Lines deleted | -{a['total_deletions']} |")
    add(f"| Safe merge candidates | {a.get('safe_merge_candidates', 0)} |")
    out.extend(["", "### Categories", "",
                "| Category | Count |", "|----------|-------|"])
    for cat, count in sorted(a["categories"].items()):
        add(f"| {cat} | {count} |")

    if a["duplicates"]:
        out.extend(["", "### Duplicate PRs", ""])
        for issue, prs in a["duplicates"].items():
            pr_list = ", ".join(f"#{p}" for p in prs)
            add(f"- Issue #{issue} referenced by PRs: {pr_list}")

    if a["stale_prs"]:
        out.extend(["", "### Stale PRs (reference closed issues)", ""])
        for s in a["stale_prs"]:
            refs_str = ", ".join(f"#{r}" for r in s["stale_refs"])
            add(f"- #{s['pr']}: {s['title'][:60]} — closed refs: {refs_str}")

    for cat, items in a.get("category_details", {}).items():
        if not items:
            continue
        out.extend(["", f"### {cat.replace('_', ' ').title()} ({len(items)})", ""])
        for pr in items:
            suffix = ""
            if pr["refs"]:
                suffix = f" (refs: {', '.join(f'#{x}' for x in pr['refs'])})"
            add(f"- #{pr['number']}: {pr['title'][:70]}{suffix}")

    out.extend(["", "---", "*Generated by pr_triage.py*"])
    return "\n".join(out)
def to_json(a: dict) -> str:
    """Serialize the analysis dict as 2-space-indented JSON.

    Non-JSON-serializable values (e.g. datetimes) fall back to str().
    """
    return json.dumps(a, default=str, indent=2)
# ─── File as issue ────────────────────────────────────────────────────
def file_as_issue(repo: str, token: str, analysis: dict) -> Optional[int]:
    """Post the triage report to *repo* as a new Gitea issue.

    Returns the created issue's number, or None if the API call failed.
    """
    date_part = analysis.get("timestamp", "")[:10]
    created = api("POST", f"/repos/{repo}/issues", token, {
        "title": f"[ops] PR Triage Report — {date_part}",
        "body": to_markdown(analysis),
    })
    if isinstance(created, dict) and "number" in created:
        return created["number"]
    return None
# ─── CLI ──────────────────────────────────────────────────────────────
def main():
    """CLI entry point: parse args, triage each repo, emit report, set exit code.

    Exits 1 when any stale refs or duplicate PRs are found, so CI jobs
    can flag repos needing attention.
    """
    p = argparse.ArgumentParser(description="PR triage automation")
    p.add_argument("repo", nargs="?", help="Org/Repo path")
    p.add_argument("--org", help="Triage all repos in org")
    p.add_argument("--auto-merge", action="store_true", help="Auto-merge safe PRs")
    # BUG FIX: --dry-run was store_true with default=True (so always True),
    # and the value was then inverted before reaching auto_merge_safe(),
    # meaning real merges ALWAYS ran under --auto-merge. Dry run is now
    # genuinely the default; pass --no-dry-run to perform real merges.
    p.add_argument("--dry-run", dest="dry_run", action="store_true", default=True,
                   help="Don't merge/close (default)")
    p.add_argument("--no-dry-run", dest="dry_run", action="store_false",
                   help="Actually perform merges")
    p.add_argument("--json", action="store_true", help="JSON output")
    p.add_argument("--file-as-issue", action="store_true", help="File report as issue")
    p.add_argument("--output", help="Write report to file")
    p.add_argument("--token", help="Override token")
    args = p.parse_args()

    token = args.token or get_token()

    # Build the list of repos to triage: all repos of an org, or one repo.
    repos = []
    if args.org:
        org_repos = api("GET", f"/orgs/{args.org}/repos", token, params={"limit": "50"})
        if isinstance(org_repos, list):
            repos = [r["full_name"] for r in org_repos]
    elif args.repo:
        repos = [args.repo]
    else:
        p.error("Provide REPO or --org")

    results = []
    for repo in repos:
        a = analyze(repo, token)
        if "error" in a:
            print(f"SKIP: {a['error']}", file=sys.stderr)
            continue
        # Auto-merge safe PRs (training data / docs) when requested.
        if args.auto_merge and a["safe_merge_candidates"] > 0:
            prs = api("GET", f"/repos/{repo}/pulls", token,
                      params={"state": "open", "limit": "100"})
            if isinstance(prs, list):
                a["merge_actions"] = auto_merge_safe(repo, token, prs,
                                                     dry_run=args.dry_run)
        # Optionally file the rendered report as a Gitea issue.
        if args.file_as_issue:
            issue_num = file_as_issue(repo, token, a)
            if issue_num:
                a["filed_issue"] = issue_num
                print(f"Filed triage report as issue #{issue_num}")
        results.append(a)

    # Emit the combined report (single dict for one repo, list otherwise).
    if args.json:
        out = to_json(results[0] if len(results) == 1 else results)
    else:
        out = "\n\n---\n\n".join(to_markdown(a) for a in results)
    if args.output:
        Path(args.output).write_text(out, encoding="utf-8")
        print(f"Written to {args.output}")
    else:
        print(out)

    # Non-zero exit when stale refs or duplicates need human attention.
    total_stale = sum(len(a.get("stale_prs", [])) for a in results)
    total_dupes = sum(len(a.get("duplicates", {})) for a in results)
    if total_stale + total_dupes > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()