335 lines
12 KiB
Python
335 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
pr_triage.py — Automated PR triage with optional auto-merge (Issue #659).
|
|
|
|
Fetches open PRs, categorizes, detects duplicates/stale refs, generates
|
|
report, and optionally auto-merges safe training-data PRs.
|
|
|
|
Usage:
|
|
python3 scripts/pr_triage.py Timmy_Foundation/timmy-config
|
|
python3 scripts/pr_triage.py Timmy_Foundation/timmy-config --auto-merge
|
|
python3 scripts/pr_triage.py Timmy_Foundation/hermes-agent --json
|
|
python3 scripts/pr_triage.py --org Timmy_Foundation --auto-merge
|
|
python3 scripts/pr_triage.py --file-as-issue Timmy_Foundation/timmy-config
|
|
"""
|
|
import argparse
import json
import os
import re
import sys
import time

from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from urllib.error import HTTPError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
|
|
|
|
GITEA_URL = "https://forge.alexanderwhitestone.com"

# Matches issue references like "#123" in PR titles and bodies.
ISSUE_RE = re.compile(r"#(\d+)")

# Auto-merge: only these categories are "safe"
SAFE_MERGE_CATEGORIES = {"training_data", "docs"}

# Keyword heuristics for categorizing a PR by its title. Categories are
# checked in insertion order and the first match wins (see categorize()).
# NOTE: "pairs" was previously listed twice in training_data; deduplicated.
CATEGORY_KEYWORDS = {
    "training_data": ["500", "pairs", "scene description", "lyrics", "prompt",
                      "training data", "corpus"],
    "bug_fix": ["fix", "bug", "patch", "hotfix", "resolve", "repair"],
    "feature": ["feat", "add", "implement", "feature", "new"],
    "docs": ["doc", "readme", "changelog", "guide"],
    "ops": ["ops", "deploy", "ci", "cd", "pipeline", "ansible"],
    "security": ["security", "xss", "injection", "auth", "vulnerability"],
}
|
|
|
|
|
|
# ─── API helpers ──────────────────────────────────────────────────────
|
|
|
|
def get_token() -> str:
    """Return the Gitea API token.

    Looks first at ~/.config/gitea/token, then at the GITEA_TOKEN
    environment variable; exits with status 1 if neither is set.
    """
    token_file = Path(os.path.expanduser("~/.config/gitea/token"))
    if token_file.exists():
        return token_file.read_text().strip()
    env_token = os.environ.get("GITEA_TOKEN", "")
    if env_token:
        return env_token
    print("ERROR: No token. ~/.config/gitea/token or GITEA_TOKEN", file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
|
def api(method: str, path: str, token: str, data: Optional[dict] = None,
        params: Optional[dict] = None) -> Any:
    """Call the Gitea REST API and return the decoded JSON response.

    Args:
        method: HTTP verb ("GET", "POST", ...).
        path: API path relative to /api/v1, e.g. "/repos/org/repo/pulls".
        token: Gitea access token.
        data: Optional JSON request body.
        params: Optional query-string parameters.

    Returns:
        Parsed JSON on success, or a dict with "_error" (HTTP status code)
        and "_body" (truncated error text) on HTTP failure.
    """
    url = f"{GITEA_URL}/api/v1{path}"
    if params:
        # urlencode() escapes spaces and special characters; the previous
        # manual "&".join produced invalid URLs for such values.
        url += "?" + urlencode(params)
    body = json.dumps(data).encode() if data else None
    req = Request(url, data=body, headers={
        "Authorization": f"token {token}",
        "Content-Type": "application/json",
    }, method=method)
    try:
        return json.loads(urlopen(req, timeout=30).read())
    except HTTPError as e:
        # Surface HTTP errors as data so callers can inspect them instead
        # of crashing the whole triage run.
        err_body = e.read().decode() if e.fp else ""
        return {"_error": e.code, "_body": err_body[:300]}
|
|
|
|
|
|
# ─── Triage logic ─────────────────────────────────────────────────────
|
|
|
|
def categorize(title: str) -> str:
    """Map a PR title to a category via keyword matching.

    Categories are tried in CATEGORY_KEYWORDS insertion order; the first
    category whose keyword list matches the lowercased title wins.
    Returns "other" when nothing matches.
    """
    lowered = (title or "").lower()
    for category, keywords in CATEGORY_KEYWORDS.items():
        for keyword in keywords:
            if keyword in lowered:
                return category
    return "other"
|
|
|
|
|
|
def refs(pr: dict) -> List[int]:
    """Return sorted, de-duplicated issue numbers referenced by a PR.

    Scans both the title and the body for "#NNN" patterns.
    """
    haystack = "{} {}".format(pr.get("title") or "", pr.get("body") or "")
    return sorted({int(num) for num in ISSUE_RE.findall(haystack)})
|
|
|
|
|
|
def find_dupes(prs: List[dict]) -> Dict[int, List[int]]:
    """Map issue numbers to the PRs referencing them, keeping only issues
    referenced by more than one PR (i.e. likely-duplicate work)."""
    by_issue: Dict[int, List[int]] = {}
    for pr in prs:
        pr_number = pr["number"]
        for issue in refs(pr):
            by_issue.setdefault(issue, []).append(pr_number)
    return {issue: nums for issue, nums in by_issue.items() if len(nums) > 1}
|
|
|
|
|
|
def find_stale(prs: List[dict], closed: set) -> List[dict]:
    """Return PRs that reference at least one already-closed issue.

    Each entry carries the PR number, its title, and the closed issue
    numbers it still references.
    """
    stale_entries = []
    for pr in prs:
        closed_refs = [n for n in refs(pr) if n in closed]
        if not closed_refs:
            continue
        stale_entries.append({
            "pr": pr["number"],
            "title": pr.get("title", ""),
            "stale_refs": closed_refs,
        })
    return stale_entries
|
|
|
|
|
|
def get_mergeability(repo: str, token: str, pr_num: int) -> Any:
    """Check whether a PR is mergeable.

    Returns the Gitea "mergeable" field — a boolean — when available, or
    the string "unknown" when the API call fails or the field is missing.
    Callers compare against False explicitly, so the original `-> str`
    annotation was incorrect.
    """
    pr = api("GET", f"/repos/{repo}/pulls/{pr_num}", token)
    if isinstance(pr, dict) and "_error" in pr:
        return "unknown"
    return pr.get("mergeable", "unknown")
|
|
|
|
|
|
def auto_merge_safe(repo: str, token: str, prs: List[dict],
                    dry_run: bool = True) -> List[dict]:
    """Auto-merge safe PRs (training data, docs) if mergeable.

    Returns one action record per considered PR, with "action" set to
    "skipped", "would_merge", "merged", or "merge_failed".
    """
    actions: List[dict] = []
    for pr in prs:
        category = categorize(pr.get("title", ""))
        if category not in SAFE_MERGE_CATEGORIES:
            continue

        number = pr["number"]
        mergeable = get_mergeability(repo, token, number)

        if mergeable is False:
            actions.append({"pr": number, "action": "skipped",
                            "reason": "not mergeable"})
        elif dry_run:
            actions.append({"pr": number, "action": "would_merge",
                            "category": category})
        else:
            # Attempt merge
            result = api("POST", f"/repos/{repo}/pulls/{number}/merge", token, {
                "Do": "merge",
                "merge_when_pipeline_succeeds": False,
            })
            if isinstance(result, dict) and "_error" in result:
                actions.append({"pr": number, "action": "merge_failed",
                                "error": result.get("_body", "")[:200]})
            else:
                actions.append({"pr": number, "action": "merged",
                                "category": category})

    return actions
|
|
|
|
|
|
# ─── Reporting ────────────────────────────────────────────────────────
|
|
|
|
def analyze(repo: str, token: str) -> dict:
    """Fetch open PRs for *repo* and build the triage summary dict.

    The summary includes per-category breakdowns, duplicate / stale PR
    detection, aggregate diff statistics, and a count of auto-merge
    candidates. On API failure returns {"error": ...}.
    """
    prs = api("GET", f"/repos/{repo}/pulls", token,
              params={"state": "open", "limit": "100"})
    if not isinstance(prs, list):
        return {"error": f"API error: {prs}"}

    closed = api("GET", f"/repos/{repo}/issues", token,
                 params={"state": "closed", "limit": "200"})
    closed_nums: set = set()
    if isinstance(closed, list):
        # The issues endpoint may include PRs; keep real issues only.
        closed_nums = {i["number"] for i in closed if not i.get("pull_request")}

    cats: Dict[str, List[dict]] = {}
    for pr in prs:
        entry = {
            "number": pr["number"],
            "title": pr.get("title", ""),
            "refs": refs(pr),
            "head": pr.get("head", {}).get("ref", ""),
            "files": pr.get("changed_files", 0),
            "created": pr.get("created_at", "")[:10],
        }
        cats.setdefault(categorize(entry["title"]), []).append(entry)

    safe_candidates = sum(
        1 for p in prs
        if categorize(p.get("title", "")) in SAFE_MERGE_CATEGORIES
    )

    return {
        "repo": repo,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "total_open": len(prs),
        "total_files_changed": sum(p.get("changed_files", 0) for p in prs),
        "total_additions": sum(p.get("additions", 0) for p in prs),
        "total_deletions": sum(p.get("deletions", 0) for p in prs),
        "categories": {k: len(v) for k, v in cats.items()},
        "category_details": cats,
        "duplicates": find_dupes(prs),
        "stale_prs": find_stale(prs, closed_nums),
        "closed_issues_checked": len(closed_nums),
        "safe_merge_candidates": safe_candidates,
    }
|
|
|
|
|
|
def to_markdown(a: dict) -> str:
    """Generate a markdown report suitable for filing as a Gitea issue."""
    ts = a.get("timestamp", "")[:16].replace("T", " ")
    out: List[str] = []
    add = out.append

    add(f"## PR Triage Report — {a['repo']}")
    add(f"**Generated:** {ts}")
    add("")
    add("### Summary")
    add("")
    add("| Metric | Value |")
    add("|--------|-------|")
    add(f"| Open PRs | {a['total_open']} |")
    add(f"| Files changed | {a['total_files_changed']} |")
    add(f"| Lines added | +{a['total_additions']} |")
    add(f"| Lines deleted | -{a['total_deletions']} |")
    add(f"| Safe merge candidates | {a.get('safe_merge_candidates', 0)} |")
    add("")
    add("### Categories")
    add("")
    add("| Category | Count |")
    add("|----------|-------|")
    for cat, count in sorted(a["categories"].items()):
        add(f"| {cat} | {count} |")

    if a["duplicates"]:
        out += ["", "### Duplicate PRs", ""]
        for issue, pr_nums in a["duplicates"].items():
            joined = ", ".join(f"#{p}" for p in pr_nums)
            add(f"- Issue #{issue} referenced by PRs: {joined}")

    if a["stale_prs"]:
        out += ["", "### Stale PRs (reference closed issues)", ""]
        for s in a["stale_prs"]:
            refs_str = ", ".join(f"#{r}" for r in s["stale_refs"])
            add(f"- #{s['pr']}: {s['title'][:60]} — closed refs: {refs_str}")

    for cat, items in a.get("category_details", {}).items():
        if not items:
            continue
        out += ["", f"### {cat.replace('_', ' ').title()} ({len(items)})", ""]
        for pr in items:
            r = (f" (refs: {', '.join(f'#{x}' for x in pr['refs'])})"
                 if pr["refs"] else "")
            add(f"- #{pr['number']}: {pr['title'][:70]}{r}")

    out += ["", "---", "*Generated by pr_triage.py*"]
    return "\n".join(out)
|
|
|
|
|
|
def to_json(a: dict) -> str:
    """Serialize the analysis as pretty-printed JSON.

    Values json can't encode natively (e.g. datetimes) are stringified
    via default=str.
    """
    return json.dumps(a, default=str, indent=2)
|
|
|
|
|
|
# ─── File as issue ────────────────────────────────────────────────────
|
|
|
|
def file_as_issue(repo: str, token: str, analysis: dict) -> Optional[int]:
    """File the triage report as a new Gitea issue.

    Returns the new issue number, or None when creation failed.
    """
    report_date = analysis.get("timestamp", "")[:10]
    payload = {
        "title": f"[ops] PR Triage Report — {report_date}",
        "body": to_markdown(analysis),
    }
    result = api("POST", f"/repos/{repo}/issues", token, payload)
    if isinstance(result, dict) and "number" in result:
        return result["number"]
    return None
|
|
|
|
|
|
# ─── CLI ──────────────────────────────────────────────────────────────
|
|
|
|
def main():
    """CLI entry point: triage one repo, or every repo in an org."""
    p = argparse.ArgumentParser(description="PR triage automation")
    p.add_argument("repo", nargs="?", help="Org/Repo path")
    p.add_argument("--org", help="Triage all repos in org")
    p.add_argument("--auto-merge", action="store_true", help="Auto-merge safe PRs")
    # BUG FIX: --dry-run was previously store_true with default=True (so the
    # flag was a no-op), and auto_merge_safe received dry_run=not args.dry_run,
    # which always evaluated to False — i.e. --auto-merge ALWAYS performed
    # real merges. Dry-run is now the genuine default; pass --no-dry-run to
    # actually merge.
    p.add_argument("--dry-run", dest="dry_run", action="store_true",
                   default=True, help="Don't merge/close (default)")
    p.add_argument("--no-dry-run", dest="dry_run", action="store_false",
                   help="Actually perform merges")
    p.add_argument("--json", action="store_true", help="JSON output")
    p.add_argument("--file-as-issue", action="store_true", help="File report as issue")
    p.add_argument("--output", help="Write report to file")
    p.add_argument("--token", help="Override token")
    args = p.parse_args()

    token = args.token or get_token()
    repos = []
    if args.org:
        org_repos = api("GET", f"/orgs/{args.org}/repos", token, params={"limit": "50"})
        if isinstance(org_repos, list):
            repos = [r["full_name"] for r in org_repos]
    elif args.repo:
        repos = [args.repo]
    else:
        p.error("Provide REPO or --org")

    results = []
    for repo in repos:
        a = analyze(repo, token)
        if "error" in a:
            print(f"SKIP: {a['error']}", file=sys.stderr)
            continue

        # Auto-merge safe categories (dry-run unless --no-dry-run was given)
        if args.auto_merge and a["safe_merge_candidates"] > 0:
            prs = api("GET", f"/repos/{repo}/pulls", token,
                      params={"state": "open", "limit": "100"})
            if isinstance(prs, list):
                a["merge_actions"] = auto_merge_safe(repo, token, prs,
                                                     dry_run=args.dry_run)

        # File the report as a Gitea issue if requested
        if args.file_as_issue:
            issue_num = file_as_issue(repo, token, a)
            if issue_num:
                a["filed_issue"] = issue_num
                print(f"Filed triage report as issue #{issue_num}")

        results.append(a)

    # Output
    if args.json:
        out = to_json(results[0] if len(results) == 1 else results)
    else:
        out = "\n\n---\n\n".join(to_markdown(a) for a in results)

    if args.output:
        Path(args.output).write_text(out, encoding="utf-8")
        print(f"Written to {args.output}")
    else:
        print(out)

    # Exit 1 if stale/duplicates found (useful as a CI failure signal)
    total_stale = sum(len(a.get("stale_prs", [])) for a in results)
    total_dupes = sum(len(a.get("duplicates", {})) for a in results)
    if total_stale + total_dupes > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()
|