#!/usr/bin/env python3
"""Audit the live timmy-config PR backlog referenced by the-nexus issue #1471.

Instead of trusting the stale counts in the issue body, this script re-queries
the forge, classifies every open PR (duplicate, unreviewed, stale, mergeable),
and writes a markdown report.
"""
from __future__ import annotations

import argparse
import json
import os
import re
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any
from urllib.error import HTTPError
from urllib.request import Request, urlopen

API_BASE = "https://forge.alexanderwhitestone.com/api/v1"
ORG = "Timmy_Foundation"
SOURCE_REPO = "the-nexus"
TARGET_REPO = "timmy-config"
DEFAULT_TOKEN_PATH = os.path.expanduser("~/.config/gitea/token")
DEFAULT_OUTPUT = "reports/2026-04-16-timmy-config-pr-backlog-audit.md"


def api_get(path: str, token: str) -> Any:
    """GET an API path with token auth and decode the JSON response."""
    req = Request(API_BASE + path, headers={"Authorization": f"token {token}"})
    with urlopen(req, timeout=30) as resp:
        return json.loads(resp.read().decode())


def extract_issue_refs(title: str = "", body: str = "", head: str = "") -> list[int]:
    """Collect unique issue numbers from a PR's title, body, and head branch.

    Explicit ``#123`` references win; only if none exist does the head branch
    get scanned for bare numbers delimited by ``/`` or ``-`` (for example
    ``fix/1471-stale-count``).
    """
    text = " ".join(filter(None, [title, body, head]))
    refs: list[int] = []
    seen: set[int] = set()
    for match in re.finditer(r"#(\d+)", text):
        value = int(match.group(1))
        if value not in seen:
            seen.add(value)
            refs.append(value)
    if not refs and head:
        # The lookahead leaves the trailing delimiter unconsumed, so adjacent
        # numbers such as "123-456" both match.
        for match in re.finditer(r"(?:^|[/-])(\d+)(?=$|[/-])", head):
            value = int(match.group(1))
            if value not in seen:
                seen.add(value)
                refs.append(value)
    return refs


def summarize_backlog(
    backlog: list[dict[str, Any]],
    now_iso: str | None = None,
    stale_days: int = 7,
) -> dict[str, Any]:
    """Bucket open PRs into duplicate, unreviewed, stale, and mergeable sets."""
    now = _parse_iso(now_iso) if now_iso else datetime.now(timezone.utc)
    duplicate_groups: dict[tuple[int, ...], list[dict[str, Any]]] = {}
    missing_reviewer: list[dict[str, Any]] = []
    stale: list[dict[str, Any]] = []
    mergeable: list[dict[str, Any]] = []
    for pr in backlog:
        refs_list = pr.get("issue_refs") or extract_issue_refs(
            pr.get("title") or "",
            pr.get("body") or "",
            pr.get("head") or "",
        )
        if not pr.get("issue_refs"):
            pr["issue_refs"] = refs_list
        refs = tuple(refs_list)
        if refs:
            duplicate_groups.setdefault(refs, []).append(pr)
        if pr.get("review_count", 0) + pr.get("requested_reviewers", 0) == 0:
            missing_reviewer.append(pr)
        updated_at = _parse_iso(pr["updated_at"])
        if now - updated_at > timedelta(days=stale_days):
            stale.append(pr)
        if pr.get("mergeable"):
            mergeable.append(pr)
    dupes = [
        {"issue_refs": list(refs), "prs": prs}
        for refs, prs in duplicate_groups.items()
        if len(prs) > 1
    ]
    dupes.sort(key=lambda item: item["issue_refs"][0] if item["issue_refs"] else 10**9)
    return {
        "total_open_prs": len(backlog),
        "mergeable_count": len(mergeable),
        "missing_reviewer_count": len(missing_reviewer),
        "stale_count": len(stale),
        "duplicate_issue_groups": dupes,
        "mergeable_prs": mergeable,
        "missing_reviewer_prs": missing_reviewer,
        "stale_prs": stale,
    }


def render_report(
    *,
    source_issue: int,
    source_title: str,
    summary: dict[str, Any],
    backlog: list[dict[str, Any]],
    generated_at: str,
) -> str:
    """Render the audit summary and raw backlog snapshot as markdown."""
    lines = [
        f"# Timmy-config PR Backlog Audit — the-nexus #{source_issue}",
        "",
        f"Generated: {generated_at}",
        f"Source issue: `{source_title}`",
        "",
        "## Source Snapshot",
        "",
        f"Issue #{source_issue} claims timmy-config had 9 open PRs and the highest PR backlog in the org during the original triage snapshot.",
        "This audit re-queries the live PR backlog and classifies it against current forge state instead of trusting that stale count.",
        "",
        "## Live Summary",
        "",
        f"- Open PRs on `{ORG}/{TARGET_REPO}`: {summary['total_open_prs']}",
        f"- Mergeable right now: {summary['mergeable_count']}",
        f"- PRs with no reviews or requested reviewers: {summary['missing_reviewer_count']}",
        f"- Stale PRs older than 7 days: {summary['stale_count']}",
        f"- Duplicate issue groups detected: {len(summary['duplicate_issue_groups'])}",
        "",
        "## Issue Body Drift",
        "",
        f"The body of #{source_issue} is materially stale: it references a 9-PR backlog, while this audit counted {summary['total_open_prs']} open PRs.",
        "The issue should therefore be treated as a process/report problem, not as a direct live-merge instruction.",
        "",
        "## Duplicate Issue Groups",
        "",
    ]
    if summary["duplicate_issue_groups"]:
        lines.extend(["| Issue refs | PRs |", "|---|---|"])
        for group in summary["duplicate_issue_groups"]:
            refs = ", ".join(f"#{n}" for n in group["issue_refs"]) or "(none)"
            prs = "; ".join(f"#{pr['number']} ({pr['head']})" for pr in group["prs"])
            lines.append(f"| {refs} | {prs} |")
    else:
        lines.append("No duplicate issue groups detected in the live backlog.")
    lines.extend(["", "## Reviewer Coverage", ""])
    if summary["missing_reviewer_prs"]:
        lines.extend(["| PR | Title | Updated |", "|---|---|---|"])
        for pr in summary["missing_reviewer_prs"][:20]:
            lines.append(f"| #{pr['number']} | {pr['title']} | {pr['updated_at'][:10]} |")
        if len(summary["missing_reviewer_prs"]) > 20:
            lines.append(f"| ... | ... | +{len(summary['missing_reviewer_prs']) - 20} more |")
    else:
        lines.append("All open PRs currently show reviewer coverage signals.")
    lines.extend(["", "## Mergeable Snapshot", ""])
    if summary["mergeable_prs"]:
        lines.extend(["| PR | Title | Head branch |", "|---|---|---|"])
        for pr in summary["mergeable_prs"][:20]:
            lines.append(f"| #{pr['number']} | {pr['title']} | `{pr['head']}` |")
        if len(summary["mergeable_prs"]) > 20:
            lines.append(f"| ... | ... | +{len(summary['mergeable_prs']) - 20} more mergeable PRs |")
    else:
        lines.append("No mergeable PRs reported in the live backlog snapshot.")
    lines.extend(["", "## Stale PRs", ""])
    if summary["stale_prs"]:
        lines.extend(["| PR | Title | Updated |", "|---|---|---|"])
        for pr in summary["stale_prs"]:
            lines.append(f"| #{pr['number']} | {pr['title']} | {pr['updated_at'][:10]} |")
    else:
        lines.append("No stale PRs older than 7 days were detected in the live snapshot.")
    lines.extend([
        "",
        "## Recommended Next Actions",
        "",
        "1. Use the duplicate-issue groups to collapse obviously redundant PRs before attempting any merge sweep.",
        "2. Assign reviewers (or request them) on the PRs with zero reviewer coverage so the backlog becomes reviewable instead of merely mergeable.",
        "3. Prioritize mergeable PRs with unique issue refs and recent updates for the next burndown pass.",
        f"4. Treat this report as the live reference for #{source_issue}; the original issue body is now a stale ops snapshot.",
        "",
        "## Raw Backlog Snapshot",
        "",
        "| PR | Mergeable | Review signals | Issue refs |",
        "|---|---|---|---|",
    ])
    for pr in backlog[:40]:
        refs = ", ".join(f"#{n}" for n in pr.get("issue_refs", [])) or "(none)"
        review_signals = pr.get("review_count", 0) + pr.get("requested_reviewers", 0)
        lines.append(f"| #{pr['number']} | {pr['mergeable']} | {review_signals} | {refs} |")
    if len(backlog) > 40:
        lines.append(f"| ... | ... | ... | +{len(backlog) - 40} more PRs |")
    return "\n".join(lines) + "\n"


def collect_backlog(repo: str, token: str) -> list[dict[str, Any]]:
    """Fetch all open PRs (up to five pages of 100) plus review signals."""
    prs: list[dict[str, Any]] = []
    for page in range(1, 6):
        batch = api_get(f"/repos/{ORG}/{repo}/pulls?state=open&limit=100&page={page}", token)
        if not batch:
            break
        for pr in batch:
            number = pr["number"]
            # Both per-PR endpoints are optional signals; a failed call simply
            # degrades to zero via _safe_api_get.
            reviews = _safe_api_get(f"/repos/{ORG}/{repo}/pulls/{number}/reviews", token) or []
            requested = _safe_api_get(f"/repos/{ORG}/{repo}/pulls/{number}/requested_reviewers", token) or {}
            head_ref = (pr.get("head") or {}).get("ref") or ""
            prs.append({
                "number": number,
                "title": pr.get("title") or "",
                "body": pr.get("body") or "",
                "head": head_ref,
                "mergeable": bool(pr.get("mergeable")),
                "updated_at": pr.get("updated_at") or pr.get("created_at") or "1970-01-01T00:00:00Z",
                "review_count": len([r for r in reviews if r.get("state")]),
                "requested_reviewers": len(requested.get("users", []) or []),
                "issue_refs": extract_issue_refs(pr.get("title") or "", pr.get("body") or "", head_ref),
            })
        if len(batch) < 100:
            break
    return prs


def _safe_api_get(path: str, token: str) -> Any:
    """api_get, but swallow HTTP errors (e.g. a 404 on an optional endpoint)."""
    try:
        return api_get(path, token)
    except HTTPError:
        return None


def _parse_iso(value: str) -> datetime:
    """Parse an ISO-8601 timestamp, tolerating a trailing ``Z``."""
    return datetime.fromisoformat(value.replace("Z", "+00:00"))


def main() -> int:
    parser = argparse.ArgumentParser(
        description="Audit the live timmy-config PR backlog for the-nexus issue #1471."
    )
    parser.add_argument("--issue", type=int, default=1471)
    parser.add_argument("--source-repo", default=SOURCE_REPO)
    parser.add_argument("--target-repo", default=TARGET_REPO)
    parser.add_argument("--output", default=DEFAULT_OUTPUT)
    parser.add_argument("--token-file", default=DEFAULT_TOKEN_PATH)
    args = parser.parse_args()

    token = Path(args.token_file).read_text(encoding="utf-8").strip()
    issue = api_get(f"/repos/{ORG}/{args.source_repo}/issues/{args.issue}", token)
    backlog = collect_backlog(args.target_repo, token)
    summary = summarize_backlog(backlog)
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    report = render_report(
        source_issue=args.issue,
        source_title=issue.get("title") or "",
        summary=summary,
        backlog=backlog,
        generated_at=generated_at,
    )
    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(report, encoding="utf-8")
    print(out)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
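
# Example invocation, assuming this file is saved as audit_pr_backlog.py (the
# filename is illustrative, not prescribed by the source) and the token file
# holds a plain-text Gitea API token with read access to both repos:
#
#   python3 audit_pr_backlog.py --issue 1471 \
#       --token-file ~/.config/gitea/token \
#       --output reports/2026-04-16-timmy-config-pr-backlog-audit.md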