#!/usr/bin/env python3
"""Audit a 'burn lane empty' issue body against live forge state.

Re-queries every issue referenced by the source issue, matches each one
against open and closed pull requests, and writes a markdown audit report.
"""
from __future__ import annotations

import argparse
import json
import os
import re
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterable
from urllib.request import Request, urlopen

API_BASE = "https://forge.alexanderwhitestone.com/api/v1"
ORG = "Timmy_Foundation"
DEFAULT_TOKEN_PATH = os.path.expanduser("~/.config/gitea/token")


@dataclass(frozen=True)
class PullSummary:
    number: int
    title: str
    state: str
    merged: bool
    head: str
    body: str
    url: str


@dataclass(frozen=True)
class IssueAuditRow:
    number: int
    title: str
    state: str
    classification: str
    pr_summary: str
    issue_url: str

    def to_dict(self) -> dict[str, object]:
        return {
            "number": self.number,
            "title": self.title,
            "state": self.state,
            "classification": self.classification,
            "pr_summary": self.pr_summary,
            "issue_url": self.issue_url,
        }


def extract_issue_numbers(body: str) -> list[int]:
    """Pull references like '#123' or ranges like '#120-125' out of an issue body.

    Ranges expand inclusively in either direction; duplicates keep first-seen order.
    """
    numbers: list[int] = []
    seen: set[int] = set()
    for match in re.finditer(r"#(\d+)(?:-(\d+))?", body or ""):
        start = int(match.group(1))
        end = match.group(2)
        if end is None:
            if start not in seen:
                seen.add(start)
                numbers.append(start)
            continue
        stop = int(end)
        step = 1 if stop >= start else -1
        for value in range(start, stop + step, step):
            if value not in seen:
                seen.add(value)
                numbers.append(value)
    return numbers


def api_get(path: str, token: str):
    """GET a Gitea API path and decode the JSON response."""
    req = Request(API_BASE + path, headers={"Authorization": f"token {token}"})
    with urlopen(req, timeout=30) as resp:
        return json.loads(resp.read().decode())


def collect_pull_summaries(repo: str, token: str) -> list[PullSummary]:
    """Page through all open and closed pull requests for the repo."""
    pulls: list[PullSummary] = []
    for state in ("open", "closed"):
        page = 1
        while True:
            batch = api_get(f"/repos/{ORG}/{repo}/pulls?state={state}&limit=100&page={page}", token)
            if not batch:
                break
            for pr in batch:
                pulls.append(
                    PullSummary(
                        number=pr["number"],
                        title=pr.get("title") or "",
                        state=pr.get("state") or state,
                        merged=bool(pr.get("merged")),
                        head=(pr.get("head") or {}).get("ref") or "",
                        body=pr.get("body") or "",
                        url=pr.get("html_url") or pr.get("url") or "",
                    )
                )
            page += 1
    return pulls


def match_prs(issue_num: int, pulls: Iterable[PullSummary]) -> list[PullSummary]:
    """Heuristically link PRs to an issue via text references or branch naming.

    Word-boundary patterns prevent issue 66 from matching '#662' or 'fix/662',
    which plain substring checks would.
    """
    text_ref = re.compile(rf"#{issue_num}\b")
    head_ref = re.compile(rf"[/-]{issue_num}\b")
    matches: list[PullSummary] = []
    for pr in pulls:
        text = f"{pr.title} {pr.head} {pr.body}"
        if text_ref.search(text) or pr.head == f"fix/{issue_num}" or head_ref.search(pr.head):
            matches.append(pr)
    return matches


def classify_issue(issue: dict, related_prs: list[PullSummary]) -> IssueAuditRow:
    """Bucket a referenced issue by live state plus the strongest PR signal."""
    number = issue["number"]
    title = issue.get("title") or ""
    state = issue.get("state") or "unknown"
    issue_url = issue.get("html_url") or issue.get("url") or ""
    if state == "closed":
        classification = "already_closed"
        pr_summary = summarize_prs(related_prs) or "issue already closed"
    else:
        merged = [pr for pr in related_prs if pr.merged]
        open_prs = [pr for pr in related_prs if pr.state == "open"]
        closed_unmerged = [pr for pr in related_prs if pr.state != "open" and not pr.merged]
        if merged:
            classification = "closure_candidate"
            pr_summary = summarize_prs(merged)
        elif open_prs:
            classification = "active_pr"
            pr_summary = summarize_prs(open_prs)
        elif closed_unmerged:
            classification = "needs_manual_review"
            pr_summary = summarize_prs(closed_unmerged)
        else:
            classification = "needs_manual_review"
            pr_summary = "no matching PR found"
    return IssueAuditRow(
        number=number,
        title=title,
        state=state,
        classification=classification,
        pr_summary=pr_summary,
        issue_url=issue_url,
    )


def summarize_prs(prs: Iterable[PullSummary]) -> str:
    """Render related PRs as a short comma-separated phrase."""
    parts: list[str] = []
    for pr in prs:
        if pr.merged:
            parts.append(f"merged PR #{pr.number}")
        else:
            parts.append(f"{pr.state} PR #{pr.number}")
    return ", ".join(parts)
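

# Illustrative expectations (hypothetical issue/PR numbers, not live data):
#   extract_issue_numbers("Fixes #100, rolls up #102-104")  ->  [100, 102, 103, 104]
#   match_prs(662, pulls) links PRs whose text mentions "#662" as a whole
#   token or whose head branch looks like "fix/662" or "issue-662".
#   summarize_prs(...) then renders e.g. "merged PR #812, open PR #815".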


def render_report(source_issue: int, source_title: str, repo: str, referenced_rows: list[dict], generated_at: str) -> str:
    """Assemble the markdown audit report from classified rows."""
    closure = [row for row in referenced_rows if row["classification"] == "closure_candidate"]
    active = [row for row in referenced_rows if row["classification"] == "active_pr"]
    manual = [row for row in referenced_rows if row["classification"] == "needs_manual_review"]
    closed = [row for row in referenced_rows if row["classification"] == "already_closed"]

    def table(rows: list[dict]) -> str:
        # Placeholder single-cell table keeps empty sections renderable.
        if not rows:
            return "| None |\n|---|\n| None |"
        lines = ["| Issue | State | Classification | PR Summary |", "|---|---|---|---|"]
        for row in rows:
            lines.append(
                f"| #{row['number']} | {row['state']} | {row['classification'].replace('_', ' ')} | {row['pr_summary']} |"
            )
        return "\n".join(lines)

    return "\n".join(
        [
            f"# Burn Lane Empty Audit — {repo} #{source_issue}",
            "",
            f"Generated: {generated_at}",
            f"Source issue: `{source_title}`",
            "",
            "## Source Snapshot",
            "",
            f"Issue #{source_issue} is an operational status note, not a normal feature request. Its body is a historical snapshot of one burn lane claiming the queue was exhausted and recommending bulk closure of stale-open items.",
            "",
            "## Live Summary",
            "",
            f"- Referenced issues audited: {len(referenced_rows)}",
            f"- Already closed: {len(closed)}",
            f"- Open but likely closure candidates (merged PR found): {len(closure)}",
            f"- Open with active PRs: {len(active)}",
            f"- Open / needs manual review: {len(manual)}",
            "",
            "## Issue Body Drift",
            "",
            f"The body of #{source_issue} is not current truth. It mixes closed issues, open issues, ranges, and process notes into one static snapshot. This audit re-queries every referenced issue and classifies it against live forge state instead of trusting the original note.",
            "",
            table(referenced_rows),
            "",
            "## Closure Candidates",
            "",
            "These issues are still open but already have merged PR evidence in the forge and should be reviewed for bulk closure.",
            "",
            table(closure),
            "",
            "## Still Open / Needs Manual Review",
            "",
            "These issues either have no matching PR signal or still have an active PR / ambiguous state and should stay in a human review lane.",
            "",
            table(active + manual),
            "",
            "## Recommendation",
            "",
            "1. Close the `closure_candidate` issues in one deliberate ops pass after a final spot-check on main.",
            "2. Leave `active_pr` items open until the current PRs are merged or closed.",
            "3. Investigate `needs_manual_review` items individually — they may be report-only, assigned elsewhere, or still actionable.",
            f"4. Use this audit artifact instead of the raw body text of #{source_issue} for future lane-empty claims.",
        ]
    )
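

# Sample table() output for a single classified row (hypothetical values):
#   | Issue | State | Classification | PR Summary |
#   |---|---|---|---|
#   | #655 | open | closure candidate | merged PR #812 |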


def run_audit(issue_number: int, repo: str, token: str, output_path: Path) -> Path:
    """Fetch the source issue, audit every referenced issue, and write the report."""
    issue = api_get(f"/repos/{ORG}/{repo}/issues/{issue_number}", token)
    referenced = extract_issue_numbers(issue.get("body") or "")
    pulls = collect_pull_summaries(repo, token)
    rows: list[dict] = []
    for ref in referenced:
        try:
            ref_issue = api_get(f"/repos/{ORG}/{repo}/issues/{ref}", token)
        except Exception:
            # Lookups can fail for deleted or inaccessible issues; record and move on.
            rows.append(
                IssueAuditRow(
                    number=ref,
                    title="missing or inaccessible",
                    state="unknown",
                    classification="needs_manual_review",
                    pr_summary="issue lookup failed",
                    issue_url="",
                ).to_dict()
            )
            continue
        rows.append(classify_issue(ref_issue, match_prs(ref, pulls)).to_dict())
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    report = render_report(issue_number, issue.get("title") or "", repo, rows, generated_at)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(report + "\n", encoding="utf-8")
    return output_path


def main() -> int:
    parser = argparse.ArgumentParser(description="Audit a 'burn lane empty' issue body against live forge state.")
    parser.add_argument("--issue", type=int, default=662)
    parser.add_argument("--repo", default="timmy-home")
    parser.add_argument(
        "--output",
        default="reports/production/2026-04-16-burn-lane-empty-audit.md",
        help="Repo-relative output path for the generated markdown report.",
    )
    parser.add_argument("--token-file", default=DEFAULT_TOKEN_PATH)
    args = parser.parse_args()
    token = Path(args.token_file).read_text(encoding="utf-8").strip()
    output = run_audit(args.issue, args.repo, token, Path(args.output))
    print(output)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
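
# Usage (script filename is illustrative; the flag values shown are the defaults):
#   python3 burn_lane_audit.py --issue 662 --repo timmy-home \
#       --output reports/production/2026-04-16-burn-lane-empty-audit.md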