This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/scripts/backfill_retro.py
Kimi Agent 015d858be5 fix: auto-detect issue number in cycle retro from git branch (#495)
## Summary
- `cycle_retro.py` now auto-detects issue number from the git branch name (e.g. `kimi/issue-492` → `492`) when `--issue` is not provided
- `backfill_retro.py` now skips the PR number suffix Gitea appends to titles so it does not confuse PR numbers with issue numbers
- Added tests for both fixes

Fixes #492

Co-authored-by: kimi <kimi@localhost>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/495
Co-authored-by: Kimi Agent <kimi@timmy.local>
Co-committed-by: Kimi Agent <kimi@timmy.local>
2026-03-19 16:13:35 -04:00

233 lines
7.4 KiB
Python

#!/usr/bin/env python3
"""Backfill cycle retrospective data from Gitea merged PRs and git log.
One-time script to seed .loop/retro/cycles.jsonl and summary.json
from existing history so the LOOPSTAT panel isn't empty.
"""
import json
import os
import re
import subprocess
from datetime import datetime, timezone
from pathlib import Path
from urllib.request import Request, urlopen
REPO_ROOT = Path(__file__).resolve().parent.parent
RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
SUMMARY_FILE = REPO_ROOT / ".loop" / "retro" / "summary.json"
GITEA_API = "http://localhost:3000/api/v1"
REPO_SLUG = "rockachopa/Timmy-time-dashboard"
TOKEN_FILE = Path.home() / ".hermes" / "gitea_token"
TAG_RE = re.compile(r"\[([^\]]+)\]")
CYCLE_RE = re.compile(r"\[loop-cycle-(\d+)\]", re.IGNORECASE)
ISSUE_RE = re.compile(r"#(\d+)")
def get_token() -> str:
return TOKEN_FILE.read_text().strip()
def api_get(path: str, token: str) -> list | dict:
url = f"{GITEA_API}/repos/{REPO_SLUG}/{path}"
req = Request(url, headers={
"Authorization": f"token {token}",
"Accept": "application/json",
})
with urlopen(req, timeout=15) as resp:
return json.loads(resp.read())
def get_all_merged_prs(token: str) -> list[dict]:
"""Fetch all merged PRs from Gitea."""
all_prs = []
page = 1
while True:
batch = api_get(f"pulls?state=closed&sort=created&limit=50&page={page}", token)
if not batch:
break
merged = [p for p in batch if p.get("merged")]
all_prs.extend(merged)
if len(batch) < 50:
break
page += 1
return all_prs
def get_pr_diff_stats(token: str, pr_number: int) -> dict:
"""Get diff stats for a PR."""
try:
pr = api_get(f"pulls/{pr_number}", token)
return {
"additions": pr.get("additions", 0),
"deletions": pr.get("deletions", 0),
"changed_files": pr.get("changed_files", 0),
}
except Exception:
return {"additions": 0, "deletions": 0, "changed_files": 0}
def classify_pr(title: str, body: str) -> str:
"""Guess issue type from PR title/body."""
tags = set()
for match in TAG_RE.finditer(title):
tags.add(match.group(1).lower())
lower = title.lower()
if "fix" in lower or "bug" in tags:
return "bug"
elif "feat" in lower or "feature" in tags:
return "feature"
elif "refactor" in lower or "refactor" in tags:
return "refactor"
elif "test" in lower:
return "feature"
elif "policy" in lower or "chore" in lower:
return "refactor"
return "unknown"
def extract_cycle_number(title: str) -> int | None:
m = CYCLE_RE.search(title)
return int(m.group(1)) if m else None
def extract_issue_number(title: str, body: str, pr_number: int | None = None) -> int | None:
"""Extract the issue number from PR body/title, ignoring the PR number itself.
Gitea appends "(#N)" to PR titles where N is the PR number — skip that
so we don't confuse it with the linked issue.
"""
for text in [body or "", title]:
for m in ISSUE_RE.finditer(text):
num = int(m.group(1))
if num != pr_number:
return num
return None
def estimate_duration(pr: dict) -> int:
"""Estimate cycle duration from PR created_at to merged_at."""
try:
created = datetime.fromisoformat(pr["created_at"].replace("Z", "+00:00"))
merged = datetime.fromisoformat(pr["merged_at"].replace("Z", "+00:00"))
delta = (merged - created).total_seconds()
# Cap at 1200s (max cycle time) — some PRs sit open for days
return min(int(delta), 1200)
except (KeyError, ValueError, TypeError):
return 0
def main():
token = get_token()
print("[backfill] Fetching merged PRs from Gitea...")
prs = get_all_merged_prs(token)
print(f"[backfill] Found {len(prs)} merged PRs")
# Sort oldest first
prs.sort(key=lambda p: p.get("merged_at", ""))
entries = []
cycle_counter = 0
for pr in prs:
title = pr.get("title", "")
body = pr.get("body", "") or ""
pr_num = pr["number"]
cycle = extract_cycle_number(title)
if cycle is None:
cycle_counter += 1
cycle = cycle_counter
else:
cycle_counter = max(cycle_counter, cycle)
issue = extract_issue_number(title, body, pr_number=pr_num)
issue_type = classify_pr(title, body)
duration = estimate_duration(pr)
diff = get_pr_diff_stats(token, pr_num)
merged_at = pr.get("merged_at", "")
entry = {
"timestamp": merged_at,
"cycle": cycle,
"issue": issue,
"type": issue_type,
"success": True, # it merged, so it succeeded
"duration": duration,
"tests_passed": 0, # can't recover this
"tests_added": 0,
"files_changed": diff["changed_files"],
"lines_added": diff["additions"],
"lines_removed": diff["deletions"],
"kimi_panes": 0,
"pr": pr_num,
"reason": "",
"notes": f"backfilled from PR#{pr_num}: {title[:80]}",
}
entries.append(entry)
print(f" PR#{pr_num:>3d} cycle={cycle:>3d} #{issue or '-':<5} "
f"+{diff['additions']:<5d} -{diff['deletions']:<5d} {issue_type:<8s} "
f"{title[:50]}")
# Write cycles.jsonl
RETRO_FILE.parent.mkdir(parents=True, exist_ok=True)
with open(RETRO_FILE, "w") as f:
for entry in entries:
f.write(json.dumps(entry) + "\n")
print(f"\n[backfill] Wrote {len(entries)} entries to {RETRO_FILE}")
# Generate summary
generate_summary(entries)
print(f"[backfill] Wrote summary to {SUMMARY_FILE}")
def generate_summary(entries: list[dict]):
"""Compute rolling summary from entries."""
window = 50
recent = entries[-window:]
if not recent:
return
successes = [e for e in recent if e.get("success")]
durations = [e["duration"] for e in recent if e.get("duration", 0) > 0]
type_stats: dict[str, dict] = {}
for e in recent:
t = e.get("type", "unknown")
if t not in type_stats:
type_stats[t] = {"count": 0, "success": 0, "total_duration": 0}
type_stats[t]["count"] += 1
if e.get("success"):
type_stats[t]["success"] += 1
type_stats[t]["total_duration"] += e.get("duration", 0)
for t, stats in type_stats.items():
if stats["count"] > 0:
stats["success_rate"] = round(stats["success"] / stats["count"], 2)
stats["avg_duration"] = round(stats["total_duration"] / stats["count"])
summary = {
"updated_at": datetime.now(timezone.utc).isoformat(),
"window": len(recent),
"total_cycles": len(entries),
"success_rate": round(len(successes) / len(recent), 2) if recent else 0,
"avg_duration_seconds": round(sum(durations) / len(durations)) if durations else 0,
"total_lines_added": sum(e.get("lines_added", 0) for e in recent),
"total_lines_removed": sum(e.get("lines_removed", 0) for e in recent),
"total_prs_merged": sum(1 for e in recent if e.get("pr")),
"by_type": type_stats,
"quarantine_candidates": {},
"recent_failures": [],
}
SUMMARY_FILE.write_text(json.dumps(summary, indent=2) + "\n")
if __name__ == "__main__":
main()