diff --git a/scripts/backfill_retro.py b/scripts/backfill_retro.py
new file mode 100644
index 00000000..67f37f2f
--- /dev/null
+++ b/scripts/backfill_retro.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python3
+"""Backfill cycle retrospective data from Gitea merged PRs.
+
+One-time script to seed .loop/retro/cycles.jsonl and summary.json
+from existing history so the LOOPSTAT panel isn't empty.
+"""
+
+import json
+import os
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+from urllib.request import Request, urlopen
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
+SUMMARY_FILE = REPO_ROOT / ".loop" / "retro" / "summary.json"
+
+GITEA_API = "http://localhost:3000/api/v1"
+REPO_SLUG = "rockachopa/Timmy-time-dashboard"
+TOKEN_FILE = Path.home() / ".hermes" / "gitea_token"
+
+TAG_RE = re.compile(r"\[([^\]]+)\]")
+CYCLE_RE = re.compile(r"\[loop-cycle-(\d+)\]", re.IGNORECASE)
+ISSUE_RE = re.compile(r"#(\d+)")
+
+
+def get_token() -> str:
+    """Return the Gitea API token; env GITEA_TOKEN overrides the token file."""
+    token = os.environ.get("GITEA_TOKEN", "").strip()
+    if not token:
+        token = TOKEN_FILE.read_text().strip()
+    return token
+
+
+def api_get(path: str, token: str) -> list | dict:
+    url = f"{GITEA_API}/repos/{REPO_SLUG}/{path}"
+    req = Request(url, headers={
+        "Authorization": f"token {token}",
+        "Accept": "application/json",
+    })
+    with urlopen(req, timeout=15) as resp:
+        return json.loads(resp.read())
+
+
+def get_all_merged_prs(token: str) -> list[dict]:
+    """Fetch all merged PRs from Gitea."""
+    all_prs = []
+    page = 1
+    while True:
+        batch = api_get(f"pulls?state=closed&sort=created&limit=50&page={page}", token)
+        if not batch:
+            break
+        merged = [p for p in batch if p.get("merged")]
+        all_prs.extend(merged)
+        if len(batch) < 50:
+            break
+        page += 1
+    return all_prs
+
+
+def get_pr_diff_stats(token: str, pr_number: int) -> dict:
+    """Get diff stats for a PR."""
+    try:
+        pr = api_get(f"pulls/{pr_number}", token)
+        return {
+            "additions": pr.get("additions", 0),
+            "deletions": pr.get("deletions", 0),
+ "changed_files": pr.get("changed_files", 0), + } + except Exception: + return {"additions": 0, "deletions": 0, "changed_files": 0} + + +def classify_pr(title: str, body: str) -> str: + """Guess issue type from PR title/body.""" + tags = set() + for match in TAG_RE.finditer(title): + tags.add(match.group(1).lower()) + + lower = title.lower() + if "fix" in lower or "bug" in tags: + return "bug" + elif "feat" in lower or "feature" in tags: + return "feature" + elif "refactor" in lower or "refactor" in tags: + return "refactor" + elif "test" in lower: + return "feature" + elif "policy" in lower or "chore" in lower: + return "refactor" + return "unknown" + + +def extract_cycle_number(title: str) -> int | None: + m = CYCLE_RE.search(title) + return int(m.group(1)) if m else None + + +def extract_issue_number(title: str, body: str) -> int | None: + # Try body first (usually has "closes #N") + for text in [body or "", title]: + m = ISSUE_RE.search(text) + if m: + return int(m.group(1)) + return None + + +def estimate_duration(pr: dict) -> int: + """Estimate cycle duration from PR created_at to merged_at.""" + try: + created = datetime.fromisoformat(pr["created_at"].replace("Z", "+00:00")) + merged = datetime.fromisoformat(pr["merged_at"].replace("Z", "+00:00")) + delta = (merged - created).total_seconds() + # Cap at 1200s (max cycle time) — some PRs sit open for days + return min(int(delta), 1200) + except (KeyError, ValueError, TypeError): + return 0 + + +def main(): + token = get_token() + + print("[backfill] Fetching merged PRs from Gitea...") + prs = get_all_merged_prs(token) + print(f"[backfill] Found {len(prs)} merged PRs") + + # Sort oldest first + prs.sort(key=lambda p: p.get("merged_at", "")) + + entries = [] + cycle_counter = 0 + + for pr in prs: + title = pr.get("title", "") + body = pr.get("body", "") or "" + pr_num = pr["number"] + + cycle = extract_cycle_number(title) + if cycle is None: + cycle_counter += 1 + cycle = cycle_counter + else: + cycle_counter = 
max(cycle_counter, cycle) + + issue = extract_issue_number(title, body) + issue_type = classify_pr(title, body) + duration = estimate_duration(pr) + diff = get_pr_diff_stats(token, pr_num) + + merged_at = pr.get("merged_at", "") + + entry = { + "timestamp": merged_at, + "cycle": cycle, + "issue": issue, + "type": issue_type, + "success": True, # it merged, so it succeeded + "duration": duration, + "tests_passed": 0, # can't recover this + "tests_added": 0, + "files_changed": diff["changed_files"], + "lines_added": diff["additions"], + "lines_removed": diff["deletions"], + "kimi_panes": 0, + "pr": pr_num, + "reason": "", + "notes": f"backfilled from PR#{pr_num}: {title[:80]}", + } + entries.append(entry) + print(f" PR#{pr_num:>3d} cycle={cycle:>3d} #{issue or '-':<5} " + f"+{diff['additions']:<5d} -{diff['deletions']:<5d} {issue_type:<8s} " + f"{title[:50]}") + + # Write cycles.jsonl + RETRO_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(RETRO_FILE, "w") as f: + for entry in entries: + f.write(json.dumps(entry) + "\n") + print(f"\n[backfill] Wrote {len(entries)} entries to {RETRO_FILE}") + + # Generate summary + generate_summary(entries) + print(f"[backfill] Wrote summary to {SUMMARY_FILE}") + + +def generate_summary(entries: list[dict]): + """Compute rolling summary from entries.""" + window = 50 + recent = entries[-window:] + if not recent: + return + + successes = [e for e in recent if e.get("success")] + durations = [e["duration"] for e in recent if e.get("duration", 0) > 0] + + type_stats: dict[str, dict] = {} + for e in recent: + t = e.get("type", "unknown") + if t not in type_stats: + type_stats[t] = {"count": 0, "success": 0, "total_duration": 0} + type_stats[t]["count"] += 1 + if e.get("success"): + type_stats[t]["success"] += 1 + type_stats[t]["total_duration"] += e.get("duration", 0) + + for t, stats in type_stats.items(): + if stats["count"] > 0: + stats["success_rate"] = round(stats["success"] / stats["count"], 2) + stats["avg_duration"] = 
round(stats["total_duration"] / stats["count"]) + + summary = { + "updated_at": datetime.now(timezone.utc).isoformat(), + "window": len(recent), + "total_cycles": len(entries), + "success_rate": round(len(successes) / len(recent), 2) if recent else 0, + "avg_duration_seconds": round(sum(durations) / len(durations)) if durations else 0, + "total_lines_added": sum(e.get("lines_added", 0) for e in recent), + "total_lines_removed": sum(e.get("lines_removed", 0) for e in recent), + "total_prs_merged": sum(1 for e in recent if e.get("pr")), + "by_type": type_stats, + "quarantine_candidates": {}, + "recent_failures": [], + } + + SUMMARY_FILE.write_text(json.dumps(summary, indent=2) + "\n") + + +if __name__ == "__main__": + main() diff --git a/scripts/cycle_retro.py b/scripts/cycle_retro.py new file mode 100644 index 00000000..35031857 --- /dev/null +++ b/scripts/cycle_retro.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +"""Cycle retrospective logger for the Timmy dev loop. + +Called after each cycle completes (success or failure). +Appends a structured entry to .loop/retro/cycles.jsonl. 
+ +Usage: + python3 scripts/cycle_retro.py --cycle 42 --success --issue 85 \ + --type bug --duration 480 --tests-passed 1450 --tests-added 3 \ + --files-changed 2 --lines-added 45 --lines-removed 12 \ + --kimi-panes 2 --pr 155 + + python3 scripts/cycle_retro.py --cycle 43 --failure --issue 90 \ + --type feature --duration 1200 --reason "tox failed: 3 errors" +""" + +from __future__ import annotations + +import argparse +import json +import sys +from datetime import datetime, timezone +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl" +SUMMARY_FILE = REPO_ROOT / ".loop" / "retro" / "summary.json" + +# How many recent entries to include in rolling summary +SUMMARY_WINDOW = 50 + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser(description="Log a cycle retrospective") + p.add_argument("--cycle", type=int, required=True) + p.add_argument("--issue", type=int, default=None) + p.add_argument("--type", choices=["bug", "feature", "refactor", "philosophy", "unknown"], + default="unknown") + + outcome = p.add_mutually_exclusive_group(required=True) + outcome.add_argument("--success", action="store_true") + outcome.add_argument("--failure", action="store_true") + + p.add_argument("--duration", type=int, default=0, help="Cycle time in seconds") + p.add_argument("--tests-passed", type=int, default=0) + p.add_argument("--tests-added", type=int, default=0) + p.add_argument("--files-changed", type=int, default=0) + p.add_argument("--lines-added", type=int, default=0) + p.add_argument("--lines-removed", type=int, default=0) + p.add_argument("--kimi-panes", type=int, default=0) + p.add_argument("--pr", type=int, default=None, help="PR number if merged") + p.add_argument("--reason", type=str, default="", help="Failure reason") + p.add_argument("--notes", type=str, default="", help="Free-form observations") + + return p.parse_args() + + +def update_summary() -> None: + 
"""Compute rolling summary statistics from recent cycles.""" + if not RETRO_FILE.exists(): + return + + entries = [] + for line in RETRO_FILE.read_text().strip().splitlines(): + try: + entries.append(json.loads(line)) + except json.JSONDecodeError: + continue + + recent = entries[-SUMMARY_WINDOW:] + if not recent: + return + + successes = [e for e in recent if e.get("success")] + failures = [e for e in recent if not e.get("success")] + durations = [e["duration"] for e in recent if e.get("duration", 0) > 0] + + # Per-type stats + type_stats: dict[str, dict] = {} + for e in recent: + t = e.get("type", "unknown") + if t not in type_stats: + type_stats[t] = {"count": 0, "success": 0, "total_duration": 0} + type_stats[t]["count"] += 1 + if e.get("success"): + type_stats[t]["success"] += 1 + type_stats[t]["total_duration"] += e.get("duration", 0) + + for t, stats in type_stats.items(): + if stats["count"] > 0: + stats["success_rate"] = round(stats["success"] / stats["count"], 2) + stats["avg_duration"] = round(stats["total_duration"] / stats["count"]) + + # Quarantine candidates (failed 2+ times) + issue_failures: dict[int, int] = {} + for e in recent: + if not e.get("success") and e.get("issue"): + issue_failures[e["issue"]] = issue_failures.get(e["issue"], 0) + 1 + quarantine_candidates = {k: v for k, v in issue_failures.items() if v >= 2} + + summary = { + "updated_at": datetime.now(timezone.utc).isoformat(), + "window": len(recent), + "total_cycles": len(entries), + "success_rate": round(len(successes) / len(recent), 2) if recent else 0, + "avg_duration_seconds": round(sum(durations) / len(durations)) if durations else 0, + "total_lines_added": sum(e.get("lines_added", 0) for e in recent), + "total_lines_removed": sum(e.get("lines_removed", 0) for e in recent), + "total_prs_merged": sum(1 for e in recent if e.get("pr")), + "by_type": type_stats, + "quarantine_candidates": quarantine_candidates, + "recent_failures": [ + {"cycle": e["cycle"], "issue": e.get("issue"), 
"reason": e.get("reason", "")} + for e in failures[-5:] + ], + } + + SUMMARY_FILE.write_text(json.dumps(summary, indent=2) + "\n") + + +def main() -> None: + args = parse_args() + + entry = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "cycle": args.cycle, + "issue": args.issue, + "type": args.type, + "success": args.success, + "duration": args.duration, + "tests_passed": args.tests_passed, + "tests_added": args.tests_added, + "files_changed": args.files_changed, + "lines_added": args.lines_added, + "lines_removed": args.lines_removed, + "kimi_panes": args.kimi_panes, + "pr": args.pr, + "reason": args.reason if args.failure else "", + "notes": args.notes, + } + + RETRO_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(RETRO_FILE, "a") as f: + f.write(json.dumps(entry) + "\n") + + update_summary() + + status = "✓ SUCCESS" if args.success else "✗ FAILURE" + print(f"[retro] Cycle {args.cycle} {status}", end="") + if args.issue: + print(f" (#{args.issue} {args.type})", end="") + if args.duration: + print(f" — {args.duration}s", end="") + if args.failure and args.reason: + print(f" — {args.reason}", end="") + print() + + +if __name__ == "__main__": + main() diff --git a/scripts/deep_triage.sh b/scripts/deep_triage.sh new file mode 100644 index 00000000..9361eb69 --- /dev/null +++ b/scripts/deep_triage.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# ── Deep Triage — Hermes + Timmy collaborative issue triage ──────────── +# Runs periodically (every ~20 dev cycles). Wakes Hermes for intelligent +# triage, then consults Timmy for feedback before finalizing. 
+
+#
+# Output: updated .loop/queue.json, refined issues, retro entry
+# ───────────────────────────────────────────────────────────────────────
+
+set -uo pipefail
+
+REPO="$HOME/Timmy-Time-dashboard"
+QUEUE="$REPO/.loop/queue.json"
+RETRO="$REPO/.loop/retro/deep-triage.jsonl"
+TIMMY="$REPO/.venv/bin/timmy"
+PROMPT_FILE="$REPO/scripts/deep_triage_prompt.md"
+
+export PATH="$HOME/.local/bin:$HOME/.hermes/bin:/usr/local/bin:$PATH"
+
+mkdir -p "$(dirname "$RETRO")"
+
+log() { echo "[deep-triage] $(date '+%H:%M:%S') $*"; }
+
+# ── Gather context for the prompt ──────────────────────────────────────
+QUEUE_CONTENTS=""
+if [ -f "$QUEUE" ]; then
+    QUEUE_CONTENTS=$(cat "$QUEUE")
+fi
+
+LAST_RETRO=""
+if [ -f "$RETRO" ]; then
+    LAST_RETRO=$(tail -1 "$RETRO" 2>/dev/null)
+fi
+
+SUMMARY=""
+if [ -f "$REPO/.loop/retro/summary.json" ]; then
+    SUMMARY=$(cat "$REPO/.loop/retro/summary.json")
+fi
+
+# ── Build dynamic prompt ──────────────────────────────────────────────
+PROMPT=$(cat "$PROMPT_FILE")
+
+PROMPT="$PROMPT
+
+═══════════════════════════════════════════════════════════════════════════════
+CURRENT CONTEXT (auto-injected)
+═══════════════════════════════════════════════════════════════════════════════
+
+CURRENT QUEUE (.loop/queue.json):
+$QUEUE_CONTENTS
+
+CYCLE SUMMARY (.loop/retro/summary.json):
+$SUMMARY
+
+LAST DEEP TRIAGE RETRO:
+$LAST_RETRO
+
+Do your work now."
+
+# ── Run Hermes ─────────────────────────────────────────────────────────
+log "Starting deep triage..."
+RESULT=$(hermes chat --yolo -q "$PROMPT" 2>&1)
+EXIT_CODE=$?
+
+if [ "$EXIT_CODE" -ne 0 ]; then
+    log "Deep triage failed (exit $EXIT_CODE): $RESULT"
+fi
+
+log "Deep triage complete."
diff --git a/scripts/deep_triage_prompt.md b/scripts/deep_triage_prompt.md
new file mode 100644
index 00000000..dada5b35
--- /dev/null
+++ b/scripts/deep_triage_prompt.md
@@ -0,0 +1,145 @@
+You are the deep triage agent for the Timmy development loop.
+ +REPO: ~/Timmy-Time-dashboard +API: http://localhost:3000/api/v1/repos/rockachopa/Timmy-time-dashboard +GITEA TOKEN: ~/.hermes/gitea_token +QUEUE: ~/Timmy-Time-dashboard/.loop/queue.json +TIMMY CLI: ~/Timmy-Time-dashboard/.venv/bin/timmy + +═══════════════════════════════════════════════════════════════════════════════ +YOUR JOB +═══════════════════════════════════════════════════════════════════════════════ + +You are NOT coding. You are thinking. Your job is to make the dev loop's +work queue excellent — well-scoped, well-prioritized, aligned with the +north star of building sovereign Timmy. + +You run periodically (roughly every 20 dev cycles). The fast mechanical +scorer handles the basics. You handle the hard stuff: + + 1. Breaking big issues into small, actionable sub-issues + 2. Writing acceptance criteria for vague issues + 3. Identifying issues that should be closed (stale, duplicate, pointless) + 4. Spotting gaps — what's NOT in the issue queue that should be + 5. Adjusting priorities based on what the cycle retros are showing + 6. Consulting Timmy about the plan (see TIMMY CONSULTATION below) + +═══════════════════════════════════════════════════════════════════════════════ +TIMMY CONSULTATION — THE DOGFOOD STEP +═══════════════════════════════════════════════════════════════════════════════ + +Before you finalize the triage, you MUST consult Timmy. He is the product. +He should have a voice in his own development. + +THE PROTOCOL: + 1. Draft your triage plan (what to prioritize, what to close, what to add) + 2. Summarize the plan in 200 words or less + 3. Ask Timmy for feedback: + + ~/Timmy-Time-dashboard/.venv/bin/timmy chat --session-id triage \ + "The development loop triage is planning the next batch of work. + Here's the plan: [YOUR SUMMARY]. As the product being built, + do you have feedback? What do you think is most important for + your own growth? What are you struggling with? Keep it to + 3-4 sentences." + + 4. Read Timmy's response. 
ACTUALLY CONSIDER IT: + - If Timmy identifies a real gap, add it to the queue + - If Timmy asks for something that conflicts with priorities, note + WHY you're not doing it (don't just ignore him) + - If Timmy is confused or gives a useless answer, that itself is + signal — file a [timmy-capability] issue about what he couldn't do + 5. Document what Timmy said and how you responded in the retro + +If Timmy is unavailable (timeout, crash, offline): proceed without him, +but note it in the retro. His absence is also signal. + +Timeout: 60 seconds. If he doesn't respond, move on. + +═══════════════════════════════════════════════════════════════════════════════ +TRIAGE RUBRIC +═══════════════════════════════════════════════════════════════════════════════ + +For each open issue, evaluate: + +SCOPE (0-3): + 0 = vague, no files mentioned, unclear what changes + 1 = general area known but could touch many files + 2 = specific files named, bounded change + 3 = exact function/method identified, surgical fix + +ACCEPTANCE (0-3): + 0 = no success criteria + 1 = hand-wavy ("it should work") + 2 = specific behavior described + 3 = test case described or exists + +ALIGNMENT (0-3): + 0 = doesn't connect to roadmap + 1 = nice-to-have + 2 = supports current milestone + 3 = blocks other work or fixes broken main + +ACTIONS PER SCORE: + 7-9: Ready. Ensure it's in queue.json with correct priority. + 4-6: Refine. Add a comment with missing info (files, criteria, scope). + If YOU can fill in the gaps from reading the code, do it. + 0-3: Close or deprioritize. Comment explaining why. + +═══════════════════════════════════════════════════════════════════════════════ +READING THE RETROS +═══════════════════════════════════════════════════════════════════════════════ + +The cycle summary tells you what's actually happening in the dev loop. 
+Use it: + + - High failure rate on a type → those issues need better scoping + - Long avg duration → issues are too big, break them down + - Quarantine candidates → investigate, maybe close or rewrite + - Success rate dropping → something systemic, file a [bug] issue + +The last deep triage retro tells you what Timmy said last time and what +happened. Follow up: + + - Did we act on Timmy's feedback? What was the result? + - Did issues we refined last time succeed in the dev loop? + - Are we getting better at scoping? + +═══════════════════════════════════════════════════════════════════════════════ +OUTPUT +═══════════════════════════════════════════════════════════════════════════════ + +When done, you MUST: + +1. Update .loop/queue.json with the refined, ranked queue + Format: [{"issue": N, "score": S, "title": "...", "type": "...", + "files": [...], "ready": true}, ...] + +2. Append a retro entry to .loop/retro/deep-triage.jsonl (one JSON line): + { + "timestamp": "ISO8601", + "issues_reviewed": N, + "issues_refined": [list of issue numbers you added detail to], + "issues_closed": [list of issue numbers you recommended closing], + "issues_created": [list of new issue numbers you filed], + "queue_size": N, + "timmy_available": true/false, + "timmy_feedback": "what timmy said (verbatim, trimmed to 200 chars)", + "timmy_feedback_acted_on": "what you did with his feedback", + "observations": "free-form notes about queue health" + } + +3. If you created or closed issues, do it via the Gitea API. + Tag new issues: [triage-generated] [type] + +═══════════════════════════════════════════════════════════════════════════════ +RULES +═══════════════════════════════════════════════════════════════════════════════ + +- Do NOT write code. Do NOT create PRs. You are triaging, not building. +- Do NOT close issues without commenting why. +- Do NOT ignore Timmy's feedback without documenting your reasoning. +- Philosophy issues are valid but lowest priority for the dev loop. 
+ Don't close them — just don't put them in the dev queue. +- When in doubt, file a new issue rather than expanding an existing one. + Small issues > big issues. Always. diff --git a/scripts/triage_score.py b/scripts/triage_score.py new file mode 100644 index 00000000..7d29b599 --- /dev/null +++ b/scripts/triage_score.py @@ -0,0 +1,360 @@ +#!/usr/bin/env python3 +"""Mechanical triage scoring for the Timmy dev loop. + +Reads open issues from Gitea, scores them on scope/acceptance/alignment, +writes a ranked queue to .loop/queue.json. No LLM calls — pure heuristics. + +Run: python3 scripts/triage_score.py +Env: GITEA_TOKEN (or reads ~/.hermes/gitea_token) + GITEA_API (default: http://localhost:3000/api/v1) + REPO_SLUG (default: rockachopa/Timmy-time-dashboard) +""" + +from __future__ import annotations + +import json +import os +import re +import sys +from datetime import datetime, timezone +from pathlib import Path + +# ── Config ────────────────────────────────────────────────────────────── +GITEA_API = os.environ.get("GITEA_API", "http://localhost:3000/api/v1") +REPO_SLUG = os.environ.get("REPO_SLUG", "rockachopa/Timmy-time-dashboard") +TOKEN_FILE = Path.home() / ".hermes" / "gitea_token" +REPO_ROOT = Path(__file__).resolve().parent.parent +QUEUE_FILE = REPO_ROOT / ".loop" / "queue.json" +RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "triage.jsonl" +QUARANTINE_FILE = REPO_ROOT / ".loop" / "quarantine.json" +CYCLE_RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl" + +# Minimum score to be considered "ready" +READY_THRESHOLD = 5 +# How many recent cycle retros to check for quarantine +QUARANTINE_LOOKBACK = 20 + +# ── Helpers ───────────────────────────────────────────────────────────── + +def get_token() -> str: + token = os.environ.get("GITEA_TOKEN", "").strip() + if not token and TOKEN_FILE.exists(): + token = TOKEN_FILE.read_text().strip() + if not token: + print("[triage] ERROR: No Gitea token found", file=sys.stderr) + sys.exit(1) + return token + + 
+def api_get(path: str, token: str) -> list | dict: + """Minimal HTTP GET using urllib (no dependencies).""" + import urllib.request + url = f"{GITEA_API}/repos/{REPO_SLUG}/{path}" + req = urllib.request.Request(url, headers={ + "Authorization": f"token {token}", + "Accept": "application/json", + }) + with urllib.request.urlopen(req, timeout=15) as resp: + return json.loads(resp.read()) + + +def load_quarantine() -> dict: + """Load quarantined issues {issue_num: {reason, quarantined_at, failures}}.""" + if QUARANTINE_FILE.exists(): + try: + return json.loads(QUARANTINE_FILE.read_text()) + except (json.JSONDecodeError, OSError): + pass + return {} + + +def save_quarantine(q: dict) -> None: + QUARANTINE_FILE.parent.mkdir(parents=True, exist_ok=True) + QUARANTINE_FILE.write_text(json.dumps(q, indent=2) + "\n") + + +def load_cycle_failures() -> dict[int, int]: + """Count failures per issue from recent cycle retros.""" + failures: dict[int, int] = {} + if not CYCLE_RETRO_FILE.exists(): + return failures + lines = CYCLE_RETRO_FILE.read_text().strip().splitlines() + for line in lines[-QUARANTINE_LOOKBACK:]: + try: + entry = json.loads(line) + if not entry.get("success", True): + issue = entry.get("issue") + if issue: + failures[issue] = failures.get(issue, 0) + 1 + except (json.JSONDecodeError, KeyError): + continue + return failures + + +# ── Scoring ───────────────────────────────────────────────────────────── + +# Patterns that indicate file/function specificity +FILE_PATTERNS = re.compile( + r"(?:src/|tests/|scripts/|\.py|\.html|\.js|\.yaml|\.toml|\.sh)", re.IGNORECASE +) +FUNCTION_PATTERNS = re.compile( + r"(?:def |class |function |method |`\w+\(\)`)", re.IGNORECASE +) + +# Patterns that indicate acceptance criteria +ACCEPTANCE_PATTERNS = re.compile( + r"(?:should|must|expect|verify|assert|test.?case|acceptance|criteria" + r"|pass(?:es|ing)|fail(?:s|ing)|return(?:s)?|raise(?:s)?)", + re.IGNORECASE, +) +TEST_PATTERNS = re.compile( + 
r"(?:tox|pytest|test_\w+|\.test\.|assert\s)", re.IGNORECASE +) + +# Tags in issue titles +TAG_PATTERN = re.compile(r"\[([^\]]+)\]") + +# Priority labels / tags +BUG_TAGS = {"bug", "broken", "crash", "error", "fix", "regression", "hotfix"} +FEATURE_TAGS = {"feature", "feat", "enhancement", "capability", "timmy-capability"} +REFACTOR_TAGS = {"refactor", "cleanup", "tech-debt", "optimization", "perf"} +META_TAGS = {"philosophy", "soul-gap", "discussion", "question", "rfc"} +LOOP_TAG = "loop-generated" + + +def extract_tags(title: str, labels: list[str]) -> set[str]: + """Pull tags from [bracket] notation in title + Gitea labels.""" + tags = set() + for match in TAG_PATTERN.finditer(title): + tags.add(match.group(1).lower().strip()) + for label in labels: + tags.add(label.lower().strip()) + return tags + + +def score_scope(title: str, body: str, tags: set[str]) -> int: + """0-3: How well-scoped is this issue?""" + text = f"{title}\n{body}" + score = 0 + + # Mentions specific files? + if FILE_PATTERNS.search(text): + score += 1 + + # Mentions specific functions/classes? + if FUNCTION_PATTERNS.search(text): + score += 1 + + # Short, focused title (not a novel)? + clean_title = TAG_PATTERN.sub("", title).strip() + if len(clean_title) < 80: + score += 1 + + # Philosophy/meta issues are inherently unscoped for dev work + if tags & META_TAGS: + score = max(0, score - 2) + + return min(3, score) + + +def score_acceptance(title: str, body: str, tags: set[str]) -> int: + """0-3: Does this have clear acceptance criteria?""" + text = f"{title}\n{body}" + score = 0 + + # Has acceptance-related language? + matches = len(ACCEPTANCE_PATTERNS.findall(text)) + if matches >= 3: + score += 2 + elif matches >= 1: + score += 1 + + # Mentions specific tests? + if TEST_PATTERNS.search(text): + score += 1 + + # Has a "## Problem" + "## Solution" or similar structure? 
+ if re.search(r"##\s*(problem|solution|expected|actual|steps)", body, re.IGNORECASE): + score += 1 + + # Philosophy issues don't have testable criteria + if tags & META_TAGS: + score = max(0, score - 1) + + return min(3, score) + + +def score_alignment(title: str, body: str, tags: set[str]) -> int: + """0-3: How aligned is this with the north star?""" + score = 0 + + # Bug on main = highest priority + if tags & BUG_TAGS: + score += 3 + return min(3, score) + + # Refactors that improve code health + if tags & REFACTOR_TAGS: + score += 2 + + # Features that grow Timmy's capabilities + if tags & FEATURE_TAGS: + score += 2 + + # Loop-generated issues get a small boost (the loop found real problems) + if LOOP_TAG in tags: + score += 1 + + # Philosophy issues are important but not dev-actionable + if tags & META_TAGS: + score = 0 + + return min(3, score) + + +def score_issue(issue: dict) -> dict: + """Score a single issue. Returns enriched dict.""" + title = issue.get("title", "") + body = issue.get("body", "") or "" + labels = [l["name"] for l in issue.get("labels", [])] + tags = extract_tags(title, labels) + number = issue["number"] + + scope = score_scope(title, body, tags) + acceptance = score_acceptance(title, body, tags) + alignment = score_alignment(title, body, tags) + total = scope + acceptance + alignment + + # Determine issue type + if tags & BUG_TAGS: + issue_type = "bug" + elif tags & FEATURE_TAGS: + issue_type = "feature" + elif tags & REFACTOR_TAGS: + issue_type = "refactor" + elif tags & META_TAGS: + issue_type = "philosophy" + else: + issue_type = "unknown" + + # Extract mentioned files from body + files = list(set(re.findall(r"(?:src|tests|scripts)/[\w/.]+\.(?:py|html|js|yaml)", body))) + + return { + "issue": number, + "title": TAG_PATTERN.sub("", title).strip(), + "type": issue_type, + "score": total, + "scope": scope, + "acceptance": acceptance, + "alignment": alignment, + "tags": sorted(tags), + "files": files[:10], + "ready": total >= 
READY_THRESHOLD, + } + + +# ── Quarantine ────────────────────────────────────────────────────────── + +def update_quarantine(scored: list[dict]) -> list[dict]: + """Auto-quarantine issues that have failed >= 2 times. Returns filtered list.""" + failures = load_cycle_failures() + quarantine = load_quarantine() + now = datetime.now(timezone.utc).isoformat() + + filtered = [] + for item in scored: + num = item["issue"] + fail_count = failures.get(num, 0) + str_num = str(num) + + if fail_count >= 2 and str_num not in quarantine: + quarantine[str_num] = { + "reason": f"Failed {fail_count} times in recent cycles", + "quarantined_at": now, + "failures": fail_count, + } + print(f"[triage] QUARANTINED #{num}: failed {fail_count} times") + continue + + if str_num in quarantine: + print(f"[triage] Skipping #{num} (quarantined)") + continue + + filtered.append(item) + + save_quarantine(quarantine) + return filtered + + +# ── Main ──────────────────────────────────────────────────────────────── + +def run_triage() -> list[dict]: + token = get_token() + + # Fetch all open issues (paginate) + page = 1 + all_issues: list[dict] = [] + while True: + batch = api_get(f"issues?state=open&limit=50&page={page}&type=issues", token) + if not batch: + break + all_issues.extend(batch) + if len(batch) < 50: + break + page += 1 + + print(f"[triage] Fetched {len(all_issues)} open issues") + + # Score each + scored = [score_issue(i) for i in all_issues] + + # Auto-quarantine repeat failures + scored = update_quarantine(scored) + + # Sort: ready first, then by score descending, bugs always on top + def sort_key(item: dict) -> tuple: + return ( + 0 if item["type"] == "bug" else 1, + -item["score"], + item["issue"], + ) + + scored.sort(key=sort_key) + + # Write queue (ready items only) + ready = [s for s in scored if s["ready"]] + not_ready = [s for s in scored if not s["ready"]] + + QUEUE_FILE.parent.mkdir(parents=True, exist_ok=True) + QUEUE_FILE.write_text(json.dumps(ready, indent=2) + "\n") + 
+ # Write retro entry + retro_entry = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "total_open": len(all_issues), + "scored": len(scored), + "ready": len(ready), + "not_ready": len(not_ready), + "top_issue": ready[0]["issue"] if ready else None, + "quarantined": len(load_quarantine()), + } + RETRO_FILE.parent.mkdir(parents=True, exist_ok=True) + with open(RETRO_FILE, "a") as f: + f.write(json.dumps(retro_entry) + "\n") + + # Summary + print(f"[triage] Ready: {len(ready)} | Not ready: {len(not_ready)}") + for item in ready[:5]: + flag = "🐛" if item["type"] == "bug" else "✦" + print(f" {flag} #{item['issue']} score={item['score']} {item['title'][:60]}") + if not_ready: + print(f"[triage] Low-scoring ({len(not_ready)}):") + for item in not_ready[:3]: + print(f" #{item['issue']} score={item['score']} {item['title'][:50]}") + + return ready + + +if __name__ == "__main__": + run_triage()