#!/usr/bin/env python3
"""Cycle retrospective logger for the Timmy dev loop.

Called after each cycle completes (success or failure).
Appends a structured entry to .loop/retro/cycles.jsonl.

SUCCESS DEFINITION:
  A cycle is only "success" if BOTH conditions are met:
  1. The hermes process exited cleanly (exit code 0)
  2. Main is green (smoke test passes on main after merge)

  A cycle that merges a PR but leaves main red is a FAILURE.
  The --main-green flag records the smoke test result.

Usage:
    python3 scripts/cycle_retro.py --cycle 42 --success --main-green --issue 85 \
        --type bug --duration 480 --tests-passed 1450 --tests-added 3 \
        --files-changed 2 --lines-added 45 --lines-removed 12 \
        --kimi-panes 2 --pr 155

    python3 scripts/cycle_retro.py --cycle 43 --failure --issue 90 \
        --type feature --duration 1200 --reason "tox failed: 3 errors"

    python3 scripts/cycle_retro.py --cycle 44 --success --no-main-green \
        --reason "PR merged but tests fail on main"
"""

from __future__ import annotations

import argparse
import json
import sys
from datetime import datetime, timezone
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parent.parent
RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
SUMMARY_FILE = REPO_ROOT / ".loop" / "retro" / "summary.json"

# How many recent entries to include in rolling summary
SUMMARY_WINDOW = 50


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options describing one finished cycle.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the behaviour of the script when
            run from the command line.

    Returns:
        The populated namespace. Exactly one of ``success`` / ``failure`` is
        true; ``main_green`` is ``False`` unless ``--main-green`` was given.
    """
    p = argparse.ArgumentParser(description="Log a cycle retrospective")
    p.add_argument("--cycle", type=int, required=True)
    p.add_argument("--issue", type=int, default=None)
    p.add_argument(
        "--type",
        choices=["bug", "feature", "refactor", "philosophy", "unknown"],
        default="unknown",
    )

    outcome = p.add_mutually_exclusive_group(required=True)
    outcome.add_argument("--success", action="store_true")
    outcome.add_argument("--failure", action="store_true")

    p.add_argument("--duration", type=int, default=0, help="Cycle time in seconds")
    p.add_argument("--tests-passed", type=int, default=0)
    p.add_argument("--tests-added", type=int, default=0)
    p.add_argument("--files-changed", type=int, default=0)
    p.add_argument("--lines-added", type=int, default=0)
    p.add_argument("--lines-removed", type=int, default=0)
    p.add_argument("--kimi-panes", type=int, default=0)
    p.add_argument("--pr", type=int, default=None, help="PR number if merged")
    p.add_argument("--reason", type=str, default="", help="Failure reason")
    p.add_argument("--notes", type=str, default="", help="Free-form observations")
    p.add_argument(
        "--main-green",
        action="store_true",
        default=False,
        help="Smoke test passed on main after this cycle",
    )
    # Both flags share one dest. The default is spelled out here as well so
    # that "neither flag given" means False regardless of which of the two
    # actions argparse consults first when filling in defaults.
    p.add_argument(
        "--no-main-green",
        dest="main_green",
        action="store_false",
        default=False,
        help="Smoke test failed or was not run",
    )

    return p.parse_args(argv)


def _load_entries(retro_file: Path) -> list[dict]:
    """Read every well-formed JSON-object line from the retro log."""
    entries: list[dict] = []
    for line in retro_file.read_text(encoding="utf-8").splitlines():
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            # Blank or corrupted lines are ignored rather than aborting.
            continue
        # A line such as `42` or `"text"` is valid JSON but not an entry;
        # the statistics below require dict lookups.
        if isinstance(record, dict):
            entries.append(record)
    return entries


def _compute_summary(entries: list[dict]) -> dict | None:
    """Build the rolling summary dict, or None when there are no entries."""
    recent = entries[-SUMMARY_WINDOW:]
    if not recent:
        return None

    # Only count entries with real measured data for rates.
    # Backfilled entries lack main_green/hermes_clean fields — exclude them.
    measured = [e for e in recent if "main_green" in e]
    successes = [e for e in measured if e.get("success")]
    failures = [e for e in measured if not e.get("success")]
    main_green_count = sum(1 for e in measured if e.get("main_green"))
    hermes_clean_count = sum(1 for e in measured if e.get("hermes_clean"))
    # `or 0` tolerates entries whose numeric field is present but null.
    durations = [d for d in ((e.get("duration") or 0) for e in recent) if d > 0]

    # Per-type stats (only from measured entries for rates)
    type_stats: dict[str, dict] = {}
    for e in recent:
        stats = type_stats.setdefault(
            e.get("type", "unknown"),
            {"count": 0, "measured": 0, "success": 0, "total_duration": 0},
        )
        stats["count"] += 1
        stats["total_duration"] += e.get("duration") or 0
        if "main_green" in e:
            stats["measured"] += 1
            if e.get("success"):
                stats["success"] += 1

    for stats in type_stats.values():
        if stats["measured"] > 0:
            stats["success_rate"] = round(stats["success"] / stats["measured"], 2)
        else:
            # -1 marks "no measured data" as opposed to a real 0% rate.
            stats["success_rate"] = -1
        # count is always >= 1 here: a type only exists once an entry had it.
        stats["avg_duration"] = round(stats["total_duration"] / stats["count"])

    # Quarantine candidates (failed 2+ times)
    issue_failures: dict[int, int] = {}
    for e in recent:
        if not e.get("success") and e.get("issue"):
            issue_failures[e["issue"]] = issue_failures.get(e["issue"], 0) + 1
    quarantine_candidates = {k: v for k, v in issue_failures.items() if v >= 2}

    n_measured = len(measured)
    return {
        "updated_at": datetime.now(timezone.utc).isoformat(),
        "window": len(recent),
        "measured_cycles": n_measured,
        "total_cycles": len(entries),
        "success_rate": round(len(successes) / n_measured, 2) if measured else -1,
        "main_green_rate": round(main_green_count / n_measured, 2) if measured else -1,
        "hermes_clean_rate": round(hermes_clean_count / n_measured, 2) if measured else -1,
        "avg_duration_seconds": round(sum(durations) / len(durations)) if durations else 0,
        "total_lines_added": sum(e.get("lines_added") or 0 for e in recent),
        "total_lines_removed": sum(e.get("lines_removed") or 0 for e in recent),
        "total_prs_merged": sum(1 for e in recent if e.get("pr")),
        "by_type": type_stats,
        "quarantine_candidates": quarantine_candidates,
        "recent_failures": [
            # .get("cycle"): backfilled entries are not guaranteed to carry it.
            {"cycle": e.get("cycle"), "issue": e.get("issue"), "reason": e.get("reason", "")}
            for e in failures[-5:]
        ],
    }


def update_summary(
    retro_file: Path | None = None,
    summary_file: Path | None = None,
) -> None:
    """Compute rolling summary statistics from recent cycles.

    Reads the JSONL retro log, summarises the last ``SUMMARY_WINDOW`` entries
    and writes the result as indented JSON. Does nothing when the log does
    not exist or contains no usable entries.

    Args:
        retro_file: JSONL log to read. Defaults to the module-level
            ``RETRO_FILE``, looked up at call time.
        summary_file: Destination for the summary JSON. Defaults to the
            module-level ``SUMMARY_FILE``, looked up at call time.
    """
    retro_file = RETRO_FILE if retro_file is None else retro_file
    summary_file = SUMMARY_FILE if summary_file is None else summary_file

    if not retro_file.exists():
        return

    summary = _compute_summary(_load_entries(retro_file))
    if summary is None:
        return

    summary_file.parent.mkdir(parents=True, exist_ok=True)
    summary_file.write_text(json.dumps(summary, indent=2) + "\n", encoding="utf-8")


def main(
    argv: list[str] | None = None,
    *,
    retro_file: Path | None = None,
    summary_file: Path | None = None,
) -> None:
    """Append one retro entry, refresh the summary and print a status line.

    Args:
        argv: Command-line arguments; ``None`` means ``sys.argv[1:]``.
        retro_file: JSONL log to append to. Defaults to ``RETRO_FILE``.
        summary_file: Summary JSON to rewrite. Defaults to ``SUMMARY_FILE``.
    """
    args = parse_args(argv)
    retro_file = RETRO_FILE if retro_file is None else retro_file
    summary_file = SUMMARY_FILE if summary_file is None else summary_file

    # A cycle is only truly successful if hermes exited clean AND main is green
    truly_success = bool(args.success and args.main_green)
    # The reason is only meaningful for a cycle that did not fully succeed.
    reason = "" if truly_success else args.reason

    entry = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "cycle": args.cycle,
        "issue": args.issue,
        "type": args.type,
        "success": truly_success,
        "hermes_clean": args.success,
        "main_green": args.main_green,
        "duration": args.duration,
        "tests_passed": args.tests_passed,
        "tests_added": args.tests_added,
        "files_changed": args.files_changed,
        "lines_added": args.lines_added,
        "lines_removed": args.lines_removed,
        "kimi_panes": args.kimi_panes,
        "pr": args.pr,
        "reason": reason,
        "notes": args.notes,
    }

    retro_file.parent.mkdir(parents=True, exist_ok=True)
    with open(retro_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry) + "\n")

    update_summary(retro_file, summary_file)

    # Report the same verdict that was logged: a clean hermes exit with a red
    # main is a FAILURE per the success definition in the module docstring.
    status = "✓ SUCCESS" if truly_success else "✗ FAILURE"
    parts = [f"[retro] Cycle {args.cycle} {status}"]
    if args.issue:
        parts.append(f" (#{args.issue} {args.type})")
    if args.duration:
        parts.append(f" — {args.duration}s")
    if reason:
        parts.append(f" — {reason}")
    print("".join(parts))


if __name__ == "__main__":
    main()