Files
Timmy-time-dashboard/scripts/cycle_retro.py

342 lines
13 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""Cycle retrospective logger for the Timmy dev loop.
Called after each cycle completes (success or failure).
Appends a structured entry to .loop/retro/cycles.jsonl.
EPOCH NOTATION (turnover system):
Each cycle carries a symbolic epoch tag alongside the raw integer:
WW.D:NNN
turnover glyph marks epoch-aware cycles
WW ISO week-of-year (0153)
D ISO weekday (1=Mon 7=Sun)
NNN daily cycle counter, zero-padded, resets at midnight UTC
Example: 12.3:042 Week 12, Wednesday, 42nd cycle of the day.
The raw `cycle` integer is preserved for backward compatibility.
The `epoch` field carries the symbolic notation.
SUCCESS DEFINITION:
A cycle is only "success" if BOTH conditions are met:
1. The hermes process exited cleanly (exit code 0)
2. Main is green (smoke test passes on main after merge)
A cycle that merges a PR but leaves main red is a FAILURE.
The --main-green flag records the smoke test result.
Usage:
python3 scripts/cycle_retro.py --cycle 42 --success --main-green --issue 85 \
--type bug --duration 480 --tests-passed 1450 --tests-added 3 \
--files-changed 2 --lines-added 45 --lines-removed 12 \
--kimi-panes 2 --pr 155
python3 scripts/cycle_retro.py --cycle 43 --failure --issue 90 \
--type feature --duration 1200 --reason "tox failed: 3 errors"
python3 scripts/cycle_retro.py --cycle 44 --success --no-main-green \
--reason "PR merged but tests fail on main"
"""
from __future__ import annotations
import argparse
import json
import re
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
REPO_ROOT = Path(__file__).resolve().parent.parent
RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
SUMMARY_FILE = REPO_ROOT / ".loop" / "retro" / "summary.json"
EPOCH_COUNTER_FILE = REPO_ROOT / ".loop" / "retro" / ".epoch_counter"
CYCLE_RESULT_FILE = REPO_ROOT / ".loop" / "cycle_result.json"
# How many recent entries to include in rolling summary
SUMMARY_WINDOW = 50
# Branch patterns that encode an issue number, e.g. kimi/issue-492
BRANCH_ISSUE_RE = re.compile(r"issue[/-](\d+)", re.IGNORECASE)
def detect_issue_from_branch() -> int | None:
"""Try to extract an issue number from the current git branch name."""
try:
branch = subprocess.check_output(
["git", "rev-parse", "--abbrev-ref", "HEAD"],
stderr=subprocess.DEVNULL,
text=True,
).strip()
except (subprocess.CalledProcessError, FileNotFoundError):
return None
m = BRANCH_ISSUE_RE.search(branch)
return int(m.group(1)) if m else None
# ── Epoch turnover ────────────────────────────────────────────────────────
def _epoch_tag(now: datetime | None = None) -> tuple[str, dict]:
"""Generate the symbolic epoch tag and advance the daily counter.
Returns (epoch_string, epoch_parts) where epoch_parts is a dict with
week, weekday, daily_n for structured storage.
The daily counter persists in .epoch_counter as a two-line file:
line 1: ISO date (YYYY-MM-DD) of the current epoch day
line 2: integer count
When the date rolls over, the counter resets to 1.
"""
if now is None:
now = datetime.now(timezone.utc)
iso_cal = now.isocalendar() # (year, week, weekday)
week = iso_cal[1]
weekday = iso_cal[2]
today_str = now.strftime("%Y-%m-%d")
# Read / reset daily counter
daily_n = 1
EPOCH_COUNTER_FILE.parent.mkdir(parents=True, exist_ok=True)
if EPOCH_COUNTER_FILE.exists():
try:
lines = EPOCH_COUNTER_FILE.read_text().strip().splitlines()
if len(lines) == 2 and lines[0] == today_str:
daily_n = int(lines[1]) + 1
except (ValueError, IndexError):
pass # corrupt file — reset
# Persist
EPOCH_COUNTER_FILE.write_text(f"{today_str}\n{daily_n}\n")
tag = f"\u27f3{week:02d}.{weekday}:{daily_n:03d}"
parts = {"week": week, "weekday": weekday, "daily_n": daily_n}
return tag, parts
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(description="Log a cycle retrospective")
p.add_argument("--cycle", type=int, required=True)
p.add_argument("--issue", type=int, default=None)
p.add_argument("--type", choices=["bug", "feature", "refactor", "philosophy", "unknown"],
default="unknown")
outcome = p.add_mutually_exclusive_group(required=True)
outcome.add_argument("--success", action="store_true")
outcome.add_argument("--failure", action="store_true")
p.add_argument("--duration", type=int, default=0, help="Cycle time in seconds")
p.add_argument("--tests-passed", type=int, default=0)
p.add_argument("--tests-added", type=int, default=0)
p.add_argument("--files-changed", type=int, default=0)
p.add_argument("--lines-added", type=int, default=0)
p.add_argument("--lines-removed", type=int, default=0)
p.add_argument("--kimi-panes", type=int, default=0)
p.add_argument("--pr", type=int, default=None, help="PR number if merged")
p.add_argument("--reason", type=str, default="", help="Failure reason")
p.add_argument("--notes", type=str, default="", help="Free-form observations")
p.add_argument("--main-green", action="store_true", default=False,
help="Smoke test passed on main after this cycle")
p.add_argument("--no-main-green", dest="main_green", action="store_false",
help="Smoke test failed or was not run")
return p.parse_args()
def update_summary() -> None:
"""Compute rolling summary statistics from recent cycles."""
if not RETRO_FILE.exists():
return
entries = []
for line in RETRO_FILE.read_text().strip().splitlines():
try:
entries.append(json.loads(line))
except json.JSONDecodeError:
continue
recent = entries[-SUMMARY_WINDOW:]
if not recent:
return
# Only count entries with real measured data for rates.
# Backfilled entries lack main_green/hermes_clean fields — exclude them.
measured = [e for e in recent if "main_green" in e]
successes = [e for e in measured if e.get("success")]
failures = [e for e in measured if not e.get("success")]
main_green_count = sum(1 for e in measured if e.get("main_green"))
hermes_clean_count = sum(1 for e in measured if e.get("hermes_clean"))
durations = [e["duration"] for e in recent if e.get("duration", 0) > 0]
# Per-type stats (only from measured entries for rates)
type_stats: dict[str, dict] = {}
for e in recent:
t = e.get("type", "unknown")
if t not in type_stats:
type_stats[t] = {"count": 0, "measured": 0, "success": 0, "total_duration": 0}
type_stats[t]["count"] += 1
type_stats[t]["total_duration"] += e.get("duration", 0)
if "main_green" in e:
type_stats[t]["measured"] += 1
if e.get("success"):
type_stats[t]["success"] += 1
for t, stats in type_stats.items():
if stats["measured"] > 0:
stats["success_rate"] = round(stats["success"] / stats["measured"], 2)
else:
stats["success_rate"] = -1
if stats["count"] > 0:
stats["avg_duration"] = round(stats["total_duration"] / stats["count"])
# Quarantine candidates (failed 2+ times)
issue_failures: dict[int, int] = {}
for e in recent:
if not e.get("success") and e.get("issue"):
issue_failures[e["issue"]] = issue_failures.get(e["issue"], 0) + 1
quarantine_candidates = {k: v for k, v in issue_failures.items() if v >= 2}
# Epoch turnover stats — cycles per week/day from epoch-tagged entries
epoch_entries = [e for e in recent if e.get("epoch")]
by_week: dict[int, int] = {}
by_weekday: dict[int, int] = {}
for e in epoch_entries:
w = e.get("epoch_week")
d = e.get("epoch_weekday")
if w is not None:
by_week[w] = by_week.get(w, 0) + 1
if d is not None:
by_weekday[d] = by_weekday.get(d, 0) + 1
# Current epoch — latest entry's epoch tag
current_epoch = epoch_entries[-1].get("epoch", "") if epoch_entries else ""
# Weekday names for display
weekday_glyphs = {1: "Mon", 2: "Tue", 3: "Wed", 4: "Thu",
5: "Fri", 6: "Sat", 7: "Sun"}
by_weekday_named = {weekday_glyphs.get(k, str(k)): v
for k, v in sorted(by_weekday.items())}
summary = {
"updated_at": datetime.now(timezone.utc).isoformat(),
"current_epoch": current_epoch,
"window": len(recent),
"measured_cycles": len(measured),
"total_cycles": len(entries),
"success_rate": round(len(successes) / len(measured), 2) if measured else -1,
"main_green_rate": round(main_green_count / len(measured), 2) if measured else -1,
"hermes_clean_rate": round(hermes_clean_count / len(measured), 2) if measured else -1,
"avg_duration_seconds": round(sum(durations) / len(durations)) if durations else 0,
"total_lines_added": sum(e.get("lines_added", 0) for e in recent),
"total_lines_removed": sum(e.get("lines_removed", 0) for e in recent),
"total_prs_merged": sum(1 for e in recent if e.get("pr")),
"by_type": type_stats,
"by_week": dict(sorted(by_week.items())),
"by_weekday": by_weekday_named,
"quarantine_candidates": quarantine_candidates,
"recent_failures": [
{"cycle": e["cycle"], "epoch": e.get("epoch", ""),
"issue": e.get("issue"), "reason": e.get("reason", "")}
for e in failures[-5:]
],
}
SUMMARY_FILE.write_text(json.dumps(summary, indent=2) + "\n")
def _load_cycle_result() -> dict:
"""Read .loop/cycle_result.json if it exists; return empty dict on failure."""
if not CYCLE_RESULT_FILE.exists():
return {}
try:
raw = CYCLE_RESULT_FILE.read_text().strip()
# Strip hermes fence markers (```json ... ```) if present
if raw.startswith("```"):
lines = raw.splitlines()
lines = [l for l in lines if not l.startswith("```")]
raw = "\n".join(lines)
return json.loads(raw)
except (json.JSONDecodeError, OSError):
return {}
def main() -> None:
args = parse_args()
# Backfill from cycle_result.json when CLI args have defaults
cr = _load_cycle_result()
if cr:
if args.issue is None and cr.get("issue"):
args.issue = int(cr["issue"])
if args.type == "unknown" and cr.get("type"):
args.type = cr["type"]
if args.tests_passed == 0 and cr.get("tests_passed"):
args.tests_passed = int(cr["tests_passed"])
if not args.notes and cr.get("notes"):
args.notes = cr["notes"]
# Consume-once: delete after reading so stale results don't poison future cycles
CYCLE_RESULT_FILE.unlink(missing_ok=True)
# Auto-detect issue from branch when not explicitly provided
if args.issue is None:
args.issue = detect_issue_from_branch()
# Reject idle cycles — no issue and no duration means nothing happened
if not args.issue and args.duration == 0:
print(f"[retro] Cycle {args.cycle} skipped — idle (no issue, no duration)")
return
# A cycle is only truly successful if hermes exited clean AND main is green
truly_success = args.success and args.main_green
# Generate epoch turnover tag
now = datetime.now(timezone.utc)
epoch_tag, epoch_parts = _epoch_tag(now)
entry = {
"timestamp": now.isoformat(),
"cycle": args.cycle,
"epoch": epoch_tag,
"epoch_week": epoch_parts["week"],
"epoch_weekday": epoch_parts["weekday"],
"epoch_daily_n": epoch_parts["daily_n"],
"issue": args.issue,
"type": args.type,
"success": truly_success,
"hermes_clean": args.success,
"main_green": args.main_green,
"duration": args.duration,
"tests_passed": args.tests_passed,
"tests_added": args.tests_added,
"files_changed": args.files_changed,
"lines_added": args.lines_added,
"lines_removed": args.lines_removed,
"kimi_panes": args.kimi_panes,
"pr": args.pr,
"reason": args.reason if (args.failure or not args.main_green) else "",
"notes": args.notes,
}
RETRO_FILE.parent.mkdir(parents=True, exist_ok=True)
with open(RETRO_FILE, "a") as f:
f.write(json.dumps(entry) + "\n")
update_summary()
status = "✓ SUCCESS" if args.success else "✗ FAILURE"
print(f"[retro] {epoch_tag} Cycle {args.cycle} {status}", end="")
if args.issue:
print(f" (#{args.issue} {args.type})", end="")
if args.duration:
print(f"{args.duration}s", end="")
if args.failure and args.reason:
print(f"{args.reason}", end="")
print()
if __name__ == "__main__":
main()