diff --git a/scripts/cycle_retro.py b/scripts/cycle_retro.py index 5d2bb50..ae34cd3 100644 --- a/scripts/cycle_retro.py +++ b/scripts/cycle_retro.py @@ -86,26 +86,34 @@ def update_summary() -> None: if not recent: return - successes = [e for e in recent if e.get("success")] - failures = [e for e in recent if not e.get("success")] - main_green_count = sum(1 for e in recent if e.get("main_green")) - hermes_clean_count = sum(1 for e in recent if e.get("hermes_clean", e.get("success"))) + # Only count entries with real measured data for rates. + # Backfilled entries lack main_green/hermes_clean fields — exclude them. + measured = [e for e in recent if "main_green" in e] + successes = [e for e in measured if e.get("success")] + failures = [e for e in measured if not e.get("success")] + main_green_count = sum(1 for e in measured if e.get("main_green")) + hermes_clean_count = sum(1 for e in measured if e.get("hermes_clean")) durations = [e["duration"] for e in recent if e.get("duration", 0) > 0] - # Per-type stats + # Per-type stats (only from measured entries for rates) type_stats: dict[str, dict] = {} for e in recent: t = e.get("type", "unknown") if t not in type_stats: - type_stats[t] = {"count": 0, "success": 0, "total_duration": 0} + type_stats[t] = {"count": 0, "measured": 0, "success": 0, "total_duration": 0} type_stats[t]["count"] += 1 - if e.get("success"): - type_stats[t]["success"] += 1 type_stats[t]["total_duration"] += e.get("duration", 0) + if "main_green" in e: + type_stats[t]["measured"] += 1 + if e.get("success"): + type_stats[t]["success"] += 1 for t, stats in type_stats.items(): + if stats["measured"] > 0: + stats["success_rate"] = round(stats["success"] / stats["measured"], 2) + else: + stats["success_rate"] = -1 if stats["count"] > 0: - stats["success_rate"] = round(stats["success"] / stats["count"], 2) stats["avg_duration"] = round(stats["total_duration"] / stats["count"]) # Quarantine candidates (failed 2+ times) @@ -118,10 +126,11 @@ def update_summary() -> None: summary = { "updated_at": datetime.now(timezone.utc).isoformat(), "window": len(recent), + "measured_cycles": len(measured), "total_cycles": len(entries), - "success_rate": round(len(successes) / len(recent), 2) if recent else 0, - "main_green_rate": round(main_green_count / len(recent), 2) if recent else 0, - "hermes_clean_rate": round(hermes_clean_count / len(recent), 2) if recent else 0, + "success_rate": round(len(successes) / len(measured), 2) if measured else -1, + "main_green_rate": round(main_green_count / len(measured), 2) if measured else -1, + "hermes_clean_rate": round(hermes_clean_count / len(measured), 2) if measured else -1, "avg_duration_seconds": round(sum(durations) / len(durations)) if durations else 0, "total_lines_added": sum(e.get("lines_added", 0) for e in recent), "total_lines_removed": sum(e.get("lines_removed", 0) for e in recent),