fix: exclude backfilled data from success rate calculations (#189)

Backfilled retro entries lack the main_green/hermes_clean fields, so including them skewed the rates (survivorship bias). Rates are now computed only from measured entries, and LOOPSTAT shows "no data yet" instead of a fake 100%.

Co-authored-by: Kimi Agent <kimi@timmy.local>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/189
Co-authored-by: hermes <hermes@timmy.local>
Co-committed-by: hermes <hermes@timmy.local>
2026-03-15 12:29:27 -04:00
parent 766add6415
commit efef0cd7a2
1 changed file with 21 additions and 12 deletions
--- a/scripts/cycle_retro.py
+++ b/scripts/cycle_retro.py
@@ -86,26 +86,34 @@ def update_summary() -> None:
    if not recent:
        return

-    successes = [e for e in recent if e.get("success")]
-    failures = [e for e in recent if not e.get("success")]
-    main_green_count = sum(1 for e in recent if e.get("main_green"))
-    hermes_clean_count = sum(1 for e in recent if e.get("hermes_clean", e.get("success")))
+    # Only count entries with real measured data for rates.
+    # Backfilled entries lack main_green/hermes_clean fields — exclude them.
+    measured = [e for e in recent if "main_green" in e]
+    successes = [e for e in measured if e.get("success")]
+    failures = [e for e in measured if not e.get("success")]
+    main_green_count = sum(1 for e in measured if e.get("main_green"))
+    hermes_clean_count = sum(1 for e in measured if e.get("hermes_clean"))
    durations = [e["duration"] for e in recent if e.get("duration", 0) > 0]

-    # Per-type stats
+    # Per-type stats (only from measured entries for rates)
    type_stats: dict[str, dict] = {}
    for e in recent:
        t = e.get("type", "unknown")
        if t not in type_stats:
-            type_stats[t] = {"count": 0, "success": 0, "total_duration": 0}
+            type_stats[t] = {"count": 0, "measured": 0, "success": 0, "total_duration": 0}
        type_stats[t]["count"] += 1
-        if e.get("success"):
-            type_stats[t]["success"] += 1
        type_stats[t]["total_duration"] += e.get("duration", 0)
+        if "main_green" in e:
+            type_stats[t]["measured"] += 1
+            if e.get("success"):
+                type_stats[t]["success"] += 1

    for t, stats in type_stats.items():
+        if stats["measured"] > 0:
+            stats["success_rate"] = round(stats["success"] / stats["measured"], 2)
+        else:
+            stats["success_rate"] = -1
        if stats["count"] > 0:
-            stats["success_rate"] = round(stats["success"] / stats["count"], 2)
            stats["avg_duration"] = round(stats["total_duration"] / stats["count"])

    # Quarantine candidates (failed 2+ times)
@@ -118,10 +126,11 @@ def update_summary() -> None:
    summary = {
        "updated_at": datetime.now(timezone.utc).isoformat(),
        "window": len(recent),
+        "measured_cycles": len(measured),
        "total_cycles": len(entries),
-        "success_rate": round(len(successes) / len(recent), 2) if recent else 0,
-        "main_green_rate": round(main_green_count / len(recent), 2) if recent else 0,
-        "hermes_clean_rate": round(hermes_clean_count / len(recent), 2) if recent else 0,
+        "success_rate": round(len(successes) / len(measured), 2) if measured else -1,
+        "main_green_rate": round(main_green_count / len(measured), 2) if measured else -1,
+        "hermes_clean_rate": round(hermes_clean_count / len(measured), 2) if measured else -1,
        "avg_duration_seconds": round(sum(durations) / len(durations)) if durations else 0,
        "total_lines_added": sum(e.get("lines_added", 0) for e in recent),
        "total_lines_removed": sum(e.get("lines_removed", 0) for e in recent),