fix: exclude backfilled data from success rate calculations (#189)
All checks were successful
Tests / lint (push) Successful in 3s
Tests / test (push) Successful in 1m3s

Backfilled retro entries lack main_green/hermes_clean fields (survivorship bias). Now rates are computed only from measured entries. LOOPSTAT shows "no data yet" instead of fake 100%.

Co-authored-by: Kimi Agent <kimi@timmy.local>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/189
Co-authored-by: hermes <hermes@timmy.local>
Co-committed-by: hermes <hermes@timmy.local>
This commit was merged in pull request #189.
This commit is contained in:
2026-03-15 12:29:27 -04:00
committed by rockachopa
parent 766add6415
commit efef0cd7a2

View File

@@ -86,26 +86,34 @@ def update_summary() -> None:
if not recent:
return
successes = [e for e in recent if e.get("success")]
failures = [e for e in recent if not e.get("success")]
main_green_count = sum(1 for e in recent if e.get("main_green"))
hermes_clean_count = sum(1 for e in recent if e.get("hermes_clean", e.get("success")))
# Only count entries with real measured data for rates.
# Backfilled entries lack main_green/hermes_clean fields — exclude them.
measured = [e for e in recent if "main_green" in e]
successes = [e for e in measured if e.get("success")]
failures = [e for e in measured if not e.get("success")]
main_green_count = sum(1 for e in measured if e.get("main_green"))
hermes_clean_count = sum(1 for e in measured if e.get("hermes_clean"))
durations = [e["duration"] for e in recent if e.get("duration", 0) > 0]
# Per-type stats
# Per-type stats (only from measured entries for rates)
type_stats: dict[str, dict] = {}
for e in recent:
t = e.get("type", "unknown")
if t not in type_stats:
type_stats[t] = {"count": 0, "success": 0, "total_duration": 0}
type_stats[t] = {"count": 0, "measured": 0, "success": 0, "total_duration": 0}
type_stats[t]["count"] += 1
if e.get("success"):
type_stats[t]["success"] += 1
type_stats[t]["total_duration"] += e.get("duration", 0)
if "main_green" in e:
type_stats[t]["measured"] += 1
if e.get("success"):
type_stats[t]["success"] += 1
for t, stats in type_stats.items():
if stats["measured"] > 0:
stats["success_rate"] = round(stats["success"] / stats["measured"], 2)
else:
stats["success_rate"] = -1
if stats["count"] > 0:
stats["success_rate"] = round(stats["success"] / stats["count"], 2)
stats["avg_duration"] = round(stats["total_duration"] / stats["count"])
# Quarantine candidates (failed 2+ times)
@@ -118,10 +126,11 @@ def update_summary() -> None:
summary = {
"updated_at": datetime.now(timezone.utc).isoformat(),
"window": len(recent),
"measured_cycles": len(measured),
"total_cycles": len(entries),
"success_rate": round(len(successes) / len(recent), 2) if recent else 0,
"main_green_rate": round(main_green_count / len(recent), 2) if recent else 0,
"hermes_clean_rate": round(hermes_clean_count / len(recent), 2) if recent else 0,
"success_rate": round(len(successes) / len(measured), 2) if measured else -1,
"main_green_rate": round(main_green_count / len(measured), 2) if measured else -1,
"hermes_clean_rate": round(hermes_clean_count / len(measured), 2) if measured else -1,
"avg_duration_seconds": round(sum(durations) / len(durations)) if durations else 0,
"total_lines_added": sum(e.get("lines_added", 0) for e in recent),
"total_lines_removed": sum(e.get("lines_removed", 0) for e in recent),