Generates comprehensive reports from overnight loop JSONL data: **Features:** - Reads ~/shared/overnight-loop/*.jsonl - Produces JSON and Markdown reports - Pass/fail statistics with pass rates - Duration analysis (avg, median, p95) - Per-task breakdowns - Hourly timeline trends - Error pattern analysis - Auto-generated recommendations **Reports:** - ~/timmy/reports/scorecard_YYYYMMDD.json (structured) - ~/timmy/reports/scorecard_YYYYMMDD.md (human-readable) **Usage:** python uni-wizard/scripts/generate_scorecard.py Closes #79
389 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
JSONL Scorecard Generator for Uni-Wizard
|
|
Analyzes overnight loop results and produces comprehensive reports
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from collections import defaultdict
|
|
from typing import Dict, List, Any
|
|
import statistics
|
|
|
|
|
|
class ScorecardGenerator:
    """
    Generates scorecards from overnight loop JSONL data.

    Analyzes:
    - Pass/fail rates
    - Response times (avg, median, p95)
    - Per-task breakdowns
    - Error patterns
    - Timeline trends
    """

    def __init__(self, input_dir: str = "~/shared/overnight-loop"):
        """Create a generator that reads *.jsonl result files from input_dir."""
        self.input_dir = Path(input_dir).expanduser()
        # Raw task records accumulated by load_all() / load_jsonl()
        self.tasks: List[Dict[str, Any]] = []
        # Aggregate counters populated by analyze()
        self.stats: Dict[str, Any] = {
            "total": 0,
            "passed": 0,
            "failed": 0,
            "pass_rate": 0.0,  # percentage, 0-100
            "durations": [],   # seconds, one entry per task
            "by_task": defaultdict(lambda: {"total": 0, "passed": 0, "failed": 0, "durations": []}),
            "by_hour": defaultdict(lambda: {"total": 0, "passed": 0, "durations": []}),
            # error string -> occurrence count (failures only)
            "errors": defaultdict(int)
        }

    def load_jsonl(self, filepath: Path) -> List[Dict]:
        """Load and parse a JSONL file, skipping malformed lines with a warning.

        Returns the list of successfully parsed records.
        """
        tasks = []
        with open(filepath, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue  # tolerate blank lines
                try:
                    tasks.append(json.loads(line))
                except json.JSONDecodeError:
                    print(f"Warning: Skipping malformed line {line_num} in {filepath}")
        return tasks

    def load_all(self):
        """Load every *.jsonl file from the input directory into self.tasks."""
        if not self.input_dir.exists():
            print(f"Input directory not found: {self.input_dir}")
            return

        jsonl_files = list(self.input_dir.glob("*.jsonl"))
        if not jsonl_files:
            print(f"No .jsonl files found in {self.input_dir}")
            return

        for filepath in sorted(jsonl_files):
            print(f"Loading: {filepath.name}")
            self.tasks.extend(self.load_jsonl(filepath))

        print(f"Loaded {len(self.tasks)} tasks from {len(jsonl_files)} files")

    def analyze(self):
        """Aggregate all loaded tasks into self.stats.

        NOTE: counters are not reset, so calling analyze() twice
        double-counts; call it once per load.
        """
        if not self.tasks:
            print("No tasks to analyze")
            return

        for task in self.tasks:
            self._process_task(task)

        # Overall pass rate as a percentage
        if self.stats["total"] > 0:
            self.stats["pass_rate"] = (self.stats["passed"] / self.stats["total"]) * 100

        print(f"Analysis complete: {self.stats['passed']}/{self.stats['total']} passed ({self.stats['pass_rate']:.1f}%)")

    def _process_task(self, task: Dict):
        """Fold a single task record into the aggregate counters."""
        self.stats["total"] += 1

        status = task.get("status", "unknown")
        duration = task.get("duration_s", 0)
        task_type = task.get("task", "unknown")
        timestamp = task.get("timestamp", "")

        # Pass/fail tallies; anything other than an explicit "pass" counts
        # as a failure.
        if status == "pass":
            self.stats["passed"] += 1
            self.stats["by_task"][task_type]["passed"] += 1
        else:
            self.stats["failed"] += 1
            self.stats["by_task"][task_type]["failed"] += 1
            # Error patterns are tracked for failures only; recording passing
            # tasks here would flood the histogram with "unknown_error".
            self.stats["errors"][task.get("error", "unknown_error")] += 1

        # Durations (overall and per task type)
        self.stats["durations"].append(duration)
        self.stats["by_task"][task_type]["durations"].append(duration)
        self.stats["by_task"][task_type]["total"] += 1

        # Hourly bucket keyed on the YYYY-MM-DDTHH prefix of the timestamp
        if timestamp:
            try:
                hour = timestamp[:13]
            except TypeError:
                return  # non-string timestamp; skip hourly bucketing
            bucket = self.stats["by_hour"][hour]
            bucket["total"] += 1
            # Record every task's duration so the hourly average is consistent
            # with the per-task and overall duration stats (not passes only).
            bucket["durations"].append(duration)
            if status == "pass":
                bucket["passed"] += 1

    def calculate_duration_stats(self, durations: List[float]) -> Dict[str, float]:
        """Return avg/median/p95/min/max (seconds, rounded to 2 decimals).

        An empty list yields all zeros rather than raising.
        """
        if not durations:
            return {"avg": 0, "median": 0, "p95": 0, "min": 0, "max": 0}

        sorted_durations = sorted(durations)
        n = len(sorted_durations)

        return {
            "avg": round(statistics.mean(durations), 2),
            "median": round(statistics.median(durations), 2),
            # Nearest-rank p95; int(n * 0.95) is always a valid index
            # (strictly < n, and 0 when n == 1).
            "p95": round(sorted_durations[int(n * 0.95)], 2),
            "min": round(min(durations), 2),
            "max": round(max(durations), 2)
        }

    def generate_json(self) -> Dict:
        """Generate the structured JSON report as a plain dict."""
        duration_stats = self.calculate_duration_stats(self.stats["durations"])

        report = {
            "generated_at": datetime.now().isoformat(),
            "summary": {
                "total_tasks": self.stats["total"],
                "passed": self.stats["passed"],
                "failed": self.stats["failed"],
                "pass_rate": round(self.stats["pass_rate"], 2),
                "duration_stats": duration_stats
            },
            "by_task": {},
            "by_hour": {},
            "errors": dict(self.stats["errors"]),
            "recommendations": self._generate_recommendations()
        }

        # Per-task breakdown
        for task_type, data in self.stats["by_task"].items():
            if data["total"] > 0:
                report["by_task"][task_type] = {
                    "total": data["total"],
                    "passed": data["passed"],
                    "failed": data["failed"],
                    "pass_rate": round((data["passed"] / data["total"]) * 100, 2),
                    "duration_stats": self.calculate_duration_stats(data["durations"])
                }

        # Hourly breakdown, in chronological order
        for hour, data in sorted(self.stats["by_hour"].items()):
            if data["total"] > 0:
                report["by_hour"][hour] = {
                    "total": data["total"],
                    "passed": data["passed"],
                    "pass_rate": round((data["passed"] / data["total"]) * 100, 2),
                    "avg_duration": round(statistics.mean(data["durations"]), 2) if data["durations"] else 0
                }

        return report

    def generate_markdown(self) -> str:
        """Render the JSON report as a human-readable markdown document."""
        json_report = self.generate_json()

        md = f"""# Overnight Loop Scorecard

**Generated:** {json_report['generated_at']}

---

## Summary

| Metric | Value |
|--------|-------|
| Total Tasks | {json_report['summary']['total_tasks']} |
| Passed | {json_report['summary']['passed']} ✅ |
| Failed | {json_report['summary']['failed']} ❌ |
| **Pass Rate** | **{json_report['summary']['pass_rate']:.1f}%** |

### Duration Statistics

| Metric | Value (seconds) |
|--------|-----------------|
| Average | {json_report['summary']['duration_stats']['avg']} |
| Median | {json_report['summary']['duration_stats']['median']} |
| P95 | {json_report['summary']['duration_stats']['p95']} |
| Min | {json_report['summary']['duration_stats']['min']} |
| Max | {json_report['summary']['duration_stats']['max']} |

---

## Per-Task Breakdown

| Task | Total | Passed | Failed | Pass Rate | Avg Duration |
|------|-------|--------|--------|-----------|--------------|
"""

        # Sort by pass rate ascending so the worst performers appear first
        sorted_tasks = sorted(
            json_report['by_task'].items(),
            key=lambda x: x[1]['pass_rate']
        )

        for task_type, data in sorted_tasks:
            status = "✅" if data['pass_rate'] >= 90 else "⚠️" if data['pass_rate'] >= 70 else "❌"
            md += f"| {task_type} | {data['total']} | {data['passed']} | {data['failed']} | {status} {data['pass_rate']:.1f}% | {data['duration_stats']['avg']}s |\n"

        md += """
---

## Timeline (Hourly)

| Hour | Tasks | Passed | Pass Rate | Avg Duration |
|------|-------|--------|-----------|--------------|
"""

        for hour, data in sorted(json_report['by_hour'].items()):
            trend = "📈" if data['pass_rate'] >= 90 else "📊" if data['pass_rate'] >= 70 else "📉"
            md += f"| {hour} | {data['total']} | {data['passed']} | {trend} {data['pass_rate']:.1f}% | {data['avg_duration']}s |\n"

        md += """
---

## Error Analysis

| Error Pattern | Count |
|---------------|-------|
"""

        # Most frequent errors first
        for error, count in sorted(json_report['errors'].items(), key=lambda x: x[1], reverse=True):
            md += f"| {error} | {count} |\n"

        md += """
---

## Recommendations

"""

        for rec in json_report['recommendations']:
            md += f"- {rec}\n"

        md += """
---

*Generated by Uni-Wizard Scorecard Generator*
"""

        return md

    def _generate_recommendations(self) -> List[str]:
        """Generate human-readable recommendations based on the analysis.

        Heuristics: flag low/high overall pass rates, tasks under 50% pass,
        tasks averaging over 30s, the most frequent error, and pass-rate
        drift between the first and last recorded hours.
        """
        recommendations = []

        # Overall pass rate thresholds
        if self.stats["pass_rate"] < 70:
            recommendations.append(f"⚠️ Overall pass rate ({self.stats['pass_rate']:.1f}%) is concerning. Review infrastructure health.")
        elif self.stats["pass_rate"] >= 95:
            recommendations.append(f"✅ Excellent pass rate ({self.stats['pass_rate']:.1f}%). System is performing well.")

        # Tasks failing more than half the time
        failing_tasks = []
        for task_type, data in self.stats["by_task"].items():
            if data["total"] > 0:
                pass_rate = (data["passed"] / data["total"]) * 100
                if pass_rate < 50:
                    failing_tasks.append(task_type)

        if failing_tasks:
            recommendations.append(f"❌ Tasks with <50% pass rate: {', '.join(failing_tasks)}. Consider debugging or removing.")

        # Tasks taking >30s on average
        slow_tasks = []
        for task_type, data in self.stats["by_task"].items():
            if data["durations"]:
                avg = statistics.mean(data["durations"])
                if avg > 30:
                    slow_tasks.append(f"{task_type} ({avg:.1f}s)")

        if slow_tasks:
            recommendations.append(f"⏱️ Slow tasks detected: {', '.join(slow_tasks)}. Consider optimization.")

        # Most common error pattern (failures only)
        if self.stats["errors"]:
            top_error = max(self.stats["errors"].items(), key=lambda x: x[1])
            recommendations.append(f"🔍 Most common error: '{top_error[0]}' ({top_error[1]} occurrences). Investigate root cause.")

        # Pass-rate trend between first and last recorded hours
        if len(self.stats["by_hour"]) >= 2:
            hours = sorted(self.stats["by_hour"].keys())
            first_hour = hours[0]
            last_hour = hours[-1]

            first_rate = (self.stats["by_hour"][first_hour]["passed"] / self.stats["by_hour"][first_hour]["total"]) * 100
            last_rate = (self.stats["by_hour"][last_hour]["passed"] / self.stats["by_hour"][last_hour]["total"]) * 100

            # A 10-point swing in either direction is considered significant
            if last_rate > first_rate + 10:
                recommendations.append(f"📈 Performance improving over time (+{last_rate - first_rate:.1f}% pass rate).")
            elif last_rate < first_rate - 10:
                recommendations.append(f"📉 Performance degrading over time (-{first_rate - last_rate:.1f}% pass rate). Check for resource exhaustion.")

        return recommendations

    def save_reports(self, output_dir: str = "~/timmy/reports"):
        """Save JSON and markdown reports, dated with today's date.

        Returns (json_path, md_path) of the written files.
        """
        output_path = Path(output_dir).expanduser()
        output_path.mkdir(parents=True, exist_ok=True)

        date_str = datetime.now().strftime("%Y%m%d")

        # Save JSON
        json_file = output_path / f"scorecard_{date_str}.json"
        json_report = self.generate_json()
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(json_report, f, indent=2)
        print(f"JSON report saved: {json_file}")

        # Save Markdown
        md_file = output_path / f"scorecard_{date_str}.md"
        md_report = self.generate_markdown()
        with open(md_file, 'w', encoding='utf-8') as f:
            f.write(md_report)
        print(f"Markdown report saved: {md_file}")

        return json_file, md_file
|
|
|
|
|
|
def main():
    """CLI entry point: parse arguments, run the generator, report outputs."""
    import argparse

    parser = argparse.ArgumentParser(description="Generate scorecard from overnight loop JSONL")
    parser.add_argument("--input", "-i", default="~/shared/overnight-loop", help="Input directory with JSONL files")
    parser.add_argument("--output", "-o", default="~/timmy/reports", help="Output directory for reports")
    args = parser.parse_args()

    banner = "=" * 60
    print(banner)
    print("UNI-WIZARD SCORECARD GENERATOR")
    print(banner)
    print()

    generator = ScorecardGenerator(input_dir=args.input)
    generator.load_all()
    generator.analyze()

    # Guard clause: nothing loaded means nothing to write.
    if generator.stats["total"] == 0:
        print("No data to report")
        return

    json_file, md_file = generator.save_reports(output_dir=args.output)
    print()
    print(banner)
    print("REPORTS GENERATED")
    print(banner)
    print(f"JSON: {json_file}")
    print(f"Markdown: {md_file}")


if __name__ == "__main__":
    main()
|