#!/usr/bin/env python3
"""
JSONL Scorecard Generator for Uni-Wizard

Analyzes overnight loop results and produces comprehensive reports
"""

import json
import sys
from pathlib import Path
from datetime import datetime
from collections import defaultdict
from typing import Dict, List, Any, Optional
import statistics


class ScorecardGenerator:
    """
    Generates scorecards from overnight loop JSONL data.

    Analyzes:
    - Pass/fail rates
    - Response times (avg, median, p95)
    - Per-task breakdowns
    - Error patterns
    - Timeline trends
    """

    def __init__(self, input_dir: str = "~/shared/overnight-loop"):
        """Initialize with the directory containing *.jsonl result files.

        Args:
            input_dir: Directory to scan for JSONL files; ``~`` is expanded.
        """
        self.input_dir = Path(input_dir).expanduser()
        self.tasks: List[Dict] = []  # raw task records loaded from JSONL
        # Aggregate counters filled in by analyze()/_process_task().
        self.stats: Dict[str, Any] = {
            "total": 0,
            "passed": 0,
            "failed": 0,
            "pass_rate": 0.0,
            "durations": [],  # all task durations, seconds
            "by_task": defaultdict(lambda: {"total": 0, "passed": 0, "failed": 0, "durations": []}),
            "by_hour": defaultdict(lambda: {"total": 0, "passed": 0, "durations": []}),
            "errors": defaultdict(int),  # error string -> occurrence count
        }

    def load_jsonl(self, filepath: Path) -> List[Dict]:
        """Load and parse a JSONL file, handling errors gracefully.

        Malformed JSON lines and lines whose value is not a JSON object are
        skipped with a warning rather than aborting the whole run.

        Args:
            filepath: Path to a .jsonl file.

        Returns:
            List of task dicts parsed from the file.
        """
        tasks = []
        # Pin UTF-8 so records containing non-ASCII text load on any platform.
        with open(filepath, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    task = json.loads(line)
                except json.JSONDecodeError:
                    print(f"Warning: Skipping malformed line {line_num} in {filepath}")
                    continue
                # A syntactically valid JSON line may still not be an object
                # (e.g. a bare number); downstream code calls .get(), so only
                # accept dicts.
                if not isinstance(task, dict):
                    print(f"Warning: Skipping non-object line {line_num} in {filepath}")
                    continue
                tasks.append(task)
        return tasks

    def load_all(self):
        """Load all JSONL files from input directory (sorted by filename)."""
        if not self.input_dir.exists():
            print(f"Input directory not found: {self.input_dir}")
            return

        jsonl_files = list(self.input_dir.glob("*.jsonl"))
        if not jsonl_files:
            print(f"No .jsonl files found in {self.input_dir}")
            return

        for filepath in sorted(jsonl_files):
            print(f"Loading: {filepath.name}")
            tasks = self.load_jsonl(filepath)
            self.tasks.extend(tasks)

        print(f"Loaded {len(self.tasks)} tasks from {len(jsonl_files)} files")

    def analyze(self):
        """Analyze all loaded tasks, populating self.stats."""
        if not self.tasks:
            print("No tasks to analyze")
            return

        for task in self.tasks:
            self._process_task(task)

        # Calculate overall pass rate
        if self.stats["total"] > 0:
            self.stats["pass_rate"] = (self.stats["passed"] / self.stats["total"]) * 100

        print(f"Analysis complete: {self.stats['passed']}/{self.stats['total']} passed ({self.stats['pass_rate']:.1f}%)")

    def _process_task(self, task: Dict):
        """Process a single task record into the aggregate counters.

        Expects optional keys: "status" ("pass"/other), "duration_s" (number),
        "task" (type name), "timestamp" (ISO-8601 string), "error" (string).
        Missing keys fall back to safe defaults.
        """
        # Basic stats
        self.stats["total"] += 1
        status = task.get("status", "unknown")
        duration = task.get("duration_s", 0)
        task_type = task.get("task", "unknown")
        timestamp = task.get("timestamp", "")

        # Pass/fail
        if status == "pass":
            self.stats["passed"] += 1
            self.stats["by_task"][task_type]["passed"] += 1
        else:
            self.stats["failed"] += 1
            self.stats["by_task"][task_type]["failed"] += 1
            # Track error patterns (only for failed tasks, so passing tasks
            # don't inflate the "unknown_error" bucket)
            error = task.get("error", "unknown_error")
            self.stats["errors"][error] += 1

        # Durations
        self.stats["durations"].append(duration)
        self.stats["by_task"][task_type]["durations"].append(duration)
        self.stats["by_task"][task_type]["total"] += 1

        # Hourly breakdown
        if timestamp:
            try:
                hour = timestamp[:13]  # YYYY-MM-DDTHH
            except TypeError:
                # timestamp was not a string (malformed record); skip timeline
                pass
            else:
                self.stats["by_hour"][hour]["total"] += 1
                if status == "pass":
                    self.stats["by_hour"][hour]["passed"] += 1
                self.stats["by_hour"][hour]["durations"].append(duration)

    def calculate_duration_stats(self, durations: List[float]) -> Dict[str, float]:
        """Calculate duration statistics (avg/median/p95/min/max, rounded).

        Returns all-zero stats for an empty input list.
        """
        if not durations:
            return {"avg": 0, "median": 0, "p95": 0, "min": 0, "max": 0}

        sorted_durations = sorted(durations)
        n = len(sorted_durations)
        return {
            "avg": round(statistics.mean(durations), 2),
            "median": round(statistics.median(durations), 2),
            # Nearest-rank p95; int(n * 0.95) is always a valid index (< n).
            "p95": round(sorted_durations[int(n * 0.95)] if n > 1 else sorted_durations[0], 2),
            "min": round(min(durations), 2),
            "max": round(max(durations), 2),
        }

    def generate_json(self) -> Dict:
        """Generate structured JSON report from the analyzed stats."""
        duration_stats = self.calculate_duration_stats(self.stats["durations"])

        report = {
            "generated_at": datetime.now().isoformat(),
            "summary": {
                "total_tasks": self.stats["total"],
                "passed": self.stats["passed"],
                "failed": self.stats["failed"],
                "pass_rate": round(self.stats["pass_rate"], 2),
                "duration_stats": duration_stats,
            },
            "by_task": {},
            "by_hour": {},
            "errors": dict(self.stats["errors"]),
            "recommendations": self._generate_recommendations(),
        }

        # Per-task breakdown
        for task_type, data in self.stats["by_task"].items():
            if data["total"] > 0:
                pass_rate = (data["passed"] / data["total"]) * 100
                report["by_task"][task_type] = {
                    "total": data["total"],
                    "passed": data["passed"],
                    "failed": data["failed"],
                    "pass_rate": round(pass_rate, 2),
                    "duration_stats": self.calculate_duration_stats(data["durations"]),
                }

        # Hourly breakdown
        for hour, data in sorted(self.stats["by_hour"].items()):
            if data["total"] > 0:
                pass_rate = (data["passed"] / data["total"]) * 100
                report["by_hour"][hour] = {
                    "total": data["total"],
                    "passed": data["passed"],
                    "pass_rate": round(pass_rate, 2),
                    "avg_duration": round(statistics.mean(data["durations"]), 2) if data["durations"] else 0,
                }

        return report

    def generate_markdown(self, json_report: Optional[Dict] = None) -> str:
        """Generate markdown report.

        Args:
            json_report: Optional precomputed result of generate_json();
                computed on demand if omitted (backward-compatible).
        """
        if json_report is None:
            json_report = self.generate_json()

        md = f"""# Overnight Loop Scorecard

**Generated:** {json_report['generated_at']}

---

## Summary

| Metric | Value |
|--------|-------|
| Total Tasks | {json_report['summary']['total_tasks']} |
| Passed | {json_report['summary']['passed']} ✅ |
| Failed | {json_report['summary']['failed']} ❌ |
| **Pass Rate** | **{json_report['summary']['pass_rate']:.1f}%** |

### Duration Statistics

| Metric | Value (seconds) |
|--------|-----------------|
| Average | {json_report['summary']['duration_stats']['avg']} |
| Median | {json_report['summary']['duration_stats']['median']} |
| P95 | {json_report['summary']['duration_stats']['p95']} |
| Min | {json_report['summary']['duration_stats']['min']} |
| Max | {json_report['summary']['duration_stats']['max']} |

---

## Per-Task Breakdown

| Task | Total | Passed | Failed | Pass Rate | Avg Duration |
|------|-------|--------|--------|-----------|--------------|
"""

        # Sort by pass rate (ascending - worst first)
        sorted_tasks = sorted(
            json_report['by_task'].items(),
            key=lambda x: x[1]['pass_rate']
        )
        for task_type, data in sorted_tasks:
            status = "✅" if data['pass_rate'] >= 90 else "⚠️" if data['pass_rate'] >= 70 else "❌"
            md += f"| {task_type} | {data['total']} | {data['passed']} | {data['failed']} | {status} {data['pass_rate']:.1f}% | {data['duration_stats']['avg']}s |\n"

        md += """
---

## Timeline (Hourly)

| Hour | Tasks | Passed | Pass Rate | Avg Duration |
|------|-------|--------|-----------|--------------|
"""

        for hour, data in sorted(json_report['by_hour'].items()):
            trend = "📈" if data['pass_rate'] >= 90 else "📊" if data['pass_rate'] >= 70 else "📉"
            md += f"| {hour} | {data['total']} | {data['passed']} | {trend} {data['pass_rate']:.1f}% | {data['avg_duration']}s |\n"

        md += """
---

## Error Analysis

| Error Pattern | Count |
|---------------|-------|
"""

        for error, count in sorted(json_report['errors'].items(), key=lambda x: x[1], reverse=True):
            md += f"| {error} | {count} |\n"

        md += """
---

## Recommendations

"""
        for rec in json_report['recommendations']:
            md += f"- {rec}\n"

        md += """
---

*Generated by Uni-Wizard Scorecard Generator*
"""
        return md

    def _generate_recommendations(self) -> List[str]:
        """Generate recommendations based on analysis.

        Heuristics: overall pass rate thresholds, tasks with <50% pass rate,
        tasks averaging >30s, most common error, and first-vs-last-hour
        pass-rate trend.
        """
        recommendations = []

        # Check overall pass rate
        if self.stats["pass_rate"] < 70:
            recommendations.append(f"⚠️ Overall pass rate ({self.stats['pass_rate']:.1f}%) is concerning. Review infrastructure health.")
        elif self.stats["pass_rate"] >= 95:
            recommendations.append(f"✅ Excellent pass rate ({self.stats['pass_rate']:.1f}%). System is performing well.")

        # Check for failing tasks
        failing_tasks = []
        for task_type, data in self.stats["by_task"].items():
            if data["total"] > 0:
                pass_rate = (data["passed"] / data["total"]) * 100
                if pass_rate < 50:
                    failing_tasks.append(task_type)
        if failing_tasks:
            recommendations.append(f"❌ Tasks with <50% pass rate: {', '.join(failing_tasks)}. Consider debugging or removing.")

        # Check for slow tasks
        slow_tasks = []
        for task_type, data in self.stats["by_task"].items():
            if data["durations"]:
                avg = statistics.mean(data["durations"])
                if avg > 30:  # Tasks taking >30s on average
                    slow_tasks.append(f"{task_type} ({avg:.1f}s)")
        if slow_tasks:
            recommendations.append(f"⏱️ Slow tasks detected: {', '.join(slow_tasks)}. Consider optimization.")

        # Check error patterns
        if self.stats["errors"]:
            top_error = max(self.stats["errors"].items(), key=lambda x: x[1])
            recommendations.append(f"🔍 Most common error: '{top_error[0]}' ({top_error[1]} occurrences). Investigate root cause.")

        # Timeline trend: compare earliest vs latest hour's pass rate
        if len(self.stats["by_hour"]) >= 2:
            hours = sorted(self.stats["by_hour"].keys())
            first_hour = hours[0]
            last_hour = hours[-1]
            first_rate = (self.stats["by_hour"][first_hour]["passed"] / self.stats["by_hour"][first_hour]["total"]) * 100
            last_rate = (self.stats["by_hour"][last_hour]["passed"] / self.stats["by_hour"][last_hour]["total"]) * 100
            if last_rate > first_rate + 10:
                recommendations.append(f"📈 Performance improving over time (+{last_rate - first_rate:.1f}% pass rate).")
            elif last_rate < first_rate - 10:
                recommendations.append(f"📉 Performance degrading over time (-{first_rate - last_rate:.1f}% pass rate). Check for resource exhaustion.")

        return recommendations

    def save_reports(self, output_dir: str = "~/timmy/reports"):
        """Save JSON and markdown reports to output_dir (created if missing).

        Returns:
            Tuple of (json_file_path, markdown_file_path).
        """
        output_path = Path(output_dir).expanduser()
        output_path.mkdir(parents=True, exist_ok=True)

        date_str = datetime.now().strftime("%Y%m%d")

        # Build the report once and reuse it for both output formats.
        json_report = self.generate_json()

        # Save JSON (UTF-8 so emoji in error strings/recommendations survive)
        json_file = output_path / f"scorecard_{date_str}.json"
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(json_report, f, indent=2)
        print(f"JSON report saved: {json_file}")

        # Save Markdown (contains emoji; platform default codecs can fail)
        md_file = output_path / f"scorecard_{date_str}.md"
        md_report = self.generate_markdown(json_report)
        with open(md_file, 'w', encoding='utf-8') as f:
            f.write(md_report)
        print(f"Markdown report saved: {md_file}")

        return json_file, md_file


def main():
    """CLI entry point"""
    import argparse
    parser = argparse.ArgumentParser(description="Generate scorecard from overnight loop JSONL")
    parser.add_argument("--input", "-i", default="~/shared/overnight-loop",
                        help="Input directory with JSONL files")
    parser.add_argument("--output", "-o", default="~/timmy/reports",
                        help="Output directory for reports")
    args = parser.parse_args()

    print("="*60)
    print("UNI-WIZARD SCORECARD GENERATOR")
    print("="*60)
    print()

    generator = ScorecardGenerator(input_dir=args.input)
    generator.load_all()
    generator.analyze()

    if generator.stats["total"] > 0:
        json_file, md_file = generator.save_reports(output_dir=args.output)
        print()
        print("="*60)
        print("REPORTS GENERATED")
        print("="*60)
        print(f"JSON: {json_file}")
        print(f"Markdown: {md_file}")
    else:
        print("No data to report")


if __name__ == "__main__":
    main()