diff --git a/docs/SCORECARD.md b/docs/SCORECARD.md new file mode 100644 index 0000000..5fa9751 --- /dev/null +++ b/docs/SCORECARD.md @@ -0,0 +1,125 @@ +# Scorecard Generator Documentation + +## Overview + +The Scorecard Generator analyzes overnight loop JSONL data and produces comprehensive reports with statistics, trends, and recommendations. + +## Usage + +### Basic Usage + +```bash +# Generate scorecard from default input directory +python uni-wizard/scripts/generate_scorecard.py + +# Specify custom input/output directories +python uni-wizard/scripts/generate_scorecard.py \ + --input ~/shared/overnight-loop \ + --output ~/timmy/reports +``` + +### Cron Setup + +```bash +# Generate scorecard every morning at 6 AM +0 6 * * * /root/timmy/venv/bin/python /root/timmy/uni-wizard/scripts/generate_scorecard.py +``` + +## Input Format + +JSONL files in `~/shared/overnight-loop/*.jsonl`: + +```json +{"task": "read-soul", "status": "pass", "duration_s": 19.7, "timestamp": "2026-03-29T21:54:12Z"} +{"task": "check-health", "status": "fail", "duration_s": 5.2, "error": "timeout", "timestamp": "2026-03-29T22:15:33Z"} +``` + +Fields: +- `task`: Task identifier +- `status`: "pass" or "fail" +- `duration_s`: Execution time in seconds +- `timestamp`: ISO 8601 timestamp +- `error`: Error message (for failed tasks) + +## Output + +### JSON Report + +`~/timmy/reports/scorecard_YYYYMMDD.json`: + +```json +{ + "generated_at": "2026-03-30T06:00:00Z", + "summary": { + "total_tasks": 100, + "passed": 95, + "failed": 5, + "pass_rate": 95.0, + "duration_stats": { + "avg": 12.5, + "median": 10.2, + "p95": 45.0, + "min": 1.2, + "max": 120.5 + } + }, + "by_task": {...}, + "by_hour": {...}, + "errors": {...}, + "recommendations": [...] 
#!/usr/bin/env python3
"""
JSONL Scorecard Generator for Uni-Wizard
Analyzes overnight loop results and produces comprehensive reports
"""

import json
import sys
from pathlib import Path
from datetime import datetime, timezone
from collections import defaultdict
from typing import Dict, List, Any
import statistics


class ScorecardGenerator:
    """
    Generates scorecards from overnight loop JSONL data.

    Analyzes:
    - Pass/fail rates
    - Response times (avg, median, p95)
    - Per-task breakdowns
    - Error patterns
    - Timeline trends
    """

    def __init__(self, input_dir: str = "~/shared/overnight-loop"):
        """Set up an empty statistics accumulator rooted at *input_dir*.

        Args:
            input_dir: Directory holding ``*.jsonl`` overnight-loop records.
                ``~`` is expanded so cron/CLI defaults work unmodified.
        """
        self.input_dir = Path(input_dir).expanduser()
        self.tasks: List[Dict] = []
        # All counters live in one dict so analyze()/generate_json() share state.
        self.stats: Dict[str, Any] = {
            "total": 0,
            "passed": 0,
            "failed": 0,
            "pass_rate": 0.0,
            "durations": [],
            "by_task": defaultdict(lambda: {"total": 0, "passed": 0, "failed": 0, "durations": []}),
            "by_hour": defaultdict(lambda: {"total": 0, "passed": 0, "durations": []}),
            "errors": defaultdict(int),
        }

    def load_jsonl(self, filepath: Path) -> List[Dict]:
        """Load and parse a JSONL file, handling errors gracefully.

        Malformed lines are skipped with a warning (documented behavior in
        docs/SCORECARD.md) rather than aborting the whole report.
        """
        tasks: List[Dict] = []
        with open(filepath, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    tasks.append(json.loads(line))
                except json.JSONDecodeError:
                    print(f"Warning: Skipping malformed line {line_num} in {filepath}")
        return tasks

    def load_all(self):
        """Load all JSONL files from the input directory into ``self.tasks``."""
        if not self.input_dir.exists():
            print(f"Input directory not found: {self.input_dir}")
            return

        jsonl_files = list(self.input_dir.glob("*.jsonl"))
        if not jsonl_files:
            print(f"No .jsonl files found in {self.input_dir}")
            return

        # Sorted for deterministic ordering across runs.
        for filepath in sorted(jsonl_files):
            print(f"Loading: {filepath.name}")
            self.tasks.extend(self.load_jsonl(filepath))

        print(f"Loaded {len(self.tasks)} tasks from {len(jsonl_files)} files")

    def analyze(self):
        """Fold every loaded task into ``self.stats`` and compute the pass rate."""
        if not self.tasks:
            print("No tasks to analyze")
            return

        for task in self.tasks:
            self._process_task(task)

        # Calculate overall pass rate (guard against divide-by-zero).
        if self.stats["total"] > 0:
            self.stats["pass_rate"] = (self.stats["passed"] / self.stats["total"]) * 100

        print(f"Analysis complete: {self.stats['passed']}/{self.stats['total']} passed ({self.stats['pass_rate']:.1f}%)")

    def _process_task(self, task: Dict):
        """Process a single task record into the running statistics."""
        self.stats["total"] += 1

        status = task.get("status", "unknown")
        duration = task.get("duration_s", 0)
        task_type = task.get("task", "unknown")
        timestamp = task.get("timestamp", "")

        # Pass/fail tallies (anything that is not "pass" counts as a failure).
        if status == "pass":
            self.stats["passed"] += 1
            self.stats["by_task"][task_type]["passed"] += 1
        else:
            self.stats["failed"] += 1
            self.stats["by_task"][task_type]["failed"] += 1
            # BUGFIX: only failed records carry an `error` field (per the input
            # format docs). The old code counted an error for EVERY task, which
            # inflated the report with one "unknown_error" per passing task.
            self.stats["errors"][task.get("error", "unknown_error")] += 1

        # Durations
        self.stats["durations"].append(duration)
        self.stats["by_task"][task_type]["durations"].append(duration)
        self.stats["by_task"][task_type]["total"] += 1

        # Hourly bucket keyed by the "YYYY-MM-DDTHH" prefix of the ISO
        # timestamp. Explicit length guard replaces the old bare `except:`
        # and also keeps truncated timestamps from creating junk buckets.
        if isinstance(timestamp, str) and len(timestamp) >= 13:
            hour = timestamp[:13]
            self.stats["by_hour"][hour]["total"] += 1
            if status == "pass":
                self.stats["by_hour"][hour]["passed"] += 1
            self.stats["by_hour"][hour]["durations"].append(duration)

    def calculate_duration_stats(self, durations: List[float]) -> Dict[str, float]:
        """Return avg/median/p95/min/max for *durations* (zeros when empty)."""
        if not durations:
            return {"avg": 0, "median": 0, "p95": 0, "min": 0, "max": 0}

        sorted_durations = sorted(durations)
        n = len(sorted_durations)
        # Nearest-rank p95; clamp the index so it can never run off the end.
        p95_index = min(int(n * 0.95), n - 1)

        return {
            "avg": round(statistics.mean(durations), 2),
            "median": round(statistics.median(durations), 2),
            "p95": round(sorted_durations[p95_index], 2),
            "min": round(min(durations), 2),
            "max": round(max(durations), 2),
        }

    def generate_json(self) -> Dict:
        """Generate the structured JSON report from accumulated stats."""
        duration_stats = self.calculate_duration_stats(self.stats["durations"])

        report = {
            # Timezone-aware UTC with a trailing "Z", matching the documented
            # output format (the old naive datetime.now() had no zone info).
            "generated_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            "summary": {
                "total_tasks": self.stats["total"],
                "passed": self.stats["passed"],
                "failed": self.stats["failed"],
                "pass_rate": round(self.stats["pass_rate"], 2),
                "duration_stats": duration_stats,
            },
            "by_task": {},
            "by_hour": {},
            "errors": dict(self.stats["errors"]),
            "recommendations": self._generate_recommendations(),
        }

        # Per-task breakdown
        for task_type, data in self.stats["by_task"].items():
            if data["total"] > 0:
                pass_rate = (data["passed"] / data["total"]) * 100
                report["by_task"][task_type] = {
                    "total": data["total"],
                    "passed": data["passed"],
                    "failed": data["failed"],
                    "pass_rate": round(pass_rate, 2),
                    "duration_stats": self.calculate_duration_stats(data["durations"]),
                }

        # Hourly breakdown, sorted chronologically (keys are ISO prefixes).
        for hour, data in sorted(self.stats["by_hour"].items()):
            if data["total"] > 0:
                pass_rate = (data["passed"] / data["total"]) * 100
                report["by_hour"][hour] = {
                    "total": data["total"],
                    "passed": data["passed"],
                    "pass_rate": round(pass_rate, 2),
                    "avg_duration": round(statistics.mean(data["durations"]), 2) if data["durations"] else 0,
                }

        return report

    def generate_markdown(self) -> str:
        """Render the JSON report as a human-readable markdown scorecard."""
        json_report = self.generate_json()

        md = f"""# Overnight Loop Scorecard

**Generated:** {json_report['generated_at']}

---

## Summary

| Metric | Value |
|--------|-------|
| Total Tasks | {json_report['summary']['total_tasks']} |
| Passed | {json_report['summary']['passed']} ✅ |
| Failed | {json_report['summary']['failed']} ❌ |
| **Pass Rate** | **{json_report['summary']['pass_rate']:.1f}%** |

### Duration Statistics

| Metric | Value (seconds) |
|--------|-----------------|
| Average | {json_report['summary']['duration_stats']['avg']} |
| Median | {json_report['summary']['duration_stats']['median']} |
| P95 | {json_report['summary']['duration_stats']['p95']} |
| Min | {json_report['summary']['duration_stats']['min']} |
| Max | {json_report['summary']['duration_stats']['max']} |

---

## Per-Task Breakdown

| Task | Total | Passed | Failed | Pass Rate | Avg Duration |
|------|-------|--------|--------|-----------|--------------|
"""

        # Sort by pass rate (ascending - worst first) so problems surface at the top.
        sorted_tasks = sorted(
            json_report['by_task'].items(),
            key=lambda x: x[1]['pass_rate']
        )

        for task_type, data in sorted_tasks:
            status = "✅" if data['pass_rate'] >= 90 else "⚠️" if data['pass_rate'] >= 70 else "❌"
            md += f"| {task_type} | {data['total']} | {data['passed']} | {data['failed']} | {status} {data['pass_rate']:.1f}% | {data['duration_stats']['avg']}s |\n"

        md += """
---

## Timeline (Hourly)

| Hour | Tasks | Passed | Pass Rate | Avg Duration |
|------|-------|--------|-----------|--------------|
"""

        for hour, data in sorted(json_report['by_hour'].items()):
            trend = "📈" if data['pass_rate'] >= 90 else "📊" if data['pass_rate'] >= 70 else "📉"
            md += f"| {hour} | {data['total']} | {data['passed']} | {trend} {data['pass_rate']:.1f}% | {data['avg_duration']}s |\n"

        md += """
---

## Error Analysis

| Error Pattern | Count |
|---------------|-------|
"""

        # Most frequent errors first.
        for error, count in sorted(json_report['errors'].items(), key=lambda x: x[1], reverse=True):
            md += f"| {error} | {count} |\n"

        md += """
---

## Recommendations

"""

        for rec in json_report['recommendations']:
            md += f"- {rec}\n"

        md += """
---

*Generated by Uni-Wizard Scorecard Generator*
"""

        return md

    def _generate_recommendations(self) -> List[str]:
        """Generate actionable recommendations based on the analyzed stats."""
        recommendations = []

        # Overall pass rate against the documented thresholds.
        if self.stats["pass_rate"] < 70:
            recommendations.append(f"⚠️ Overall pass rate ({self.stats['pass_rate']:.1f}%) is concerning. Review infrastructure health.")
        elif self.stats["pass_rate"] >= 95:
            recommendations.append(f"✅ Excellent pass rate ({self.stats['pass_rate']:.1f}%). System is performing well.")

        # Tasks failing more often than they pass.
        failing_tasks = []
        for task_type, data in self.stats["by_task"].items():
            if data["total"] > 0:
                pass_rate = (data["passed"] / data["total"]) * 100
                if pass_rate < 50:
                    failing_tasks.append(task_type)

        if failing_tasks:
            recommendations.append(f"❌ Tasks with <50% pass rate: {', '.join(failing_tasks)}. Consider debugging or removing.")

        # Tasks taking >30s on average (docs: "Very slow - consider optimization").
        slow_tasks = []
        for task_type, data in self.stats["by_task"].items():
            if data["durations"]:
                avg = statistics.mean(data["durations"])
                if avg > 30:
                    slow_tasks.append(f"{task_type} ({avg:.1f}s)")

        if slow_tasks:
            recommendations.append(f"⏱️ Slow tasks detected: {', '.join(slow_tasks)}. Consider optimization.")

        # Surface the single most frequent error pattern.
        if self.stats["errors"]:
            top_error = max(self.stats["errors"].items(), key=lambda x: x[1])
            recommendations.append(f"🔍 Most common error: '{top_error[0]}' ({top_error[1]} occurrences). Investigate root cause.")

        # Timeline trend: compare the first and last hourly buckets.
        if len(self.stats["by_hour"]) >= 2:
            hours = sorted(self.stats["by_hour"].keys())
            first_hour = hours[0]
            last_hour = hours[-1]

            # Buckets only exist once total >= 1, so these divisions are safe.
            first_rate = (self.stats["by_hour"][first_hour]["passed"] / self.stats["by_hour"][first_hour]["total"]) * 100
            last_rate = (self.stats["by_hour"][last_hour]["passed"] / self.stats["by_hour"][last_hour]["total"]) * 100

            if last_rate > first_rate + 10:
                recommendations.append(f"📈 Performance improving over time (+{last_rate - first_rate:.1f}% pass rate).")
            elif last_rate < first_rate - 10:
                recommendations.append(f"📉 Performance degrading over time (-{first_rate - last_rate:.1f}% pass rate). Check for resource exhaustion.")

        return recommendations

    def save_reports(self, output_dir: str = "~/timmy/reports"):
        """Save JSON and markdown reports; returns (json_path, md_path)."""
        output_path = Path(output_dir).expanduser()
        output_path.mkdir(parents=True, exist_ok=True)

        # Local date in the filename so the 6 AM cron run names files predictably.
        date_str = datetime.now().strftime("%Y%m%d")

        # Save JSON. ensure_ascii=False + explicit utf-8 keep the emoji in
        # recommendations readable regardless of the platform's default locale.
        json_file = output_path / f"scorecard_{date_str}.json"
        json_report = self.generate_json()
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(json_report, f, indent=2, ensure_ascii=False)
        print(f"JSON report saved: {json_file}")

        # Save Markdown
        md_file = output_path / f"scorecard_{date_str}.md"
        md_report = self.generate_markdown()
        with open(md_file, 'w', encoding='utf-8') as f:
            f.write(md_report)
        print(f"Markdown report saved: {md_file}")

        return json_file, md_file


def main():
    """CLI entry point"""
    import argparse

    parser = argparse.ArgumentParser(description="Generate scorecard from overnight loop JSONL")
    parser.add_argument("--input", "-i", default="~/shared/overnight-loop", help="Input directory with JSONL files")
    parser.add_argument("--output", "-o", default="~/timmy/reports", help="Output directory for reports")

    args = parser.parse_args()

    print("="*60)
    print("UNI-WIZARD SCORECARD GENERATOR")
    print("="*60)
    print()

    generator = ScorecardGenerator(input_dir=args.input)
    generator.load_all()
    generator.analyze()

    if generator.stats["total"] > 0:
        json_file, md_file = generator.save_reports(output_dir=args.output)
        print()
        print("="*60)
        print("REPORTS GENERATED")
        print("="*60)
        print(f"JSON: {json_file}")
        print(f"Markdown: {md_file}")
    else:
        print("No data to report")


if __name__ == "__main__":
    main()