Merge pull request '[#79] JSONL Scorecard Generator - overnight loop analysis' (#102) from feature/scorecard-generator into main

This commit was merged in pull request #102.
This commit is contained in:
2026-03-30 15:58:11 +00:00
2 changed files with 513 additions and 0 deletions

File 1: docs/SCORECARD.md — new file, 125 lines

@@ -0,0 +1,125 @@
# Scorecard Generator Documentation
## Overview
The Scorecard Generator analyzes overnight loop JSONL data and produces comprehensive reports with statistics, trends, and recommendations.
## Usage
### Basic Usage
```bash
# Generate scorecard from default input directory
python uni-wizard/scripts/generate_scorecard.py
# Specify custom input/output directories
python uni-wizard/scripts/generate_scorecard.py \
--input ~/shared/overnight-loop \
--output ~/timmy/reports
```
### Cron Setup
```bash
# Generate scorecard every morning at 6 AM
0 6 * * * /root/timmy/venv/bin/python /root/timmy/uni-wizard/scripts/generate_scorecard.py
```
## Input Format
JSONL files in `~/shared/overnight-loop/*.jsonl`:
```json
{"task": "read-soul", "status": "pass", "duration_s": 19.7, "timestamp": "2026-03-29T21:54:12Z"}
{"task": "check-health", "status": "fail", "duration_s": 5.2, "error": "timeout", "timestamp": "2026-03-29T22:15:33Z"}
```
Fields:
- `task`: Task identifier
- `status`: "pass" or "fail"
- `duration_s`: Execution time in seconds
- `timestamp`: ISO 8601 timestamp
- `error`: Error message (for failed tasks)
## Output
### JSON Report
`~/timmy/reports/scorecard_YYYYMMDD.json`:
```json
{
"generated_at": "2026-03-30T06:00:00Z",
"summary": {
"total_tasks": 100,
"passed": 95,
"failed": 5,
"pass_rate": 95.0,
"duration_stats": {
"avg": 12.5,
"median": 10.2,
"p95": 45.0,
"min": 1.2,
"max": 120.5
}
},
"by_task": {...},
"by_hour": {...},
"errors": {...},
"recommendations": [...]
}
```
### Markdown Report
`~/timmy/reports/scorecard_YYYYMMDD.md`:
- Executive summary with pass/fail counts
- Duration statistics (avg, median, p95)
- Per-task breakdown with pass rates
- Hourly timeline showing performance trends
- Error analysis with frequency counts
- Actionable recommendations
## Report Interpretation
### Pass Rate Thresholds
| Pass Rate | Status | Action |
|-----------|--------|--------|
| 95%+ | ✅ Excellent | Continue current operations |
| 85-94% | ⚠️ Good | Monitor for degradation |
| 70-84% | ⚠️ Fair | Review failing tasks |
| <70% | ❌ Poor | Immediate investigation required |
### Duration Guidelines
| Duration | Assessment |
|----------|------------|
| <5s | Fast |
| 5-15s | Normal |
| 15-30s | Slow |
| >30s | Very slow - consider optimization |
## Troubleshooting
### No JSONL files found
```bash
# Check input directory
ls -la ~/shared/overnight-loop/
# Ensure Syncthing is syncing
systemctl status syncthing@root
```
### Malformed lines
The generator skips malformed lines with a warning. Check the JSONL files for syntax errors.
### Empty reports
If no data exists, verify:
1. Overnight loop is running and writing JSONL
2. File permissions allow reading
3. Input path is correct

File 2: uni-wizard/scripts/generate_scorecard.py — new file, 388 lines

@@ -0,0 +1,388 @@
#!/usr/bin/env python3
"""
JSONL Scorecard Generator for Uni-Wizard
Analyzes overnight loop results and produces comprehensive reports
"""
import json
import statistics
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List
class ScorecardGenerator:
    """
    Generates scorecards from overnight loop JSONL data.

    Analyzes:
    - Pass/fail rates
    - Response times (avg, median, p95)
    - Per-task breakdowns
    - Error patterns
    - Timeline trends
    """

    def __init__(self, input_dir: str = "~/shared/overnight-loop"):
        """Initialize the generator.

        Args:
            input_dir: Directory containing ``*.jsonl`` input files.
                ``~`` is expanded so cron/CLI paths both work.
        """
        self.input_dir = Path(input_dir).expanduser()
        # Raw task records loaded from all JSONL files, in file order.
        self.tasks: List[Dict] = []
        # Aggregated counters; defaultdicts let _process_task increment
        # without pre-registering task types / hours / error strings.
        self.stats: Dict[str, Any] = {
            "total": 0,
            "passed": 0,
            "failed": 0,
            "pass_rate": 0.0,
            "durations": [],
            "by_task": defaultdict(lambda: {"total": 0, "passed": 0, "failed": 0, "durations": []}),
            "by_hour": defaultdict(lambda: {"total": 0, "passed": 0, "durations": []}),
            "errors": defaultdict(int),
        }

    def load_jsonl(self, filepath: Path) -> List[Dict]:
        """Load and parse a JSONL file, handling errors gracefully.

        Malformed lines are skipped with a warning rather than aborting,
        so one bad record cannot lose a whole night of data.
        """
        tasks = []
        with open(filepath, "r", encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    tasks.append(json.loads(line))
                except json.JSONDecodeError:
                    print(f"Warning: Skipping malformed line {line_num} in {filepath}")
                    continue
        return tasks

    def load_all(self):
        """Load all JSONL files from the input directory into ``self.tasks``.

        Missing directory or an empty glob are reported and tolerated
        (the caller checks ``stats['total']`` before writing reports).
        """
        if not self.input_dir.exists():
            print(f"Input directory not found: {self.input_dir}")
            return
        jsonl_files = list(self.input_dir.glob("*.jsonl"))
        if not jsonl_files:
            print(f"No .jsonl files found in {self.input_dir}")
            return
        # Sorted so multi-file loads are deterministic run-to-run.
        for filepath in sorted(jsonl_files):
            print(f"Loading: {filepath.name}")
            self.tasks.extend(self.load_jsonl(filepath))
        print(f"Loaded {len(self.tasks)} tasks from {len(jsonl_files)} files")

    def analyze(self):
        """Analyze all loaded tasks and populate ``self.stats``."""
        if not self.tasks:
            print("No tasks to analyze")
            return
        for task in self.tasks:
            self._process_task(task)
        # Calculate overall pass rate (guard against division by zero).
        if self.stats["total"] > 0:
            self.stats["pass_rate"] = (self.stats["passed"] / self.stats["total"]) * 100
        print(f"Analysis complete: {self.stats['passed']}/{self.stats['total']} passed ({self.stats['pass_rate']:.1f}%)")

    def _process_task(self, task: Dict):
        """Fold a single task record into the aggregate stats.

        Any status other than exactly "pass" (including a missing status)
        counts as a failure; missing durations count as 0 seconds.
        """
        self.stats["total"] += 1
        status = task.get("status", "unknown")
        duration = task.get("duration_s", 0)
        task_type = task.get("task", "unknown")
        timestamp = task.get("timestamp", "")
        # Pass/fail
        if status == "pass":
            self.stats["passed"] += 1
            self.stats["by_task"][task_type]["passed"] += 1
        else:
            self.stats["failed"] += 1
            self.stats["by_task"][task_type]["failed"] += 1
            # Track error patterns (failures only).
            error = task.get("error", "unknown_error")
            self.stats["errors"][error] += 1
        # Durations
        self.stats["durations"].append(duration)
        self.stats["by_task"][task_type]["durations"].append(duration)
        self.stats["by_task"][task_type]["total"] += 1
        # Hourly breakdown, bucketed by the YYYY-MM-DDTHH timestamp prefix.
        if timestamp:
            try:
                hour = timestamp[:13]  # YYYY-MM-DDTHH
            except TypeError:
                # Non-string timestamp (e.g. a number) — skip the hourly
                # bucket but keep the record in the overall totals.
                # (Was a bare `except: pass`, which hid real bugs.)
                return
            self.stats["by_hour"][hour]["total"] += 1
            if status == "pass":
                self.stats["by_hour"][hour]["passed"] += 1
            self.stats["by_hour"][hour]["durations"].append(duration)

    def calculate_duration_stats(self, durations: List[float]) -> Dict[str, float]:
        """Calculate duration statistics (avg/median/p95/min/max), rounded to 2dp.

        Returns all-zero stats for an empty list so report templates never
        have to special-case missing data.
        """
        if not durations:
            return {"avg": 0, "median": 0, "p95": 0, "min": 0, "max": 0}
        sorted_durations = sorted(durations)
        n = len(sorted_durations)
        return {
            "avg": round(statistics.mean(durations), 2),
            "median": round(statistics.median(durations), 2),
            # Nearest-rank p95; int(n * 0.95) < n always, so the index is safe.
            "p95": round(sorted_durations[int(n * 0.95)] if n > 1 else sorted_durations[0], 2),
            "min": round(min(durations), 2),
            "max": round(max(durations), 2),
        }

    def generate_json(self) -> Dict:
        """Generate the structured JSON report from ``self.stats``."""
        duration_stats = self.calculate_duration_stats(self.stats["durations"])
        report = {
            # UTC with trailing "Z", matching the documented output format
            # (docs/SCORECARD.md shows e.g. "2026-03-30T06:00:00Z").
            "generated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
            "summary": {
                "total_tasks": self.stats["total"],
                "passed": self.stats["passed"],
                "failed": self.stats["failed"],
                "pass_rate": round(self.stats["pass_rate"], 2),
                "duration_stats": duration_stats,
            },
            "by_task": {},
            "by_hour": {},
            "errors": dict(self.stats["errors"]),
            "recommendations": self._generate_recommendations(),
        }
        # Per-task breakdown
        for task_type, data in self.stats["by_task"].items():
            if data["total"] > 0:
                pass_rate = (data["passed"] / data["total"]) * 100
                report["by_task"][task_type] = {
                    "total": data["total"],
                    "passed": data["passed"],
                    "failed": data["failed"],
                    "pass_rate": round(pass_rate, 2),
                    "duration_stats": self.calculate_duration_stats(data["durations"]),
                }
        # Hourly breakdown (sorted so the timeline is chronological)
        for hour, data in sorted(self.stats["by_hour"].items()):
            if data["total"] > 0:
                pass_rate = (data["passed"] / data["total"]) * 100
                report["by_hour"][hour] = {
                    "total": data["total"],
                    "passed": data["passed"],
                    "pass_rate": round(pass_rate, 2),
                    "avg_duration": round(statistics.mean(data["durations"]), 2) if data["durations"] else 0,
                }
        return report

    def generate_markdown(self) -> str:
        """Generate the human-readable markdown report (built on generate_json)."""
        json_report = self.generate_json()
        md = f"""# Overnight Loop Scorecard
**Generated:** {json_report['generated_at']}
---
## Summary
| Metric | Value |
|--------|-------|
| Total Tasks | {json_report['summary']['total_tasks']} |
| Passed | {json_report['summary']['passed']} ✅ |
| Failed | {json_report['summary']['failed']} ❌ |
| **Pass Rate** | **{json_report['summary']['pass_rate']:.1f}%** |
### Duration Statistics
| Metric | Value (seconds) |
|--------|-----------------|
| Average | {json_report['summary']['duration_stats']['avg']} |
| Median | {json_report['summary']['duration_stats']['median']} |
| P95 | {json_report['summary']['duration_stats']['p95']} |
| Min | {json_report['summary']['duration_stats']['min']} |
| Max | {json_report['summary']['duration_stats']['max']} |
---
## Per-Task Breakdown
| Task | Total | Passed | Failed | Pass Rate | Avg Duration |
|------|-------|--------|--------|-----------|--------------|
"""
        # Sort by pass rate (ascending - worst first)
        sorted_tasks = sorted(
            json_report['by_task'].items(),
            key=lambda x: x[1]['pass_rate']
        )
        for task_type, data in sorted_tasks:
            # ✅/⚠️/❌ thresholds mirror the pass-rate table in docs/SCORECARD.md.
            status = "✅" if data['pass_rate'] >= 90 else "⚠️" if data['pass_rate'] >= 70 else "❌"
            md += f"| {task_type} | {data['total']} | {data['passed']} | {data['failed']} | {status} {data['pass_rate']:.1f}% | {data['duration_stats']['avg']}s |\n"
        md += """
---
## Timeline (Hourly)
| Hour | Tasks | Passed | Pass Rate | Avg Duration |
|------|-------|--------|-----------|--------------|
"""
        for hour, data in sorted(json_report['by_hour'].items()):
            trend = "📈" if data['pass_rate'] >= 90 else "📊" if data['pass_rate'] >= 70 else "📉"
            md += f"| {hour} | {data['total']} | {data['passed']} | {trend} {data['pass_rate']:.1f}% | {data['avg_duration']}s |\n"
        md += """
---
## Error Analysis
| Error Pattern | Count |
|---------------|-------|
"""
        # Most frequent errors first.
        for error, count in sorted(json_report['errors'].items(), key=lambda x: x[1], reverse=True):
            md += f"| {error} | {count} |\n"
        md += """
---
## Recommendations
"""
        for rec in json_report['recommendations']:
            md += f"- {rec}\n"
        md += """
---
*Generated by Uni-Wizard Scorecard Generator*
"""
        return md

    def _generate_recommendations(self) -> List[str]:
        """Generate actionable recommendation strings based on the analysis."""
        recommendations = []
        # Check overall pass rate
        if self.stats["pass_rate"] < 70:
            recommendations.append(f"⚠️ Overall pass rate ({self.stats['pass_rate']:.1f}%) is concerning. Review infrastructure health.")
        elif self.stats["pass_rate"] >= 95:
            recommendations.append(f"✅ Excellent pass rate ({self.stats['pass_rate']:.1f}%). System is performing well.")
        # Check for failing tasks (strictly below 50% pass rate)
        failing_tasks = []
        for task_type, data in self.stats["by_task"].items():
            if data["total"] > 0:
                pass_rate = (data["passed"] / data["total"]) * 100
                if pass_rate < 50:
                    failing_tasks.append(task_type)
        if failing_tasks:
            recommendations.append(f"❌ Tasks with <50% pass rate: {', '.join(failing_tasks)}. Consider debugging or removing.")
        # Check for slow tasks
        slow_tasks = []
        for task_type, data in self.stats["by_task"].items():
            if data["durations"]:
                avg = statistics.mean(data["durations"])
                if avg > 30:  # Tasks taking >30s on average
                    slow_tasks.append(f"{task_type} ({avg:.1f}s)")
        if slow_tasks:
            recommendations.append(f"⏱️ Slow tasks detected: {', '.join(slow_tasks)}. Consider optimization.")
        # Check error patterns
        if self.stats["errors"]:
            top_error = max(self.stats["errors"].items(), key=lambda x: x[1])
            recommendations.append(f"🔍 Most common error: '{top_error[0]}' ({top_error[1]} occurrences). Investigate root cause.")
        # Timeline trend: compare first vs last hourly bucket (>=10 point swing)
        if len(self.stats["by_hour"]) >= 2:
            hours = sorted(self.stats["by_hour"].keys())
            first_hour = hours[0]
            last_hour = hours[-1]
            first_rate = (self.stats["by_hour"][first_hour]["passed"] / self.stats["by_hour"][first_hour]["total"]) * 100
            last_rate = (self.stats["by_hour"][last_hour]["passed"] / self.stats["by_hour"][last_hour]["total"]) * 100
            if last_rate > first_rate + 10:
                recommendations.append(f"📈 Performance improving over time (+{last_rate - first_rate:.1f}% pass rate).")
            elif last_rate < first_rate - 10:
                recommendations.append(f"📉 Performance degrading over time (-{first_rate - last_rate:.1f}% pass rate). Check for resource exhaustion.")
        return recommendations

    def save_reports(self, output_dir: str = "~/timmy/reports"):
        """Save JSON and markdown reports as ``scorecard_YYYYMMDD.{json,md}``.

        Returns:
            Tuple of (json_file, md_file) paths.
        """
        output_path = Path(output_dir).expanduser()
        output_path.mkdir(parents=True, exist_ok=True)
        # Filenames are dated with the local run date (cron runs at 6 AM local).
        date_str = datetime.now().strftime("%Y%m%d")
        # Save JSON
        json_file = output_path / f"scorecard_{date_str}.json"
        json_report = self.generate_json()
        with open(json_file, "w", encoding="utf-8") as f:
            json.dump(json_report, f, indent=2)
        print(f"JSON report saved: {json_file}")
        # Save Markdown (explicit UTF-8: the report contains emoji)
        md_file = output_path / f"scorecard_{date_str}.md"
        md_report = self.generate_markdown()
        with open(md_file, "w", encoding="utf-8") as f:
            f.write(md_report)
        print(f"Markdown report saved: {md_file}")
        return json_file, md_file
def main():
    """CLI entry point: parse arguments, run the generator, report results."""
    import argparse

    parser = argparse.ArgumentParser(description="Generate scorecard from overnight loop JSONL")
    parser.add_argument("--input", "-i", default="~/shared/overnight-loop", help="Input directory with JSONL files")
    parser.add_argument("--output", "-o", default="~/timmy/reports", help="Output directory for reports")
    args = parser.parse_args()

    banner = "=" * 60
    print(banner)
    print("UNI-WIZARD SCORECARD GENERATOR")
    print(banner)
    print()

    generator = ScorecardGenerator(input_dir=args.input)
    generator.load_all()
    generator.analyze()

    # Guard clause: nothing loaded means nothing to write.
    if generator.stats["total"] == 0:
        print("No data to report")
        return

    json_file, md_file = generator.save_reports(output_dir=args.output)
    print()
    print(banner)
    print("REPORTS GENERATED")
    print(banner)
    print(f"JSON: {json_file}")
    print(f"Markdown: {md_file}")


if __name__ == "__main__":
    main()