Merge pull request '[#79] JSONL Scorecard Generator - overnight loop analysis' (#102) from feature/scorecard-generator into main
This commit was merged in pull request #102.
This commit is contained in:
125
docs/SCORECARD.md
Normal file
125
docs/SCORECARD.md
Normal file
@@ -0,0 +1,125 @@
|
||||
# Scorecard Generator Documentation
|
||||
|
||||
## Overview
|
||||
|
||||
The Scorecard Generator analyzes overnight loop JSONL data and produces comprehensive reports with statistics, trends, and recommendations.
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```bash
|
||||
# Generate scorecard from default input directory
|
||||
python uni-wizard/scripts/generate_scorecard.py
|
||||
|
||||
# Specify custom input/output directories
|
||||
python uni-wizard/scripts/generate_scorecard.py \
|
||||
--input ~/shared/overnight-loop \
|
||||
--output ~/timmy/reports
|
||||
```
|
||||
|
||||
### Cron Setup
|
||||
|
||||
```bash
|
||||
# Generate scorecard every morning at 6 AM
|
||||
0 6 * * * /root/timmy/venv/bin/python /root/timmy/uni-wizard/scripts/generate_scorecard.py
|
||||
```
|
||||
|
||||
## Input Format
|
||||
|
||||
JSONL files in `~/shared/overnight-loop/*.jsonl`:
|
||||
|
||||
```json
|
||||
{"task": "read-soul", "status": "pass", "duration_s": 19.7, "timestamp": "2026-03-29T21:54:12Z"}
|
||||
{"task": "check-health", "status": "fail", "duration_s": 5.2, "error": "timeout", "timestamp": "2026-03-29T22:15:33Z"}
|
||||
```
|
||||
|
||||
Fields:
|
||||
- `task`: Task identifier
|
||||
- `status`: "pass" or "fail"
|
||||
- `duration_s`: Execution time in seconds
|
||||
- `timestamp`: ISO 8601 timestamp
|
||||
- `error`: Error message (for failed tasks)
|
||||
|
||||
## Output
|
||||
|
||||
### JSON Report
|
||||
|
||||
`~/timmy/reports/scorecard_YYYYMMDD.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"generated_at": "2026-03-30T06:00:00Z",
|
||||
"summary": {
|
||||
"total_tasks": 100,
|
||||
"passed": 95,
|
||||
"failed": 5,
|
||||
"pass_rate": 95.0,
|
||||
"duration_stats": {
|
||||
"avg": 12.5,
|
||||
"median": 10.2,
|
||||
"p95": 45.0,
|
||||
"min": 1.2,
|
||||
"max": 120.5
|
||||
}
|
||||
},
|
||||
"by_task": {...},
|
||||
"by_hour": {...},
|
||||
"errors": {...},
|
||||
"recommendations": [...]
|
||||
}
|
||||
```
|
||||
|
||||
### Markdown Report
|
||||
|
||||
`~/timmy/reports/scorecard_YYYYMMDD.md`:
|
||||
|
||||
- Executive summary with pass/fail counts
|
||||
- Duration statistics (avg, median, p95)
|
||||
- Per-task breakdown with pass rates
|
||||
- Hourly timeline showing performance trends
|
||||
- Error analysis with frequency counts
|
||||
- Actionable recommendations
|
||||
|
||||
## Report Interpretation
|
||||
|
||||
### Pass Rate Thresholds
|
||||
|
||||
| Pass Rate | Status | Action |
|
||||
|-----------|--------|--------|
|
||||
| 95%+ | ✅ Excellent | Continue current operations |
|
||||
| 85-94% | ⚠️ Good | Monitor for degradation |
|
||||
| 70-84% | ⚠️ Fair | Review failing tasks |
|
||||
| <70% | ❌ Poor | Immediate investigation required |
|
||||
|
||||
### Duration Guidelines
|
||||
|
||||
| Duration | Assessment |
|
||||
|----------|------------|
|
||||
| <5s | Fast |
|
||||
| 5-15s | Normal |
|
||||
| 15-30s | Slow |
|
||||
| >30s | Very slow - consider optimization |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### No JSONL files found
|
||||
|
||||
```bash
|
||||
# Check input directory
|
||||
ls -la ~/shared/overnight-loop/
|
||||
|
||||
# Ensure Syncthing is syncing
|
||||
systemctl status syncthing@root
|
||||
```
|
||||
|
||||
### Malformed lines
|
||||
|
||||
The generator skips malformed lines with a warning. Check the JSONL files for syntax errors.
|
||||
|
||||
### Empty reports
|
||||
|
||||
If no data exists, verify:
|
||||
1. Overnight loop is running and writing JSONL
|
||||
2. File permissions allow reading
|
||||
3. Input path is correct
|
||||
388
uni-wizard/scripts/generate_scorecard.py
Normal file
388
uni-wizard/scripts/generate_scorecard.py
Normal file
@@ -0,0 +1,388 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
JSONL Scorecard Generator for Uni-Wizard
|
||||
Analyzes overnight loop results and produces comprehensive reports
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Any
|
||||
import statistics
|
||||
|
||||
|
||||
class ScorecardGenerator:
    """
    Builds scorecards from overnight-loop JSONL data.

    Covers pass/fail rates, response-time statistics (avg, median, p95),
    per-task breakdowns, error patterns, and hourly timeline trends.
    """

    def __init__(self, input_dir: str = "~/shared/overnight-loop"):
        """Prepare empty accumulators; no I/O happens until load_all()."""
        # Factories for the per-key aggregate buckets used by the defaultdicts.
        def _task_bucket():
            return {"total": 0, "passed": 0, "failed": 0, "durations": []}

        def _hour_bucket():
            return {"total": 0, "passed": 0, "durations": []}

        self.input_dir = Path(input_dir).expanduser()
        self.tasks = []  # raw task records accumulated from every JSONL file
        self.stats = {
            "total": 0,
            "passed": 0,
            "failed": 0,
            "pass_rate": 0.0,
            "durations": [],
            "by_task": defaultdict(_task_bucket),
            "by_hour": defaultdict(_hour_bucket),
            "errors": defaultdict(int),
        }
|
||||
|
||||
def load_jsonl(self, filepath: Path) -> List[Dict]:
|
||||
"""Load and parse a JSONL file, handling errors gracefully"""
|
||||
tasks = []
|
||||
with open(filepath, 'r') as f:
|
||||
for line_num, line in enumerate(f, 1):
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
task = json.loads(line)
|
||||
tasks.append(task)
|
||||
except json.JSONDecodeError:
|
||||
print(f"Warning: Skipping malformed line {line_num} in {filepath}")
|
||||
continue
|
||||
return tasks
|
||||
|
||||
def load_all(self):
|
||||
"""Load all JSONL files from input directory"""
|
||||
if not self.input_dir.exists():
|
||||
print(f"Input directory not found: {self.input_dir}")
|
||||
return
|
||||
|
||||
jsonl_files = list(self.input_dir.glob("*.jsonl"))
|
||||
if not jsonl_files:
|
||||
print(f"No .jsonl files found in {self.input_dir}")
|
||||
return
|
||||
|
||||
for filepath in sorted(jsonl_files):
|
||||
print(f"Loading: {filepath.name}")
|
||||
tasks = self.load_jsonl(filepath)
|
||||
self.tasks.extend(tasks)
|
||||
|
||||
print(f"Loaded {len(self.tasks)} tasks from {len(jsonl_files)} files")
|
||||
|
||||
def analyze(self):
|
||||
"""Analyze all loaded tasks"""
|
||||
if not self.tasks:
|
||||
print("No tasks to analyze")
|
||||
return
|
||||
|
||||
for task in self.tasks:
|
||||
self._process_task(task)
|
||||
|
||||
# Calculate overall pass rate
|
||||
if self.stats["total"] > 0:
|
||||
self.stats["pass_rate"] = (self.stats["passed"] / self.stats["total"]) * 100
|
||||
|
||||
print(f"Analysis complete: {self.stats['passed']}/{self.stats['total']} passed ({self.stats['pass_rate']:.1f}%)")
|
||||
|
||||
def _process_task(self, task: Dict):
|
||||
"""Process a single task record"""
|
||||
# Basic stats
|
||||
self.stats["total"] += 1
|
||||
|
||||
status = task.get("status", "unknown")
|
||||
duration = task.get("duration_s", 0)
|
||||
task_type = task.get("task", "unknown")
|
||||
timestamp = task.get("timestamp", "")
|
||||
|
||||
# Pass/fail
|
||||
if status == "pass":
|
||||
self.stats["passed"] += 1
|
||||
self.stats["by_task"][task_type]["passed"] += 1
|
||||
else:
|
||||
self.stats["failed"] += 1
|
||||
self.stats["by_task"][task_type]["failed"] += 1
|
||||
|
||||
# Track error patterns
|
||||
error = task.get("error", "unknown_error")
|
||||
self.stats["errors"][error] += 1
|
||||
|
||||
# Durations
|
||||
self.stats["durations"].append(duration)
|
||||
self.stats["by_task"][task_type]["durations"].append(duration)
|
||||
self.stats["by_task"][task_type]["total"] += 1
|
||||
|
||||
# Hourly breakdown
|
||||
if timestamp:
|
||||
try:
|
||||
hour = timestamp[:13] # YYYY-MM-DDTHH
|
||||
self.stats["by_hour"][hour]["total"] += 1
|
||||
if status == "pass":
|
||||
self.stats["by_hour"][hour]["passed"] += 1
|
||||
self.stats["by_hour"][hour]["durations"].append(duration)
|
||||
except:
|
||||
pass
|
||||
|
||||
def calculate_duration_stats(self, durations: List[float]) -> Dict[str, float]:
|
||||
"""Calculate duration statistics"""
|
||||
if not durations:
|
||||
return {"avg": 0, "median": 0, "p95": 0, "min": 0, "max": 0}
|
||||
|
||||
sorted_durations = sorted(durations)
|
||||
n = len(sorted_durations)
|
||||
|
||||
return {
|
||||
"avg": round(statistics.mean(durations), 2),
|
||||
"median": round(statistics.median(durations), 2),
|
||||
"p95": round(sorted_durations[int(n * 0.95)] if n > 1 else sorted_durations[0], 2),
|
||||
"min": round(min(durations), 2),
|
||||
"max": round(max(durations), 2)
|
||||
}
|
||||
|
||||
def generate_json(self) -> Dict:
|
||||
"""Generate structured JSON report"""
|
||||
duration_stats = self.calculate_duration_stats(self.stats["durations"])
|
||||
|
||||
report = {
|
||||
"generated_at": datetime.now().isoformat(),
|
||||
"summary": {
|
||||
"total_tasks": self.stats["total"],
|
||||
"passed": self.stats["passed"],
|
||||
"failed": self.stats["failed"],
|
||||
"pass_rate": round(self.stats["pass_rate"], 2),
|
||||
"duration_stats": duration_stats
|
||||
},
|
||||
"by_task": {},
|
||||
"by_hour": {},
|
||||
"errors": dict(self.stats["errors"]),
|
||||
"recommendations": self._generate_recommendations()
|
||||
}
|
||||
|
||||
# Per-task breakdown
|
||||
for task_type, data in self.stats["by_task"].items():
|
||||
if data["total"] > 0:
|
||||
pass_rate = (data["passed"] / data["total"]) * 100
|
||||
report["by_task"][task_type] = {
|
||||
"total": data["total"],
|
||||
"passed": data["passed"],
|
||||
"failed": data["failed"],
|
||||
"pass_rate": round(pass_rate, 2),
|
||||
"duration_stats": self.calculate_duration_stats(data["durations"])
|
||||
}
|
||||
|
||||
# Hourly breakdown
|
||||
for hour, data in sorted(self.stats["by_hour"].items()):
|
||||
if data["total"] > 0:
|
||||
pass_rate = (data["passed"] / data["total"]) * 100
|
||||
report["by_hour"][hour] = {
|
||||
"total": data["total"],
|
||||
"passed": data["passed"],
|
||||
"pass_rate": round(pass_rate, 2),
|
||||
"avg_duration": round(statistics.mean(data["durations"]), 2) if data["durations"] else 0
|
||||
}
|
||||
|
||||
return report
|
||||
|
||||
    def generate_markdown(self) -> str:
        """Render the scorecard as a human-readable markdown document.

        Builds on generate_json() so both report formats share one source of
        truth. Returns the complete markdown text: summary and duration
        tables, per-task breakdown, hourly timeline, error analysis, and the
        recommendations list.
        """
        json_report = self.generate_json()

        # Header, summary table, and duration table from the summary section.
        md = f"""# Overnight Loop Scorecard

**Generated:** {json_report['generated_at']}

---

## Summary

| Metric | Value |
|--------|-------|
| Total Tasks | {json_report['summary']['total_tasks']} |
| Passed | {json_report['summary']['passed']} ✅ |
| Failed | {json_report['summary']['failed']} ❌ |
| **Pass Rate** | **{json_report['summary']['pass_rate']:.1f}%** |

### Duration Statistics

| Metric | Value (seconds) |
|--------|-----------------|
| Average | {json_report['summary']['duration_stats']['avg']} |
| Median | {json_report['summary']['duration_stats']['median']} |
| P95 | {json_report['summary']['duration_stats']['p95']} |
| Min | {json_report['summary']['duration_stats']['min']} |
| Max | {json_report['summary']['duration_stats']['max']} |

---

## Per-Task Breakdown

| Task | Total | Passed | Failed | Pass Rate | Avg Duration |
|------|-------|--------|--------|-----------|--------------|
"""

        # Sort by pass rate (ascending - worst first)
        sorted_tasks = sorted(
            json_report['by_task'].items(),
            key=lambda x: x[1]['pass_rate']
        )

        # One row per task; the status glyph encodes the 90%/70% thresholds.
        for task_type, data in sorted_tasks:
            status = "✅" if data['pass_rate'] >= 90 else "⚠️" if data['pass_rate'] >= 70 else "❌"
            md += f"| {task_type} | {data['total']} | {data['passed']} | {data['failed']} | {status} {data['pass_rate']:.1f}% | {data['duration_stats']['avg']}s |\n"

        md += """
---

## Timeline (Hourly)

| Hour | Tasks | Passed | Pass Rate | Avg Duration |
|------|-------|--------|-----------|--------------|
"""

        # Hour keys sort chronologically because they are ISO-timestamp prefixes.
        for hour, data in sorted(json_report['by_hour'].items()):
            trend = "📈" if data['pass_rate'] >= 90 else "📊" if data['pass_rate'] >= 70 else "📉"
            md += f"| {hour} | {data['total']} | {data['passed']} | {trend} {data['pass_rate']:.1f}% | {data['avg_duration']}s |\n"

        md += """
---

## Error Analysis

| Error Pattern | Count |
|---------------|-------|
"""

        # Most frequent error patterns first.
        for error, count in sorted(json_report['errors'].items(), key=lambda x: x[1], reverse=True):
            md += f"| {error} | {count} |\n"

        md += """
---

## Recommendations

"""

        for rec in json_report['recommendations']:
            md += f"- {rec}\n"

        md += """
---

*Generated by Uni-Wizard Scorecard Generator*
"""

        return md
|
||||
|
||||
def _generate_recommendations(self) -> List[str]:
|
||||
"""Generate recommendations based on analysis"""
|
||||
recommendations = []
|
||||
|
||||
# Check overall pass rate
|
||||
if self.stats["pass_rate"] < 70:
|
||||
recommendations.append(f"⚠️ Overall pass rate ({self.stats['pass_rate']:.1f}%) is concerning. Review infrastructure health.")
|
||||
elif self.stats["pass_rate"] >= 95:
|
||||
recommendations.append(f"✅ Excellent pass rate ({self.stats['pass_rate']:.1f}%). System is performing well.")
|
||||
|
||||
# Check for failing tasks
|
||||
failing_tasks = []
|
||||
for task_type, data in self.stats["by_task"].items():
|
||||
if data["total"] > 0:
|
||||
pass_rate = (data["passed"] / data["total"]) * 100
|
||||
if pass_rate < 50:
|
||||
failing_tasks.append(task_type)
|
||||
|
||||
if failing_tasks:
|
||||
recommendations.append(f"❌ Tasks with <50% pass rate: {', '.join(failing_tasks)}. Consider debugging or removing.")
|
||||
|
||||
# Check for slow tasks
|
||||
slow_tasks = []
|
||||
for task_type, data in self.stats["by_task"].items():
|
||||
if data["durations"]:
|
||||
avg = statistics.mean(data["durations"])
|
||||
if avg > 30: # Tasks taking >30s on average
|
||||
slow_tasks.append(f"{task_type} ({avg:.1f}s)")
|
||||
|
||||
if slow_tasks:
|
||||
recommendations.append(f"⏱️ Slow tasks detected: {', '.join(slow_tasks)}. Consider optimization.")
|
||||
|
||||
# Check error patterns
|
||||
if self.stats["errors"]:
|
||||
top_error = max(self.stats["errors"].items(), key=lambda x: x[1])
|
||||
recommendations.append(f"🔍 Most common error: '{top_error[0]}' ({top_error[1]} occurrences). Investigate root cause.")
|
||||
|
||||
# Timeline trend
|
||||
if len(self.stats["by_hour"]) >= 2:
|
||||
hours = sorted(self.stats["by_hour"].keys())
|
||||
first_hour = hours[0]
|
||||
last_hour = hours[-1]
|
||||
|
||||
first_rate = (self.stats["by_hour"][first_hour]["passed"] / self.stats["by_hour"][first_hour]["total"]) * 100
|
||||
last_rate = (self.stats["by_hour"][last_hour]["passed"] / self.stats["by_hour"][last_hour]["total"]) * 100
|
||||
|
||||
if last_rate > first_rate + 10:
|
||||
recommendations.append(f"📈 Performance improving over time (+{last_rate - first_rate:.1f}% pass rate).")
|
||||
elif last_rate < first_rate - 10:
|
||||
recommendations.append(f"📉 Performance degrading over time (-{first_rate - last_rate:.1f}% pass rate). Check for resource exhaustion.")
|
||||
|
||||
return recommendations
|
||||
|
||||
def save_reports(self, output_dir: str = "~/timmy/reports"):
|
||||
"""Save JSON and markdown reports"""
|
||||
output_path = Path(output_dir).expanduser()
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
date_str = datetime.now().strftime("%Y%m%d")
|
||||
|
||||
# Save JSON
|
||||
json_file = output_path / f"scorecard_{date_str}.json"
|
||||
json_report = self.generate_json()
|
||||
with open(json_file, 'w') as f:
|
||||
json.dump(json_report, f, indent=2)
|
||||
print(f"JSON report saved: {json_file}")
|
||||
|
||||
# Save Markdown
|
||||
md_file = output_path / f"scorecard_{date_str}.md"
|
||||
md_report = self.generate_markdown()
|
||||
with open(md_file, 'w') as f:
|
||||
f.write(md_report)
|
||||
print(f"Markdown report saved: {md_file}")
|
||||
|
||||
return json_file, md_file
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse args, run the generator, report where files went."""
    import argparse

    parser = argparse.ArgumentParser(description="Generate scorecard from overnight loop JSONL")
    parser.add_argument("--input", "-i", default="~/shared/overnight-loop", help="Input directory with JSONL files")
    parser.add_argument("--output", "-o", default="~/timmy/reports", help="Output directory for reports")
    args = parser.parse_args()

    banner = "=" * 60
    print(banner)
    print("UNI-WIZARD SCORECARD GENERATOR")
    print(banner)
    print()

    gen = ScorecardGenerator(input_dir=args.input)
    gen.load_all()
    gen.analyze()

    # Skip report generation entirely when nothing was loaded.
    if gen.stats["total"] == 0:
        print("No data to report")
        return

    json_file, md_file = gen.save_reports(output_dir=args.output)
    print()
    print(banner)
    print("REPORTS GENERATED")
    print(banner)
    print(f"JSON: {json_file}")
    print(f"Markdown: {md_file}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user