Merge PR #760: .gitea/workflows/validate-training-data.yml (changed)

This commit is contained in:
Merge Bot
2026-04-16 05:07:23 +00:00
parent 378a61e0ab
commit 7c716f8ca1

View File

@@ -22,3 +22,17 @@ jobs:
run: |
cd training/data/scene-descriptions
python3 validate.py *.jsonl
# Reports, per JSONL file under training/data, how many training pairs carry
# provenance metadata. The step is informational: it prints warnings but
# always exits 0, so it never fails the job.
- name: Validate training data provenance
  run: |
    cd training
    # The script is fed through a *quoted* here-document so the shell passes
    # it to Python verbatim. The previous `python3 -c "..."` form broke as
    # soon as the Python code contained a double quote: the shell closed its
    # own string there and Python saw `report[coverage]` (NameError).
    # `python3 -` still puts the current directory on sys.path, so the local
    # training_pair_provenance module remains importable.
    python3 - <<'PY'
    import glob
    import sys

    from training_pair_provenance import validate_provenance

    # NOTE(review): this glob is not recursive, so files in sub-directories
    # such as data/scene-descriptions/ are not checked - confirm intended.
    for path in glob.glob('data/*.jsonl'):
        report = validate_provenance(path)
        # Single-quoted keys inside a double-quoted f-string keep this valid
        # on Python versions older than 3.12 as well.
        print(f"{path}: {report['coverage']:.0f}% coverage ({report['with_provenance']}/{report['total']})")
        if report['missing_provenance'] > 0:
            print(f" WARNING: {report['missing_provenance']} pairs missing provenance")

    # Warn-only: missing provenance is reported but does not fail the step.
    # NOTE(review): assumes the original exit sat after the loop - confirm.
    sys.exit(0)
    PY