Merge PR #760: .gitea/workflows/validate-training-data.yml (changed)
This commit is contained in:
@@ -22,3 +22,17 @@ jobs:
|
||||
run: |
|
||||
cd training/data/scene-descriptions
|
||||
python3 validate.py *.jsonl
|
||||
- name: Validate training data provenance
  # Reports provenance coverage for every JSONL file under training/data.
  #
  # The script is fed to Python through a quoted heredoc instead of
  # `python3 -c "..."`: the f-strings below index the report with
  # double-quoted keys, and inside a double-quoted shell argument those
  # quotes would end the shell string, so Python would receive
  # `report[coverage]` and fail with NameError. With <<'PY' the shell
  # performs no quoting or expansion on the script text at all.
  run: |
    cd training
    python3 - <<'PY'
    import glob
    import sys

    from training_pair_provenance import validate_provenance

    # Number of files that contain at least one pair without provenance.
    issues = 0

    # Sorted so the log order is stable between runs.
    for path in sorted(glob.glob('data/*.jsonl')):
        report = validate_provenance(path)
        print(f'{path}: {report["coverage"]:.0f}% coverage ({report["with_provenance"]}/{report["total"]})')
        if report['missing_provenance'] > 0:
            issues += 1
            print(f' WARNING: {report["missing_provenance"]} pairs missing provenance')

    print(f'{issues} file(s) with missing provenance')

    # NOTE(review): the step is advisory and always succeeds, as before.
    # Exit with a non-zero status when issues > 0 if missing provenance
    # should block the workflow.
    sys.exit(0)
    PY
|
||||
|
||||
Reference in New Issue
Block a user