2026-04-16 05:06:10 +00:00
|
|
|
#!/usr/bin/env python3
|
2026-04-21 10:08:07 -04:00
|
|
|
"""Validate scene description JSONL files against schema."""
|
2026-04-16 05:06:10 +00:00
|
|
|
import json
|
|
|
|
|
import sys
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
try:
    import jsonschema
except ImportError:
    # Best-effort bootstrap: install the missing dependency with the same
    # interpreter that is running this script, then retry the import.
    print("Installing jsonschema...")
    import importlib
    import subprocess

    subprocess.check_call([sys.executable, "-m", "pip", "install", "jsonschema"])
    # The import system caches directory listings, so a package installed
    # after interpreter start-up may not be visible until the caches are
    # invalidated.
    importlib.invalidate_caches()
    import jsonschema
|
2026-04-16 05:06:10 +00:00
|
|
|
|
2026-04-21 10:08:07 -04:00
|
|
|
def validate_scene_entry(entry, schema):
    """Check one decoded scene entry against the JSON schema.

    Args:
        entry: The decoded JSON value for a single scene description.
        schema: The decoded JSON Schema to validate against.

    Returns:
        A ``(valid, message)`` tuple: ``(True, None)`` when the entry
        conforms to the schema, otherwise ``(False, message)`` where
        ``message`` is the validation error's short message.

    Only ``jsonschema.ValidationError`` is handled here; any other
    exception raised by ``jsonschema.validate`` propagates to the caller.
    """
    try:
        jsonschema.validate(entry, schema)
    except jsonschema.ValidationError as exc:
        # Report just the concise message rather than the full error dump.
        return False, str(exc.message)
    else:
        return True, None
|
2026-04-16 05:06:10 +00:00
|
|
|
|
|
|
|
|
def validate_file(filepath, schema):
    """Validate every non-blank line of a JSONL file against the schema.

    Each line is stripped of surrounding whitespace; blank lines are
    skipped. A line that is not valid JSON is reported and skipped; a line
    that parses is checked with ``validate_scene_entry``.

    Args:
        filepath: Path of the JSONL file to read.
        schema: The decoded JSON Schema passed on to
            ``validate_scene_entry``.

    Returns:
        A list of error strings of the form ``"<filepath>:<line>: <detail>"``
        (line numbers are 1-based). The list is empty when every entry is
        valid.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    errors = []
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                errors.append(f"{filepath}:{line_num}: Invalid JSON: {e}")
                continue

            valid, error = validate_scene_entry(entry, schema)
            if not valid:
                errors.append(f"{filepath}:{line_num}: {error}")

    return errors
|
2026-04-16 05:06:10 +00:00
|
|
|
|
|
|
|
|
def main():
    """Validate all ``training-data/*.jsonl`` files against the schema.

    Loads the schema from ``training-data/schema.json`` (relative to the
    current working directory), runs ``validate_file`` on every matching
    JSONL file, and prints a report.

    Exits the process with status 1 (via ``sys.exit``) if any error was
    collected; otherwise prints a success message and returns normally.
    """
    import glob

    schema_path = Path("training-data/schema.json")
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(schema_path, encoding="utf-8") as f:
        schema = json.load(f)

    # glob returns matches in arbitrary filesystem order; sort so the error
    # report is reproducible between runs and machines.
    jsonl_files = sorted(glob.glob("training-data/*.jsonl"))

    all_errors = []
    for filepath in jsonl_files:
        all_errors.extend(validate_file(filepath, schema))

    if all_errors:
        print("Validation FAILED:")
        for error in all_errors:
            print(f" {error}")
        sys.exit(1)

    print(f"All {len(jsonl_files)} files validated successfully!")
|
2026-04-16 05:06:10 +00:00
|
|
|
|
|
|
|
|
# Run the validator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|