#!/usr/bin/env python3
"""Validate scene description JSONL files against schema.

Run as a script, this loads ``training-data/schema.json``, checks every
non-blank line of each ``training-data/*.jsonl`` file against it, prints any
problems found and exits with status 1 if there were any.  Both paths are
relative to the current working directory.
"""

import functools
import glob
import importlib
import json
import subprocess
import sys
from pathlib import Path
from typing import Any, List, Optional, Tuple


@functools.lru_cache(maxsize=None)
def _load_jsonschema():
    """Return the ``jsonschema`` module, installing it with pip if missing.

    The lookup happens on first use rather than at import time, so merely
    importing this module never launches a package installation.  The result
    is cached, so later calls are a plain function-cache hit.

    Raises:
        subprocess.CalledProcessError: if the ``pip install`` command fails.
        ImportError: if the package still cannot be imported afterwards.
    """
    try:
        import jsonschema
    except ImportError:
        print("Installing jsonschema...")
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "jsonschema"]
        )
        # The path finders cache directory contents; a package installed
        # while the interpreter is running may stay invisible until those
        # caches are dropped.
        importlib.invalidate_caches()
        import jsonschema
    return jsonschema


def validate_scene_entry(entry: Any, schema: Any) -> Tuple[bool, Optional[str]]:
    """Validate a single scene entry against the schema.

    Args:
        entry: The decoded JSON value for one JSONL line.
        schema: The decoded JSON schema to validate against.

    Returns:
        ``(True, None)`` when the entry passes, otherwise ``(False, message)``
        where ``message`` is the validation error's message text.
    """
    jsonschema = _load_jsonschema()
    try:
        jsonschema.validate(entry, schema)
    except jsonschema.ValidationError as e:
        return False, str(e.message)
    return True, None


def validate_file(filepath, schema) -> List[str]:
    """Validate all entries in a JSONL file.

    Blank (whitespace-only) lines are skipped.  A line that is not valid JSON
    is reported and does not stop processing of the remaining lines.

    Args:
        filepath: Path of the JSONL file to read.
        schema: The decoded JSON schema each entry must satisfy.

    Returns:
        A list of ``"<filepath>:<line number>: <problem>"`` strings, one per
        offending line (1-based line numbers); empty when nothing is wrong.
    """
    errors = []
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which differs between systems.
    with open(filepath, "r", encoding="utf-8") as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                errors.append(f"{filepath}:{line_num}: Invalid JSON: {e}")
                continue
            valid, error = validate_scene_entry(entry, schema)
            if not valid:
                errors.append(f"{filepath}:{line_num}: {error}")
    return errors


def main():
    """Validate every ``training-data/*.jsonl`` file and report the outcome.

    Prints each collected error and exits with status 1 when any were found;
    otherwise prints a success line with the number of files checked.
    """
    schema_path = Path("training-data/schema.json")
    with open(schema_path, encoding="utf-8") as f:
        schema = json.load(f)

    # glob returns matches in arbitrary order; sort so that the error report
    # is reproducible from run to run.
    jsonl_files = sorted(glob.glob("training-data/*.jsonl"))

    all_errors = []
    for filepath in jsonl_files:
        all_errors.extend(validate_file(filepath, schema))

    if all_errors:
        print("Validation FAILED:")
        for error in all_errors:
            print(f" {error}")
        sys.exit(1)

    print(f"All {len(jsonl_files)} files validated successfully!")


if __name__ == "__main__":
    main()