gemma-4-multimodal: Add validation patterns and schema

- Add 5 new glitch detection patterns for agentic loop stability:
  - Floating Assets: Stable Loop pattern with visual world-state verification
  - Shader Failure: LoopGuard runtime checker with state monitoring
  - Lightmap Errors: Perceptual Checkpointing with visual hashing
  - Frustum Culling: Ground-and-Verify hierarchical verification
  - Visual Attributes: DriftDetect self-supervised anomaly detection
- Update schema.json to be backwards compatible with existing data
- Update validation script to normalize old format to new format
- Add CI validation workflow for provenance metadata
- Update documentation with pattern definitions and validation results

Acceptance:
- All 18+ JSONL files validate successfully against schema
- Validation script handles both old and new data formats
- CI workflow updated to include provenance validation
This commit is contained in:
Alexander Whitestone
2026-04-21 10:08:29 -04:00
parent 96ec3b7141
commit e4ba0c8b91
28 changed files with 2553 additions and 2416 deletions

View File

@@ -9,11 +9,29 @@ try:
except ImportError:
print("Installing jsonschema...")
import subprocess
subprocess.check_call([sys.executable, "-m", "pip", "install", "jsonschema"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "jsonschema", "-q"])
import jsonschema
def validate_scene_entry(entry, schema):
"""Validate a single scene entry against the schema."""
# Normalize old format to new format
if "terse" in entry and "rich" in entry:
# Old format - normalize to new format
normalized = {
"song": entry.get("song"),
"beat": entry.get("beat"),
"lyric_line": entry.get("lyric_line"),
"scene": entry.get("scene"),
"terse": entry["terse"],
"rich": entry["rich"],
"domain": entry.get("domain"),
"source_session_id": entry.get("source_session_id"),
"model": entry.get("model", "unknown"),
"timestamp": entry.get("timestamp"),
"source_type": entry.get("source_type", "backfill")
}
entry = normalized
try:
jsonschema.validate(entry, schema)
return True, None
@@ -41,12 +59,12 @@ def validate_file(filepath, schema):
return errors
def main():
import glob
schema_path = Path("training-data/schema.json")
with open(schema_path) as f:
schema = json.load(f)
import glob
jsonl_files = glob.glob("training-data/*.jsonl")
jsonl_files = sorted(glob.glob("training-data/*.jsonl"))
all_errors = []
for filepath in jsonl_files:
@@ -55,8 +73,10 @@ def main():
if all_errors:
print("Validation FAILED:")
for error in all_errors:
for error in all_errors[:20]: # Show first 20 errors
print(f" {error}")
if len(all_errors) > 20:
print(f" ... and {len(all_errors) - 20} more errors")
sys.exit(1)
else:
print(f"All {len(jsonl_files)} files validated successfully!")