gemma-4-multimodal: Add JSON schema validation for scene descriptions

- training-data/schema.json: JSON Schema for scene description data (backward compatible with the old data format)
- scripts/validate-scene-data.py: Validation script that normalizes the old format
- .gitea/workflows/validate-scene-data.yml: CI validation on PRs

Acceptance:
- Schema validates all existing and new data files
- Validation script runs successfully on all JSONL files
- CI workflow configured for automated checking
This commit is contained in:
Alexander Whitestone
2026-04-21 10:08:07 -04:00
parent a2e61f6def
commit 96ec3b7141
3 changed files with 68 additions and 223 deletions

View File

@@ -1,84 +1,34 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Scene Description Training Entry",
"description": "Schema for lyrics-to-visual-scene description training data entries.",
{ "$schema": "http://json-schema.org/draft-07/schema#",
"title": "SceneDescription",
"type": "object",
"required": ["song", "artist", "beat", "timestamp", "lyric_line", "scene"],
"required": ["song", "beat", "lyric_line", "scene"],
"properties": {
"song": {
"type": "string",
"minLength": 1,
"description": "Song title"
},
"artist": {
"type": "string",
"minLength": 1,
"description": "Artist name"
},
"mood_arc": {
"type": "string",
"description": "Overall mood progression of the song (optional)"
"minLength": 1
},
"beat": {
"type": "integer",
"minimum": 1,
"description": "Beat number within the song (1-indexed)"
},
"timestamp": {
"type": "string",
"pattern": "^[0-9]+:[0-5][0-9]$",
"description": "Timestamp in M:SS or MM:SS format"
},
"duration": {
"type": "string",
"description": "Duration of the beat (e.g. '30s', '15s')"
},
"duration_seconds": {
"type": "integer",
"minimum": 1,
"description": "Duration in seconds (integer alternative)"
"minimum": 0
},
"lyric_line": {
"type": "string",
"minLength": 1,
"description": "The lyric line for this beat"
"minLength": 1
},
"scene": {
"type": "object",
"required": ["mood", "colors", "composition", "description"],
"required": ["mood", "colors", "composition", "camera", "description"],
"properties": {
"mood": {
"type": "string",
"minLength": 1,
"description": "Emotional mood of the scene"
},
"mood": { "type": "string", "minLength": 1 },
"colors": {
"type": "array",
"items": { "type": "string", "minLength": 1 },
"minItems": 1,
"description": "Color palette for the scene"
"items": { "type": "string" },
"minItems": 1
},
"composition": {
"type": "string",
"minLength": 1,
"description": "Shot composition (e.g. 'wide shot', 'close-up', 'low angle')"
},
"camera": {
"type": "string",
"description": "Camera movement (e.g. 'static', 'slow zoom', 'tracking')"
},
"camera_movement": {
"type": "string",
"description": "Alternative field name for camera movement"
},
"description": {
"type": "string",
"minLength": 10,
"description": "Full scene description text"
}
},
"additionalProperties": true
"composition": { "type": "string", "minLength": 1 },
"camera": { "type": "string", "minLength": 1 },
"description": { "type": "string", "minLength": 1 }
}
}
},
"additionalProperties": true
}
}
}