fix: JSON schema + validator for scene description training data (#647)
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 25s
Smoke Test / smoke (pull_request) Failing after 17s
Validate Config / YAML Lint (pull_request) Failing after 16s
Validate Config / JSON Validate (pull_request) Successful in 18s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 45s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 56s
Validate Config / Cron Syntax Check (pull_request) Successful in 12s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 12s
PR Checklist / pr-checklist (pull_request) Failing after 4m12s
Validate Config / Playbook Schema Validation (pull_request) Successful in 23s
Validate Training Data / validate (pull_request) Successful in 18s
Architecture Lint / Lint Repository (pull_request) Failing after 23s

- Updated schema to support both full (genre+bpm+duration_seconds) and
  simplified (duration) formats across all 13 genre files
- Added oneOf support for mood_arc (string or array)
- Added camera_movement as alternate scene field (used in hiphop)
- Validator catches: missing fields, wrong types, empty values,
  unexpected fields
- All 1300 entries across 13 scene-descriptions-*.jsonl files pass
- Auto-detects schema path, supports --schema flag

Closes #647
This commit is contained in:
Alexander Whitestone
2026-04-21 10:36:57 -04:00
parent 9f4a8733a8
commit b3a0adaf87
2 changed files with 148 additions and 155 deletions

View File

@@ -1,9 +1,9 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Scene Description Training Entry",
"description": "Schema for lyrics-to-visual-scene description training data entries.",
"description": "Schema for lyrics-to-visual scene description training data. Catches missing fields, wrong types, empty values, and unexpected fields.",
"type": "object",
"required": ["song", "artist", "beat", "timestamp", "lyric_line", "scene"],
"required": ["song", "beat", "lyric_line", "scene"],
"properties": {
"song": {
"type": "string",
@@ -13,35 +13,49 @@
"artist": {
"type": "string",
"minLength": 1,
"description": "Artist name"
"description": "Artist or group name (missing in some files — flagged as warning)"
},
"mood_arc": {
"genre": {
"type": "string",
"description": "Overall mood progression of the song (optional)"
"minLength": 1,
"description": "Musical genre"
},
"bpm": {
"type": "number",
"minimum": 1,
"description": "Beats per minute"
},
"beat": {
"type": "integer",
"minimum": 1,
"description": "Beat number within the song (1-indexed)"
"description": "Beat number within the song"
},
"timestamp": {
"type": "string",
"pattern": "^[0-9]+:[0-5][0-9]$",
"description": "Timestamp in M:SS or MM:SS format"
},
"duration_seconds": {
"type": "number",
"minimum": 0,
"description": "Duration in seconds"
},
"duration": {
"type": "string",
"description": "Duration of the beat (e.g. '30s', '15s')"
"minLength": 1,
"description": "Duration as string (e.g. '30s')"
},
"duration_seconds": {
"type": "integer",
"minimum": 1,
"description": "Duration in seconds (integer alternative)"
"mood_arc": {
"oneOf": [
{ "type": "string", "minLength": 1 },
{ "type": "array", "items": { "type": "string", "minLength": 1 }, "minItems": 1 }
],
"description": "Optional mood progression arc: either a single non-empty string or a non-empty array of non-empty strings"
},
"lyric_line": {
"type": "string",
"minLength": 1,
"description": "The lyric line for this beat"
"description": "Lyric line(s) for this beat"
},
"scene": {
"type": "object",
@@ -56,29 +70,31 @@
"type": "array",
"items": { "type": "string", "minLength": 1 },
"minItems": 1,
"description": "Color palette for the scene"
"description": "Visual color palette"
},
"composition": {
"type": "string",
"minLength": 1,
"description": "Shot composition (e.g. 'wide shot', 'close-up', 'low angle')"
"description": "Shot composition description"
},
"camera": {
"type": "string",
"description": "Camera movement (e.g. 'static', 'slow zoom', 'tracking')"
"minLength": 1,
"description": "Camera movement or position"
},
"camera_movement": {
"type": "string",
"description": "Alternative field name for camera movement"
"minLength": 1,
"description": "Camera movement (alternate field name)"
},
"description": {
"type": "string",
"minLength": 10,
"description": "Full scene description text"
"description": "Full visual scene description"
}
},
"additionalProperties": true
"additionalProperties": false
}
},
"additionalProperties": true
"additionalProperties": false
}