#!/usr/bin/env python3
# Recovered from patch 40b68a13cbd87f1f5d93365a00339f48b69e2db5
# "test: scene data validator tests (#647)" by Alexander Whitestone,
# Fri, 17 Apr 2026 05:34:36 +0000, which creates tests/test_validate_scene_data.py.
"""
Tests for scene description data validator (#647).

Exercises ``validate_entry_manual``, ``load_schema`` and ``validate_file`` from
the ``validate_scene_data`` module against valid and invalid JSONL entries.
"""

import json
import sys
import tempfile
import unittest
from pathlib import Path

# The validator is imported from <repo>/scripts, so that directory has to be on
# sys.path before the import below runs.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))

from validate_scene_data import validate_entry_manual, load_schema, validate_file  # noqa: E402


SCHEMA_PATH = Path(__file__).resolve().parent.parent / "training-data" / "schema.json"


def _valid_entry(**overrides):
    """Create a valid scene description entry with optional overrides.

    Overrides are applied with a plain ``dict.update``, so they replace
    *top-level* keys only. To vary a nested scene field, mutate
    ``entry["scene"]`` on the returned dict; every call builds fresh
    dict/list objects, so that is safe.
    """
    entry = {
        "song": "Thunder Road",
        "artist": "Heartland",
        "beat": 1,
        "timestamp": "0:00",
        "duration": "30s",
        "lyric_line": "The screen door slams",
        "scene": {
            "mood": "hope",
            "colors": ["gold", "sky blue", "white"],
            "composition": "wide shot",
            "camera": "static",
            "description": "Open horizon. Golden light breaking through clouds. A figure stands silhouetted.",
        },
    }
    entry.update(overrides)
    return entry


class TestValidEntry(unittest.TestCase):
    """Valid entries should produce no errors."""

    def test_minimal_valid(self):
        errors = validate_entry_manual(_valid_entry(), 0)
        self.assertEqual(errors, [])

    def test_with_optional_fields(self):
        entry = _valid_entry(mood_arc="rising", duration_seconds=30)
        errors = validate_entry_manual(entry, 0)
        self.assertEqual(errors, [])

    def test_all_composition_types(self):
        compositions = ("wide shot", "close-up", "over the shoulder", "low angle", "bird's eye")
        for comp in compositions:
            # subTest keeps going after a failure so every bad value is reported.
            with self.subTest(composition=comp):
                entry = _valid_entry()
                # Composition lives under "scene"; passing it as a top-level
                # override would leave scene["composition"] at "wide shot".
                entry["scene"]["composition"] = comp
                errors = validate_entry_manual(entry, 0)
                self.assertEqual(errors, [], f"Failed for composition: {comp}")


class TestMissingRequiredFields(unittest.TestCase):
    """Missing required fields should be caught."""

    def test_missing_song(self):
        entry = _valid_entry()
        del entry["song"]
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("song" in e for e in errors))

    def test_missing_artist(self):
        entry = _valid_entry()
        del entry["artist"]
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("artist" in e for e in errors))

    def test_missing_beat(self):
        entry = _valid_entry()
        del entry["beat"]
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("beat" in e for e in errors))

    def test_missing_timestamp(self):
        entry = _valid_entry()
        del entry["timestamp"]
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("timestamp" in e for e in errors))

    def test_missing_lyric_line(self):
        entry = _valid_entry()
        del entry["lyric_line"]
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("lyric_line" in e for e in errors))

    def test_missing_scene(self):
        entry = _valid_entry()
        del entry["scene"]
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("scene" in e for e in errors))

    # For the nested fields, matching the bare field name also matches a
    # dotted "scene.<field>" message, so one substring check covers both.

    def test_missing_scene_mood(self):
        entry = _valid_entry()
        del entry["scene"]["mood"]
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("mood" in e for e in errors))

    def test_missing_scene_colors(self):
        entry = _valid_entry()
        del entry["scene"]["colors"]
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("colors" in e for e in errors))

    def test_missing_scene_description(self):
        entry = _valid_entry()
        del entry["scene"]["description"]
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("description" in e for e in errors))


class TestTypeValidation(unittest.TestCase):
    """Wrong types should be caught."""

    def test_song_not_string(self):
        errors = validate_entry_manual(_valid_entry(song=42), 0)
        self.assertTrue(any("song" in e and "string" in e for e in errors))

    def test_beat_not_integer(self):
        errors = validate_entry_manual(_valid_entry(beat="one"), 0)
        self.assertTrue(any("beat" in e for e in errors))

    def test_beat_zero(self):
        errors = validate_entry_manual(_valid_entry(beat=0), 0)
        self.assertTrue(any("beat" in e for e in errors))

    def test_colors_not_array(self):
        entry = _valid_entry()
        entry["scene"]["colors"] = "red"
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("colors" in e and "array" in e for e in errors))

    def test_colors_empty_array(self):
        entry = _valid_entry()
        entry["scene"]["colors"] = []
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("colors" in e for e in errors))

    def test_scene_not_object(self):
        errors = validate_entry_manual(_valid_entry(scene="not an object"), 0)
        self.assertTrue(any("scene" in e and "object" in e for e in errors))

    def test_timestamp_bad_format(self):
        errors = validate_entry_manual(_valid_entry(timestamp="abc"), 0)
        self.assertTrue(any("timestamp" in e for e in errors))

    def test_timestamp_valid_formats(self):
        for ts in ("0:00", "1:30", "12:45", "99:59"):
            with self.subTest(timestamp=ts):
                errors = validate_entry_manual(_valid_entry(timestamp=ts), 0)
                self.assertEqual(errors, [], f"Failed for timestamp: {ts}")


class TestEmptyValues(unittest.TestCase):
    """Empty strings should be caught."""

    def test_empty_song(self):
        errors = validate_entry_manual(_valid_entry(song=""), 0)
        self.assertTrue(any("song" in e and "empty" in e for e in errors))

    def test_empty_artist(self):
        errors = validate_entry_manual(_valid_entry(artist=""), 0)
        self.assertTrue(any("artist" in e and "empty" in e for e in errors))

    def test_empty_lyric_line(self):
        errors = validate_entry_manual(_valid_entry(lyric_line=""), 0)
        self.assertTrue(any("lyric_line" in e and "empty" in e for e in errors))

    def test_empty_scene_mood(self):
        entry = _valid_entry()
        entry["scene"]["mood"] = ""
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("mood" in e and "empty" in e for e in errors))

    def test_empty_color_in_array(self):
        entry = _valid_entry()
        entry["scene"]["colors"] = ["red", "", "blue"]
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("colors[1]" in e for e in errors))


class TestDescriptionLength(unittest.TestCase):
    """Description minimum length check."""

    def test_short_description(self):
        entry = _valid_entry()
        entry["scene"]["description"] = "Short"
        errors = validate_entry_manual(entry, 0)
        self.assertTrue(any("description" in e and "short" in e for e in errors))

    def test_valid_description_length(self):
        entry = _valid_entry()
        entry["scene"]["description"] = "A long and detailed scene description that exceeds ten characters."
        errors = validate_entry_manual(entry, 0)
        desc_errors = [e for e in errors if "description" in e.lower()]
        self.assertEqual(desc_errors, [])


class TestFileValidation(unittest.TestCase):
    """Test full-file validation."""

    def _write_text(self, text):
        """Write *text* to a temporary ``.jsonl`` file and return its path.

        The file is created with ``delete=False`` so it survives being closed
        and can be reopened by the validator. Removal is registered with
        ``addCleanup`` before writing, so the file is deleted when the test
        ends even if the write or a later assertion fails.
        """
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
        ) as f:
            path = Path(f.name)
            self.addCleanup(path.unlink)
            f.write(text)
        return path

    def _write_jsonl(self, entries):
        """Serialise *entries* one JSON object per line into a temp file."""
        return self._write_text("".join(json.dumps(entry) + "\n" for entry in entries))

    def test_valid_file(self):
        path = self._write_jsonl([_valid_entry(), _valid_entry(beat=2)])
        schema = load_schema(SCHEMA_PATH)
        total, errors = validate_file(path, schema)
        self.assertEqual(total, 2)
        self.assertEqual(errors, [])

    def test_invalid_entries_reported(self):
        entry_bad = _valid_entry()
        del entry_bad["song"]
        path = self._write_jsonl([_valid_entry(), entry_bad])
        schema = load_schema(SCHEMA_PATH)
        total, errors = validate_file(path, schema)
        self.assertEqual(total, 2)
        self.assertGreater(len(errors), 0)
        # NOTE(review): intended to check that line 2 is named, but a bare "2"
        # can match other text in a message; tighten once the error format is
        # confirmed against the validator.
        self.assertTrue(any("2" in e for e in errors))  # line 2

    def test_malformed_json(self):
        path = self._write_text("{invalid json\n" + json.dumps(_valid_entry()) + "\n")
        schema = load_schema(SCHEMA_PATH)
        total, errors = validate_file(path, schema)
        self.assertEqual(total, 2)
        self.assertGreater(len(errors), 0)
        self.assertTrue(any("JSON" in e for e in errors))

    def test_empty_lines_ignored(self):
        line = json.dumps(_valid_entry())
        # Two blank lines sit between the two real entries.
        path = self._write_text(line + "\n\n\n" + line + "\n")
        schema = load_schema(SCHEMA_PATH)
        total, errors = validate_file(path, schema)
        self.assertEqual(total, 2)
        self.assertEqual(errors, [])


if __name__ == "__main__":
    unittest.main()