#!/usr/bin/env python3
"""
Tests for scene description data validator (#647).

Tests the validate-scene-data.py script against valid and invalid JSONL entries.
"""

import json
import sys
import tempfile
import unittest
from pathlib import Path

# The validator lives in the sibling "scripts" directory; put it on the import
# path before importing from it.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))

# NOTE(review): the module docstring refers to "validate-scene-data.py" while
# the import below needs a module named "validate_scene_data" -- confirm the
# script's actual filename.
from validate_scene_data import validate_entry_manual, load_schema, validate_file  # noqa: E402

SCHEMA_PATH = Path(__file__).resolve().parent.parent / "training-data" / "schema.json"


def _valid_entry(**overrides):
    """Create a valid scene description entry with optional overrides.

    Keyword arguments replace (or add) *top-level* keys only. To change a key
    nested under ``"scene"``, use ``_scene_entry`` or mutate the returned dict.
    A fresh dict (including a fresh nested ``"scene"`` dict) is built on every
    call, so callers may mutate the result freely.
    """
    entry = {
        "song": "Thunder Road",
        "artist": "Heartland",
        "beat": 1,
        "timestamp": "0:00",
        "duration": "30s",
        "lyric_line": "The screen door slams",
        "scene": {
            "mood": "hope",
            "colors": ["gold", "sky blue", "white"],
            "composition": "wide shot",
            "camera": "static",
            "description": "Open horizon. Golden light breaking through clouds. A figure stands silhouetted.",
        },
    }
    entry.update(overrides)
    return entry


def _scene_entry(**scene_overrides):
    """Create a valid entry whose nested ``"scene"`` keys are overridden."""
    entry = _valid_entry()
    entry["scene"].update(scene_overrides)
    return entry


def _has_error(errors, *fragments):
    """Return True if any single error message contains every fragment."""
    return any(all(fragment in message for fragment in fragments) for message in errors)


class TestValidEntry(unittest.TestCase):
    """Valid entries should produce no errors."""

    def test_minimal_valid(self):
        errors = validate_entry_manual(_valid_entry(), 0)
        self.assertEqual(errors, [])

    def test_with_optional_fields(self):
        entry = _valid_entry(mood_arc="rising", duration_seconds=30)
        errors = validate_entry_manual(entry, 0)
        self.assertEqual(errors, [])

    def test_all_composition_types(self):
        compositions = ["wide shot", "close-up", "over the shoulder", "low angle", "bird's eye"]
        for comp in compositions:
            with self.subTest(composition=comp):
                # Composition is a scene-level field; overriding it at the top
                # level would leave scene["composition"] untouched and test
                # nothing.
                errors = validate_entry_manual(_scene_entry(composition=comp), 0)
                self.assertEqual(errors, [], f"Failed for composition: {comp}")


class TestMissingRequiredFields(unittest.TestCase):
    """Missing required fields should be caught."""

    def _errors_without(self, key):
        """Validate an otherwise valid entry with top-level ``key`` removed."""
        entry = _valid_entry()
        del entry[key]
        return validate_entry_manual(entry, 0)

    def _errors_without_scene_key(self, key):
        """Validate an otherwise valid entry with ``scene[key]`` removed."""
        entry = _valid_entry()
        del entry["scene"][key]
        return validate_entry_manual(entry, 0)

    def test_missing_song(self):
        errors = self._errors_without("song")
        self.assertTrue(_has_error(errors, "song"), errors)

    def test_missing_artist(self):
        errors = self._errors_without("artist")
        self.assertTrue(_has_error(errors, "artist"), errors)

    def test_missing_beat(self):
        errors = self._errors_without("beat")
        self.assertTrue(_has_error(errors, "beat"), errors)

    def test_missing_timestamp(self):
        errors = self._errors_without("timestamp")
        self.assertTrue(_has_error(errors, "timestamp"), errors)

    def test_missing_lyric_line(self):
        errors = self._errors_without("lyric_line")
        self.assertTrue(_has_error(errors, "lyric_line"), errors)

    def test_missing_scene(self):
        errors = self._errors_without("scene")
        self.assertTrue(_has_error(errors, "scene"), errors)

    # For the nested keys below, a message naming "scene.<key>" necessarily
    # contains "<key>", so checking for the bare key covers both spellings.

    def test_missing_scene_mood(self):
        errors = self._errors_without_scene_key("mood")
        self.assertTrue(_has_error(errors, "mood"), errors)

    def test_missing_scene_colors(self):
        errors = self._errors_without_scene_key("colors")
        self.assertTrue(_has_error(errors, "colors"), errors)

    def test_missing_scene_description(self):
        errors = self._errors_without_scene_key("description")
        self.assertTrue(_has_error(errors, "description"), errors)


class TestTypeValidation(unittest.TestCase):
    """Wrong types should be caught."""

    def test_song_not_string(self):
        errors = validate_entry_manual(_valid_entry(song=42), 0)
        self.assertTrue(_has_error(errors, "song", "string"), errors)

    def test_beat_not_integer(self):
        errors = validate_entry_manual(_valid_entry(beat="one"), 0)
        self.assertTrue(_has_error(errors, "beat"), errors)

    def test_beat_zero(self):
        errors = validate_entry_manual(_valid_entry(beat=0), 0)
        self.assertTrue(_has_error(errors, "beat"), errors)

    def test_colors_not_array(self):
        errors = validate_entry_manual(_scene_entry(colors="red"), 0)
        self.assertTrue(_has_error(errors, "colors", "array"), errors)

    def test_colors_empty_array(self):
        errors = validate_entry_manual(_scene_entry(colors=[]), 0)
        self.assertTrue(_has_error(errors, "colors"), errors)

    def test_scene_not_object(self):
        errors = validate_entry_manual(_valid_entry(scene="not an object"), 0)
        self.assertTrue(_has_error(errors, "scene", "object"), errors)

    def test_timestamp_bad_format(self):
        errors = validate_entry_manual(_valid_entry(timestamp="abc"), 0)
        self.assertTrue(_has_error(errors, "timestamp"), errors)

    def test_timestamp_valid_formats(self):
        for ts in ["0:00", "1:30", "12:45", "99:59"]:
            with self.subTest(timestamp=ts):
                errors = validate_entry_manual(_valid_entry(timestamp=ts), 0)
                self.assertEqual(errors, [], f"Failed for timestamp: {ts}")


class TestEmptyValues(unittest.TestCase):
    """Empty strings should be caught."""

    def test_empty_song(self):
        errors = validate_entry_manual(_valid_entry(song=""), 0)
        self.assertTrue(_has_error(errors, "song", "empty"), errors)

    def test_empty_artist(self):
        errors = validate_entry_manual(_valid_entry(artist=""), 0)
        self.assertTrue(_has_error(errors, "artist", "empty"), errors)

    def test_empty_lyric_line(self):
        errors = validate_entry_manual(_valid_entry(lyric_line=""), 0)
        self.assertTrue(_has_error(errors, "lyric_line", "empty"), errors)

    def test_empty_scene_mood(self):
        errors = validate_entry_manual(_scene_entry(mood=""), 0)
        self.assertTrue(_has_error(errors, "mood", "empty"), errors)

    def test_empty_color_in_array(self):
        errors = validate_entry_manual(_scene_entry(colors=["red", "", "blue"]), 0)
        self.assertTrue(_has_error(errors, "colors[1]"), errors)


class TestDescriptionLength(unittest.TestCase):
    """Description minimum length check."""

    def test_short_description(self):
        errors = validate_entry_manual(_scene_entry(description="Short"), 0)
        self.assertTrue(_has_error(errors, "description", "short"), errors)

    def test_valid_description_length(self):
        entry = _scene_entry(
            description="A long and detailed scene description that exceeds ten characters."
        )
        errors = validate_entry_manual(entry, 0)
        desc_errors = [e for e in errors if "description" in e.lower()]
        self.assertEqual(desc_errors, [])


class TestFileValidation(unittest.TestCase):
    """Test full-file validation."""

    def _write_text(self, text):
        """Write ``text`` to a temporary ``.jsonl`` file and return its path.

        The file is closed before returning and is removed automatically when
        the test finishes, whether or not the test (or the write itself) fails.
        """
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
        ) as handle:
            path = Path(handle.name)
            # Register removal before writing so a failed write cannot leak
            # the file on disk.
            self.addCleanup(path.unlink)
            handle.write(text)
        return path

    def _write_jsonl(self, entries):
        """Serialize ``entries`` one JSON object per line; return the file path."""
        return self._write_text("".join(json.dumps(entry) + "\n" for entry in entries))

    def _validate(self, path):
        """Run ``validate_file`` on ``path`` using the schema at ``SCHEMA_PATH``."""
        schema = load_schema(SCHEMA_PATH)
        return validate_file(path, schema)

    def test_valid_file(self):
        path = self._write_jsonl([_valid_entry(), _valid_entry(beat=2)])
        total, errors = self._validate(path)
        self.assertEqual(total, 2)
        self.assertEqual(errors, [])

    def test_invalid_entries_reported(self):
        entry_bad = _valid_entry()
        del entry_bad["song"]
        path = self._write_jsonl([_valid_entry(), entry_bad])
        total, errors = self._validate(path)
        self.assertEqual(total, 2)
        self.assertGreater(len(errors), 0)
        # The bad entry is on line 2 of the file.
        # NOTE(review): this only checks that some message contains the
        # character "2"; tighten once the exact message format is confirmed.
        self.assertTrue(_has_error(errors, "2"), errors)

    def test_malformed_json(self):
        path = self._write_text("{invalid json\n" + json.dumps(_valid_entry()) + "\n")
        total, errors = self._validate(path)
        self.assertEqual(total, 2)
        self.assertGreater(len(errors), 0)
        self.assertTrue(_has_error(errors, "JSON"), errors)

    def test_empty_lines_ignored(self):
        # Two blank lines sit between the two valid entries.
        path = self._write_text(
            json.dumps(_valid_entry()) + "\n\n\n" + json.dumps(_valid_entry()) + "\n"
        )
        total, errors = self._validate(path)
        self.assertEqual(total, 2)
        self.assertEqual(errors, [])


if __name__ == "__main__":
    unittest.main()