270 lines
9.4 KiB
Python
270 lines
9.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Tests for scene description data validator (#647).
|
|
|
|
Tests the validate_scene_data module (imported from the scripts directory) against valid and invalid JSONL entries.
|
|
"""
|
|
|
|
import json
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
import sys
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
|
|
|
|
from validate_scene_data import validate_entry_manual, load_schema, validate_file
|
|
|
|
|
|
# Location of training-data/schema.json, resolved two directory levels above this test file.
SCHEMA_PATH = Path(__file__).resolve().parent.parent / "training-data" / "schema.json"
|
|
|
|
|
|
def _valid_entry(**overrides):
|
|
"""Create a valid scene description entry with optional overrides."""
|
|
entry = {
|
|
"song": "Thunder Road",
|
|
"artist": "Heartland",
|
|
"beat": 1,
|
|
"timestamp": "0:00",
|
|
"duration": "30s",
|
|
"lyric_line": "The screen door slams",
|
|
"scene": {
|
|
"mood": "hope",
|
|
"colors": ["gold", "sky blue", "white"],
|
|
"composition": "wide shot",
|
|
"camera": "static",
|
|
"description": "Open horizon. Golden light breaking through clouds. A figure stands silhouetted.",
|
|
},
|
|
}
|
|
entry.update(overrides)
|
|
return entry
|
|
|
|
|
|
class TestValidEntry(unittest.TestCase):
    """Valid entries should produce no errors."""

    def test_minimal_valid(self):
        """The unmodified baseline entry yields an empty error list."""
        errors = validate_entry_manual(_valid_entry(), 0)
        self.assertEqual(errors, [])

    def test_with_optional_fields(self):
        """Extra top-level keys (mood_arc, duration_seconds) yield no errors."""
        entry = _valid_entry(mood_arc="rising", duration_seconds=30)
        errors = validate_entry_manual(entry, 0)
        self.assertEqual(errors, [])

    def test_all_composition_types(self):
        """Each listed composition value is accepted inside the scene object."""
        compositions = (
            "wide shot",
            "close-up",
            "over the shoulder",
            "low angle",
            "bird's eye",
        )
        for comp in compositions:
            # subTest reports every failing composition instead of stopping
            # at the first one.
            with self.subTest(composition=comp):
                entry = _valid_entry()
                # "composition" lives under "scene". Passing it as a
                # _valid_entry() override would only add an unrelated
                # top-level key and leave the scene value untested.
                entry["scene"]["composition"] = comp
                errors = validate_entry_manual(entry, 0)
                self.assertEqual(errors, [], f"Failed for composition: {comp}")
|
|
|
|
|
|
class TestMissingRequiredFields(unittest.TestCase):
    """Missing required fields should be caught."""

    def _assert_missing_reported(self, field, in_scene=False):
        """Delete ``field`` from a valid entry and require an error naming it.

        Args:
            field: Key to remove before validating.
            in_scene: When true, remove the key from the nested ``scene``
                dict instead of the top-level entry.
        """
        entry = _valid_entry()
        container = entry["scene"] if in_scene else entry
        del container[field]
        errors = validate_entry_manual(entry, 0)
        # Include the actual errors so a failure is diagnosable, rather than
        # the bare "False is not true" from assertTrue(any(...)).
        self.assertTrue(
            any(field in e for e in errors),
            f"no error mentions {field!r}; got: {errors!r}",
        )

    def test_missing_song(self):
        self._assert_missing_reported("song")

    def test_missing_artist(self):
        self._assert_missing_reported("artist")

    def test_missing_beat(self):
        self._assert_missing_reported("beat")

    def test_missing_timestamp(self):
        self._assert_missing_reported("timestamp")

    def test_missing_lyric_line(self):
        self._assert_missing_reported("lyric_line")

    def test_missing_scene(self):
        self._assert_missing_reported("scene")

    # For the nested fields, matching the bare key is sufficient: any message
    # containing "scene.<key>" necessarily contains "<key>" as well.
    def test_missing_scene_mood(self):
        self._assert_missing_reported("mood", in_scene=True)

    def test_missing_scene_colors(self):
        self._assert_missing_reported("colors", in_scene=True)

    def test_missing_scene_description(self):
        self._assert_missing_reported("description", in_scene=True)
|
|
|
|
|
|
class TestTypeValidation(unittest.TestCase):
    """Wrong types should be caught."""

    def test_song_not_string(self):
        """An integer song must produce an error naming 'song' and 'string'."""
        reported = validate_entry_manual(_valid_entry(song=42), 0)
        hits = [msg for msg in reported if "song" in msg and "string" in msg]
        self.assertTrue(hits)

    def test_beat_not_integer(self):
        """A string beat must produce an error naming 'beat'."""
        reported = validate_entry_manual(_valid_entry(beat="one"), 0)
        hits = [msg for msg in reported if "beat" in msg]
        self.assertTrue(hits)

    def test_beat_zero(self):
        """A beat of zero must produce an error naming 'beat'."""
        reported = validate_entry_manual(_valid_entry(beat=0), 0)
        hits = [msg for msg in reported if "beat" in msg]
        self.assertTrue(hits)

    def test_colors_not_array(self):
        """A string in place of the colors list must be reported as a non-array."""
        candidate = _valid_entry()
        candidate["scene"].update(colors="red")
        reported = validate_entry_manual(candidate, 0)
        hits = [msg for msg in reported if "colors" in msg and "array" in msg]
        self.assertTrue(hits)

    def test_colors_empty_array(self):
        """An empty colors list must produce an error naming 'colors'."""
        candidate = _valid_entry()
        candidate["scene"].update(colors=[])
        reported = validate_entry_manual(candidate, 0)
        hits = [msg for msg in reported if "colors" in msg]
        self.assertTrue(hits)

    def test_scene_not_object(self):
        """A string scene must produce an error naming 'scene' and 'object'."""
        reported = validate_entry_manual(_valid_entry(scene="not an object"), 0)
        hits = [msg for msg in reported if "scene" in msg and "object" in msg]
        self.assertTrue(hits)

    def test_timestamp_bad_format(self):
        """A non-numeric timestamp must produce an error naming 'timestamp'."""
        reported = validate_entry_manual(_valid_entry(timestamp="abc"), 0)
        hits = [msg for msg in reported if "timestamp" in msg]
        self.assertTrue(hits)

    def test_timestamp_valid_formats(self):
        """Each sample M:SS / MM:SS timestamp validates without errors."""
        samples = ("0:00", "1:30", "12:45", "99:59")
        for ts in samples:
            candidate = _valid_entry(timestamp=ts)
            reported = validate_entry_manual(candidate, 0)
            self.assertEqual(reported, [], f"Failed for timestamp: {ts}")
|
|
|
|
|
|
class TestEmptyValues(unittest.TestCase):
    """Empty strings should be caught."""

    def test_empty_song(self):
        """A blank song must produce an error naming 'song' and 'empty'."""
        reported = validate_entry_manual(_valid_entry(song=""), 0)
        hits = [msg for msg in reported if "song" in msg and "empty" in msg]
        self.assertTrue(hits)

    def test_empty_artist(self):
        """A blank artist must produce an error naming 'artist' and 'empty'."""
        reported = validate_entry_manual(_valid_entry(artist=""), 0)
        hits = [msg for msg in reported if "artist" in msg and "empty" in msg]
        self.assertTrue(hits)

    def test_empty_lyric_line(self):
        """A blank lyric line must produce an error naming 'lyric_line' and 'empty'."""
        reported = validate_entry_manual(_valid_entry(lyric_line=""), 0)
        hits = [msg for msg in reported if "lyric_line" in msg and "empty" in msg]
        self.assertTrue(hits)

    def test_empty_scene_mood(self):
        """A blank scene mood must produce an error naming 'mood' and 'empty'."""
        candidate = _valid_entry()
        candidate["scene"].update(mood="")
        reported = validate_entry_manual(candidate, 0)
        hits = [msg for msg in reported if "mood" in msg and "empty" in msg]
        self.assertTrue(hits)

    def test_empty_color_in_array(self):
        """A blank element at index 1 of colors must be reported as 'colors[1]'."""
        candidate = _valid_entry()
        candidate["scene"].update(colors=["red", "", "blue"])
        reported = validate_entry_manual(candidate, 0)
        hits = [msg for msg in reported if "colors[1]" in msg]
        self.assertTrue(hits)
|
|
|
|
|
|
class TestDescriptionLength(unittest.TestCase):
    """Description minimum length check."""

    def test_short_description(self):
        """A five-character description must be reported with 'description' and 'short'."""
        candidate = _valid_entry()
        candidate["scene"].update(description="Short")
        reported = validate_entry_manual(candidate, 0)
        hits = [msg for msg in reported if "description" in msg and "short" in msg]
        self.assertTrue(hits)

    def test_valid_description_length(self):
        """A long description must produce no description-related errors."""
        candidate = _valid_entry()
        candidate["scene"].update(
            description="A long and detailed scene description that exceeds ten characters."
        )
        reported = validate_entry_manual(candidate, 0)
        # Only description-related messages matter here; the match is
        # case-insensitive.
        about_description = list(
            filter(lambda msg: "description" in msg.lower(), reported)
        )
        self.assertEqual(about_description, [])
|
|
|
|
|
|
class TestFileValidation(unittest.TestCase):
    """Test full-file validation."""

    def _write_lines(self, lines):
        """Write raw text lines to a temporary ``.jsonl`` file.

        Each item is written followed by a newline. The file is registered
        for deletion via ``addCleanup`` so it is removed even when the test
        (or the write itself) fails.

        Args:
            lines: Iterable of strings, one per output line (no newline).

        Returns:
            ``Path`` of the written file.
        """
        # The context manager guarantees the handle is closed even if a
        # write raises; delete=False keeps the file on disk after closing.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
        ) as handle:
            path = Path(handle.name)
            self.addCleanup(path.unlink)
            handle.writelines(f"{line}\n" for line in lines)
        return path

    def _write_jsonl(self, entries):
        """Serialize ``entries`` as one JSON document per line; return the file path."""
        return self._write_lines(json.dumps(entry) for entry in entries)

    def _validate(self, path):
        """Load the schema from SCHEMA_PATH and validate ``path``.

        Returns:
            The ``(total, errors)`` pair returned by ``validate_file``.
        """
        schema = load_schema(SCHEMA_PATH)
        return validate_file(path, schema)

    def test_valid_file(self):
        """Two valid entries: total is 2 and no errors are reported."""
        path = self._write_jsonl([_valid_entry(), _valid_entry(beat=2)])
        total, errors = self._validate(path)
        self.assertEqual(total, 2)
        self.assertEqual(errors, [])

    def test_invalid_entries_reported(self):
        """An entry missing 'song' on the second line is reported."""
        entry_bad = _valid_entry()
        del entry_bad["song"]
        path = self._write_jsonl([_valid_entry(), entry_bad])
        total, errors = self._validate(path)
        self.assertEqual(total, 2)
        self.assertGreater(len(errors), 0)
        # Presumably the message carries the 1-based line number (line 2).
        # NOTE(review): a bare "2" substring is a weak check; tighten it once
        # the exact message format is confirmed.
        self.assertTrue(
            any("2" in e for e in errors),
            f"no error references line 2; got: {errors!r}",
        )

    def test_malformed_json(self):
        """A non-JSON line is counted and reported with an error containing 'JSON'."""
        path = self._write_lines(["{invalid json", json.dumps(_valid_entry())])
        total, errors = self._validate(path)
        self.assertEqual(total, 2)
        self.assertGreater(len(errors), 0)
        self.assertTrue(
            any("JSON" in e for e in errors),
            f"no error mentions 'JSON'; got: {errors!r}",
        )

    def test_empty_lines_ignored(self):
        """Blank lines between entries are neither counted nor reported."""
        valid_line = json.dumps(_valid_entry())
        # Two empty strings produce two blank lines between the entries.
        path = self._write_lines([valid_line, "", "", valid_line])
        total, errors = self._validate(path)
        self.assertEqual(total, 2)
        self.assertEqual(errors, [])
|
|
|
|
|
|
# Run the suite with unittest's command-line runner when executed as a script.
if __name__ == "__main__":
    unittest.main()
|