Compare commits

...

1 Commit

Author SHA1 Message Date
Alexander Whitestone
d7f86cdaed feat: quality gate test suite — 27 tests (#629)
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 21s
PR Checklist / pr-checklist (pull_request) Failing after 4m19s
Smoke Test / smoke (pull_request) Failing after 19s
Validate Config / YAML Lint (pull_request) Failing after 11s
Validate Config / JSON Validate (pull_request) Successful in 15s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m20s
Validate Config / Shell Script Lint (pull_request) Failing after 44s
Validate Config / Cron Syntax Check (pull_request) Successful in 4s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 4s
Validate Config / Playbook Schema Validation (pull_request) Successful in 23s
Architecture Lint / Lint Repository (pull_request) Has been cancelled
Validate Config / Python Test Suite (pull_request) Has been cancelled
tests/test_quality_gate.py:
  TestValidateEntryManual (18 tests):
    valid entry, missing fields (song/artist/beat/scene),
    type checks (empty song, beat=0, beat=string),
    timestamp format (valid M:SS, valid MM:SS, no colon, letters),
    scene validation (missing mood, missing colors, empty colors,
    too many colors (>5), description too short, scene not dict)

  TestValidateFile (6 tests):
    all valid, all invalid, mixed, JSON parse error,
    blank lines skipped, missing file

  TestStatisticsTracking (2 tests):
    error count matches, valid count accurate

  TestRealFiles (1 test + 6 subtests):
    validates all actual scene-descriptions/*.jsonl files

Total: 27 passed, 6 subtests passed in 0.04s
2026-04-15 19:07:25 -04:00

309
tests/test_quality_gate.py Normal file
View File

@@ -0,0 +1,309 @@
"""
Tests for training/data/scene-descriptions/validate.py — Quality Gate
Covers:
- Unit tests for each validation type
- Rejection workflow (invalid entries rejected)
- Statistics tracking (line_count, valid_count, error_count)
- Integration tests with sample JSONL content
"""
import json
import os
import sys
import tempfile
import unittest
from pathlib import Path
# Put the directory that holds validate.py on sys.path so it can be imported
# as a top-level module. __file__ is resolved to an absolute path first:
# with a bare relative __file__ (e.g. "test_quality_gate.py"),
# Path(__file__).parent.parent collapses to "." and the lookup would silently
# point at the wrong directory.
VALIDATE_DIR = (
    Path(__file__).resolve().parent.parent
    / "training"
    / "data"
    / "scene-descriptions"
)
sys.path.insert(0, str(VALIDATE_DIR))
from validate import validate_entry_manual, validate_file
class TestValidateEntryManual(unittest.TestCase):
    """Per-field checks for validate_entry_manual.

    Each test passes a single entry to the validator (with 1 as the second
    argument, presumably the line number) and inspects the returned list of
    error strings.
    """

    @staticmethod
    def _entry(omit=(), scene_omit=(), scene_patch=None, **top_level):
        """Build the minimal well-formed entry, then apply the requested edits.

        Args:
            omit: top-level keys to delete from the entry.
            scene_omit: keys to delete from the nested "scene" dict.
            scene_patch: mapping of "scene" keys to replacement values.
            **top_level: replacement values for top-level keys.

        Returns:
            A fresh dict; key order matches the baseline entry.
        """
        scene = {
            "mood": "m",
            "colors": ["c"],
            "composition": "c",
            "camera": "c",
            "description": "a" * 20,
        }
        if scene_patch:
            scene.update(scene_patch)
        for key in scene_omit:
            del scene[key]

        record = {
            "song": "S",
            "artist": "A",
            "beat": 1,
            "timestamp": "0:00",
            "duration_seconds": 30,
            "lyric_line": "x",
            "scene": scene,
        }
        record.update(top_level)
        for key in omit:
            del record[key]
        return record

    @staticmethod
    def _flagged(found, *needles):
        """Return True if some error message contains every needle."""
        return any(all(needle in message for needle in needles) for message in found)

    def test_valid_entry(self):
        entry = {
            "song": "Test Song",
            "artist": "Test Artist",
            "beat": 1,
            "timestamp": "0:30",
            "duration_seconds": 30,
            "lyric_line": "A valid lyric line here",
            "scene": {
                "mood": "hope",
                "colors": ["gold", "blue"],
                "composition": "wide shot",
                "camera": "static",
                "description": "A golden field stretching to the horizon at dawn",
            },
        }
        errors = validate_entry_manual(entry, 1)
        self.assertEqual(errors, [], f"Valid entry should have no errors: {errors}")

    # ── Required fields ──────────────────────────────────────

    def test_missing_song(self):
        found = validate_entry_manual(self._entry(omit=("song",)), 1)
        self.assertTrue(self._flagged(found, "song"))

    def test_missing_artist(self):
        found = validate_entry_manual(self._entry(omit=("artist",)), 1)
        self.assertTrue(self._flagged(found, "artist"))

    def test_missing_beat(self):
        found = validate_entry_manual(self._entry(omit=("beat",)), 1)
        self.assertTrue(self._flagged(found, "beat"))

    def test_missing_scene(self):
        found = validate_entry_manual(self._entry(omit=("scene",)), 1)
        self.assertTrue(self._flagged(found, "scene"))

    # ── Type checks ──────────────────────────────────────────

    def test_song_empty_string(self):
        found = validate_entry_manual(self._entry(song=""), 1)
        self.assertTrue(self._flagged(found, "song", "empty"))

    def test_beat_not_positive(self):
        found = validate_entry_manual(self._entry(beat=0), 1)
        self.assertTrue(self._flagged(found, "beat"))

    def test_beat_string_rejected(self):
        found = validate_entry_manual(self._entry(beat="one"), 1)
        self.assertTrue(self._flagged(found, "beat"))

    # ── Timestamp format ─────────────────────────────────────

    def test_timestamp_valid_mss(self):
        found = validate_entry_manual(self._entry(timestamp="0:30"), 1)
        self.assertFalse(self._flagged(found, "timestamp"))

    def test_timestamp_valid_mmss(self):
        found = validate_entry_manual(self._entry(timestamp="12:45"), 1)
        self.assertFalse(self._flagged(found, "timestamp"))

    def test_timestamp_invalid_no_colon(self):
        found = validate_entry_manual(self._entry(timestamp="90"), 1)
        self.assertTrue(self._flagged(found, "timestamp"))

    def test_timestamp_invalid_letters(self):
        found = validate_entry_manual(self._entry(timestamp="ab:cd"), 1)
        self.assertTrue(self._flagged(found, "timestamp"))

    # ── Scene validation ─────────────────────────────────────

    def test_scene_missing_mood(self):
        found = validate_entry_manual(self._entry(scene_omit=("mood",)), 1)
        self.assertTrue(self._flagged(found, "mood"))

    def test_scene_missing_colors(self):
        found = validate_entry_manual(self._entry(scene_omit=("colors",)), 1)
        self.assertTrue(self._flagged(found, "colors"))

    def test_scene_colors_empty_array(self):
        found = validate_entry_manual(self._entry(scene_patch={"colors": []}), 1)
        self.assertTrue(self._flagged(found, "colors", "non-empty"))

    def test_scene_colors_too_many(self):
        six_colors = ["a", "b", "c", "d", "e", "f"]
        found = validate_entry_manual(self._entry(scene_patch={"colors": six_colors}), 1)
        self.assertTrue(self._flagged(found, "colors", "max 5"))

    def test_scene_description_too_short(self):
        found = validate_entry_manual(self._entry(scene_patch={"description": "short"}), 1)
        self.assertTrue(self._flagged(found, "description", "too short"))

    def test_scene_not_dict(self):
        found = validate_entry_manual(self._entry(scene="not a dict"), 1)
        self.assertTrue(self._flagged(found, "scene", "object"))
class TestValidateFile(unittest.TestCase):
    """Integration tests: run validate_file against temporary JSONL files.

    validate_file is expected to return a 3-tuple
    (errors, line_count, valid_count); every test unpacks it that way.
    """

    def _write_temp_lines(self, lines):
        """Write raw text lines to a temp .jsonl file and return its path.

        Each item in ``lines`` is written followed by a newline. Removal is
        registered with addCleanup as soon as the file exists, so the file is
        deleted even if the test fails or validate_file raises.
        """
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as handle:
            self.addCleanup(os.unlink, handle.name)
            handle.writelines(line + "\n" for line in lines)
        return handle.name

    def _write_temp_jsonl(self, entries):
        """Serialize entries one JSON object per line; return the file path."""
        return self._write_temp_lines(json.dumps(entry) for entry in entries)

    def _valid_entry(self, **overrides):
        """Return a well-formed entry, with top-level keys replaced by overrides."""
        base = {
            "song": "Test Song",
            "artist": "Test Artist",
            "beat": 1,
            "timestamp": "0:30",
            "duration_seconds": 30,
            "lyric_line": "A valid lyric line",
            "scene": {
                "mood": "hope",
                "colors": ["gold", "blue"],
                "composition": "wide shot",
                "camera": "static",
                "description": "A golden field stretching to the horizon at dawn",
            },
        }
        base.update(overrides)
        return base

    def test_all_valid(self):
        path = self._write_temp_jsonl([self._valid_entry() for _ in range(5)])
        errors, line_count, valid_count = validate_file(path)
        self.assertEqual(len(errors), 0)
        self.assertEqual(line_count, 5)
        self.assertEqual(valid_count, 5)

    def test_all_invalid(self):
        path = self._write_temp_jsonl([{"bad": "data"}, {"also": "bad"}])
        errors, line_count, valid_count = validate_file(path)
        self.assertGreater(len(errors), 0)
        self.assertEqual(line_count, 2)
        self.assertEqual(valid_count, 0)

    def test_mixed_valid_invalid(self):
        entries = [self._valid_entry(), {"bad": "data"}, self._valid_entry()]
        path = self._write_temp_jsonl(entries)
        errors, line_count, valid_count = validate_file(path)
        self.assertGreater(len(errors), 0)
        self.assertEqual(line_count, 3)
        self.assertEqual(valid_count, 2)

    def test_json_parse_error(self):
        path = self._write_temp_lines([
            '{"valid": true}',
            'NOT JSON {{{',
            '{"also_valid": true}',
        ])
        errors, line_count, valid_count = validate_file(path)
        self.assertTrue(any("JSON parse" in e for e in errors))
        # The unparseable line still counts towards line_count.
        self.assertEqual(line_count, 3)

    def test_blank_lines_skipped(self):
        path = self._write_temp_lines([
            json.dumps(self._valid_entry()),
            '',   # empty line
            ' ',  # whitespace-only line
            json.dumps(self._valid_entry()),
        ])
        errors, line_count, valid_count = validate_file(path)
        self.assertEqual(line_count, 2)
        self.assertEqual(valid_count, 2)

    def test_missing_file(self):
        # NOTE(review): the returned errors list is not checked here; confirm
        # what validate_file reports for a missing path before asserting on it.
        errors, line_count, valid_count = validate_file("/nonexistent/file.jsonl")
        self.assertEqual(line_count, 0)
        self.assertEqual(valid_count, 0)
class TestStatisticsTracking(unittest.TestCase):
    """Verify the counts returned by validate_file (errors, line_count, valid_count)."""

    def _write_temp(self, entries):
        """Write entries as JSONL to a temp file and return its path.

        Removal is registered with addCleanup as soon as the file exists, so
        the file is deleted even if the test fails or validate_file raises.
        """
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as handle:
            self.addCleanup(os.unlink, handle.name)
            for entry in entries:
                handle.write(json.dumps(entry) + "\n")
        return handle.name

    def test_error_count_matches(self):
        entries = [
            {"bad": 1},  # missing required
            {"bad": 2},  # missing required
            {"bad": 3},  # missing required
        ]
        path = self._write_temp(entries)
        errors, line_count, valid_count = validate_file(path)
        # Each entry lacks several required fields, so the total must be
        # strictly greater than the number of entries (more than one apiece
        # on average).
        self.assertGreater(len(errors), 3)
        self.assertEqual(valid_count, 0)

    def test_valid_count_accurate(self):
        valid = {
            "song": "S", "artist": "A", "beat": 1, "timestamp": "0:00",
            "duration_seconds": 30, "lyric_line": "x",
            "scene": {
                "mood": "m",
                "colors": ["c"],
                "composition": "c",
                "camera": "c",
                "description": "a" * 20,
            },
        }
        path = self._write_temp([valid, valid, valid])
        errors, line_count, valid_count = validate_file(path)
        self.assertEqual(line_count, 3)
        self.assertEqual(valid_count, 3)
        self.assertEqual(len(errors), 0)
class TestRealFiles(unittest.TestCase):
    """Integration tests against the JSONL files stored in VALIDATE_DIR."""

    def test_scene_descriptions_validate(self):
        """All scene description JSONL files should pass validation."""
        # Sorted so subtests run (and fail) in a stable order across platforms.
        jsonl_files = sorted(VALIDATE_DIR.glob("*.jsonl"))
        if not jsonl_files:
            self.skipTest("No JSONL files found")
        for filepath in jsonl_files:
            # validate_file runs inside the subTest so that an exception on
            # one file is attributed to that file and the loop continues with
            # the remaining files instead of aborting the whole test.
            with self.subTest(file=filepath.name):
                errors, line_count, valid_count = validate_file(str(filepath))
                self.assertEqual(
                    len(errors), 0,
                    f"{filepath.name} has {len(errors)} errors: {errors[:3]}",
                )
                self.assertEqual(line_count, valid_count)
                self.assertGreater(line_count, 0)
# Support direct execution (python tests/test_quality_gate.py) in addition to
# discovery by a test runner.
if __name__ == "__main__":
    unittest.main()