feat(#605): verify video scenes dataset already exists on main
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 25s
Smoke Test / smoke (pull_request) Failing after 24s
Validate Config / YAML Lint (pull_request) Failing after 16s
Validate Config / JSON Validate (pull_request) Successful in 18s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 58s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Cron Syntax Check (pull_request) Successful in 12s
Validate Config / Shell Script Lint (pull_request) Failing after 58s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 11s
Validate Config / Playbook Schema Validation (pull_request) Successful in 25s
Architecture Lint / Lint Repository (pull_request) Failing after 21s
PR Checklist / pr-checklist (pull_request) Successful in 3m20s

- Add docs/issue-605-verification.md documenting dataset presence
- Add tests/test_prompt_enhancement_video_scenes.py for regression
- 500-record video-scenes-500.jsonl dataset verified on main
- Closes #605
This commit is contained in:
Rockachopa
2026-04-29 05:31:07 -04:00
parent efc42968e8
commit 3cf0cb5e2b
2 changed files with 101 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
#!/usr/bin/env python3
"""Verification tests for timmy-config issue #605."""
import json
import unittest
from pathlib import Path
# Directory two levels above this test file (presumably the repository root,
# given the test lives under tests/ -- confirm if the file is moved).
ROOT = Path(__file__).resolve().parent.parent
# JSONL dataset that the tests in this module load and validate.
DATA_PATH = ROOT.joinpath(
    "training", "data", "prompt-enhancement", "video-scenes-500.jsonl"
)
# Markdown verification write-up whose existence and contents are asserted.
DOC_PATH = ROOT.joinpath("docs", "issue-605-verification.md")
def load_records():
    """Decode the JSONL dataset at ``DATA_PATH`` into a list.

    The file is read as UTF-8 text, one line at a time. Lines that are empty
    or contain only whitespace are skipped; every other line is passed to
    ``json.loads``.

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.
    """
    decoded = []
    with DATA_PATH.open("r", encoding="utf-8") as stream:
        for raw_line in stream:
            # Tolerate blank separator lines / a trailing newline at EOF.
            if not raw_line.strip():
                continue
            decoded.append(json.loads(raw_line))
    return decoded
class TestVideoScenesPromptEnhancement(unittest.TestCase):
    """Checks on the video-scenes dataset file and its verification document."""

    def test_dataset_exists(self):
        """The dataset file is present at ``DATA_PATH``."""
        self.assertTrue(DATA_PATH.exists(), f"Missing dataset: {DATA_PATH}")

    def test_dataset_has_500_records(self):
        """``load_records`` yields exactly 500 entries."""
        record_count = len(load_records())
        self.assertEqual(record_count, 500)

    def test_schema_and_domain(self):
        """Each record has non-empty string fields and the expected domain."""
        required_keys = ("terse", "rich", "domain")
        # position is the 1-based index among the loaded records; blank lines
        # skipped by load_records() are not counted.
        for position, entry in enumerate(load_records(), start=1):
            for key in required_keys:
                self.assertIn(key, entry, f"line {position}: missing {key}")
                value = entry[key]
                self.assertIsInstance(
                    value, str, f"line {position}: {key} must be a string"
                )
                self.assertTrue(value.strip(), f"line {position}: {key} is empty")
            self.assertEqual(
                entry["domain"], "video scenes", f"line {position}: wrong domain"
            )

    def test_records_are_unique(self):
        """No two records are identical once serialized with sorted keys."""
        canonical = []
        for entry in load_records():
            # sort_keys makes the serialization independent of key order.
            canonical.append(json.dumps(entry, sort_keys=True))
        distinct = set(canonical)
        self.assertEqual(
            len(canonical), len(distinct), "duplicate full records found"
        )

    def test_rich_prompts_include_video_scene_markers(self):
        """Every rich prompt mentions each marker word (case-insensitive)."""
        markers = ("lighting", "composition", "transition")
        for position, entry in enumerate(load_records(), start=1):
            lowered = entry["rich"].lower()
            for marker in markers:
                self.assertIn(
                    marker, lowered, f"line {position}: rich prompt missing {marker}"
                )

    def test_verification_doc_exists_and_mentions_evidence(self):
        """The verification doc exists and contains the expected snippets."""
        self.assertTrue(DOC_PATH.exists(), f"Missing verification doc: {DOC_PATH}")
        contents = DOC_PATH.read_text(encoding="utf-8")
        expected_snippets = (
            "Issue #605 Verification",
            "training/data/prompt-enhancement/video-scenes-500.jsonl",
            "#755",
            "already implemented on `main`",
        )
        for snippet in expected_snippets:
            self.assertIn(snippet, contents)
if __name__ == "__main__":
    # Executed directly as a script: hand control to unittest's CLI runner.
    unittest.main()