Compare commits
1 Commits
step35/443
...
fix/605
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7fdc285b52 |
46
docs/issue-605-verification.md
Normal file
46
docs/issue-605-verification.md
Normal file
@@ -0,0 +1,46 @@
|
||||
# Issue #605 Verification
|
||||
|
||||
Status: already implemented on `main`.
|
||||
|
||||
Issue: Prompt Enhancement: Video Scenes — 500 Terse→Rich Pairs
|
||||
|
||||
What is already present on `main`
|
||||
- `training/data/prompt-enhancement/video-scenes-500.jsonl`
|
||||
- 500 JSONL records
|
||||
- every record includes `terse`, `rich`, and `domain`
|
||||
- every `domain` value is `video scenes`
|
||||
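- 500/500 full records are unique (uniqueness is measured on the whole record; `terse` values on their own repeat — see `unique_terse` below)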
|
||||
- every `rich` prompt includes video-scene structure markers for `lighting`, `composition`, and `transition`
|
||||
|
||||
Evidence gathered from a fresh clone
|
||||
- validation against `training/data/prompt-enhancement/video-scenes-500.jsonl` returned:
|
||||
- `count = 500`
|
||||
- `unique_records = 500`
|
||||
- `unique_terse = 120`
|
||||
- `domains = ['video scenes']`
|
||||
- `missing_keys = 0`
|
||||
- all 500 `rich` prompts contain `lighting`, `composition`, and `transition`
|
||||
- closed PRs `#755` (`fix/605`) and `#648` (`feat/605-video-scenes-prompts`) show prior attempts on this lane
|
||||
- SHA-256 of `training/data/prompt-enhancement/video-scenes-500.jsonl` on `origin/main` matches the same file on remote branch `fix/605`, which shows the requested dataset is already present on `main`
|
||||
|
||||
Verification commands
|
||||
```bash
|
||||
python3 - <<'PY'
|
||||
import json
from pathlib import Path

# Dataset under verification, relative to the repository root.
path = Path('training/data/prompt-enhancement/video-scenes-500.jsonl')

# Iterate the file line by line as UTF-8 instead of read_text().splitlines():
#  - an explicit encoding keeps the result independent of the platform locale;
#  - str.splitlines() also splits on U+2028/U+2029/U+0085/VT/FF, which may appear
#    unescaped inside a JSON string and would cut a valid record in half.
# Blank lines are skipped so a trailing newline does not count as a record.
with path.open('r', encoding='utf-8') as handle:
    records = [json.loads(line) for line in handle if line.strip()]

# Total number of parsed records.
print('count', len(records))
# Distinct records after canonical (key-sorted) serialisation.
print('unique_records', len({json.dumps(r, sort_keys=True) for r in records}))
# Distinct terse prompts only.
print('unique_terse', len({r['terse'] for r in records}))
# Every domain value that occurs in the file.
print('domains', sorted({r.get('domain') for r in records}))
# Records where a required key is absent or blank after stripping.
print('missing_keys', sum(any(k not in r or not str(r[k]).strip() for k in ('terse', 'rich', 'domain')) for r in records))
# Case-insensitive marker counts in the rich prompts.
print('lighting_count', sum('lighting' in r['rich'].lower() for r in records))
print('composition_count', sum('composition' in r['rich'].lower() for r in records))
print('transition_count', sum('transition' in r['rich'].lower() for r in records))
|
||||
PY
|
||||
```
|
||||
|
||||
Recommendation
|
||||
- Close issue #605 as already implemented on `main`.
|
||||
- This branch only adds a durable verification note and regression test so the open issue can be closed cleanly without regenerating duplicate training data.
|
||||
55
tests/test_prompt_enhancement_video_scenes.py
Normal file
55
tests/test_prompt_enhancement_video_scenes.py
Normal file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Verification tests for timmy-config issue #605."""
|
||||
|
||||
import json
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
# Two levels up from this file's resolved location; every path below hangs off it.
ROOT = Path(__file__).resolve().parent.parent
# JSONL dataset whose contents the tests validate.
DATA_PATH = ROOT.joinpath("training", "data", "prompt-enhancement", "video-scenes-500.jsonl")
# Markdown note whose presence and key phrases the tests validate.
DOC_PATH = ROOT.joinpath("docs", "issue-605-verification.md")
|
||||
|
||||
|
||||
def load_records():
    """Parse the dataset at ``DATA_PATH`` and return its records as a list.

    The file is read as UTF-8, one JSON document per line. Lines that are
    empty or whitespace-only are skipped; every other line is passed to
    ``json.loads`` and the results are returned in file order.
    """
    parsed = []
    with DATA_PATH.open("r", encoding="utf-8") as stream:
        for raw_line in stream:
            if not raw_line.strip():
                # Blank separator or trailing line: not a record.
                continue
            parsed.append(json.loads(raw_line))
    return parsed
|
||||
|
||||
|
||||
class TestVideoScenesPromptEnhancement(unittest.TestCase):
    """Regression checks for the video-scenes dataset and its verification note."""

    def test_dataset_exists(self):
        """The dataset file is present at ``DATA_PATH``."""
        dataset_present = DATA_PATH.exists()
        self.assertTrue(dataset_present, f"Missing dataset: {DATA_PATH}")

    def test_dataset_has_500_records(self):
        """The dataset parses to exactly 500 records."""
        record_count = len(load_records())
        self.assertEqual(record_count, 500)

    def test_schema_and_domain(self):
        """Each record has non-empty string fields and the expected domain."""
        required_fields = ("terse", "rich", "domain")
        for line_no, entry in enumerate(load_records(), start=1):
            for field in required_fields:
                self.assertIn(field, entry, f"line {line_no}: missing {field}")
                value = entry[field]
                self.assertIsInstance(value, str, f"line {line_no}: {field} must be a string")
                self.assertTrue(value.strip(), f"line {line_no}: {field} is empty")
            # Checked once per record, after all required fields are known to exist.
            self.assertEqual(entry["domain"], "video scenes", f"line {line_no}: wrong domain")

    def test_records_are_unique(self):
        """No two records are identical once serialised with sorted keys."""
        canonical = [json.dumps(entry, sort_keys=True) for entry in load_records()]
        distinct = set(canonical)
        self.assertEqual(len(canonical), len(distinct), "duplicate full records found")

    def test_rich_prompts_include_video_scene_markers(self):
        """Each rich prompt mentions lighting, composition and transition (case-insensitive)."""
        markers = ("lighting", "composition", "transition")
        for line_no, entry in enumerate(load_records(), start=1):
            lowered = entry["rich"].lower()
            for marker in markers:
                self.assertIn(marker, lowered, f"line {line_no}: rich prompt missing {marker}")

    def test_verification_doc_exists_and_mentions_evidence(self):
        """The verification note exists and contains the key evidence strings."""
        self.assertTrue(DOC_PATH.exists(), f"Missing verification doc: {DOC_PATH}")
        doc_text = DOC_PATH.read_text(encoding="utf-8")
        expected_snippets = (
            "Issue #605 Verification",
            "training/data/prompt-enhancement/video-scenes-500.jsonl",
            "#755",
            "already implemented on `main`",
        )
        for snippet in expected_snippets:
            self.assertIn(snippet, doc_text)
|
||||
|
||||
|
||||
# Run the suite when this file is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user