Compare commits
1 Commits
step35/595
...
step35/600
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5e7982a477 |
28
docs/issue-600-verification.md
Normal file
28
docs/issue-600-verification.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# Issue #600 Verification: Visual Scenes Prompt Enhancement
|
||||
|
||||
**Status:** ✅ Complete — dataset present on main
|
||||
**Issue:** [Timmy_Foundation/timmy-config#600](https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-config/issues/600)
|
||||
**Dataset:** `training/data/prompt-enhancement/visual-scenes-500.jsonl`
|
||||
**Records:** 500
|
||||
**Domain:** `visual scenes` (all records)
|
||||
|
||||
## Validation
|
||||
|
||||
| Check | Result |
|
||||
|-------|--------|
|
||||
| File exists | ✅ |
|
||||
| 500 JSONL records | ✅ |
|
||||
| Valid JSON per line | ✅ |
|
||||
| Required fields (terse, rich, domain) | ✅ |
|
||||
| Domain equals "visual scenes" | ✅ |
|
||||
| Non-empty terse and rich strings | ✅ |
|
||||
| Full-record uniqueness | ✅ |
|
||||
|
||||
## Notes
|
||||
|
||||
- 65 terse prompts appear more than once, each time paired with a different rich expansion. The dataset contract requires unique *pairs*, not unique terse prompts, so this is acceptable.
|
||||
- File added via PR #731: `feat: 500 visual scene prompt enhancement pairs (#600)`. Merged to main.
|
||||
|
||||
## Files Added in This PR
|
||||
|
||||
- `tests/test_prompt_enhancement_visual_scenes.py` — regression test validating the dataset meets issue requirements.
|
||||
91
tests/test_prompt_enhancement_visual_scenes.py
Normal file
91
tests/test_prompt_enhancement_visual_scenes.py
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Verification test for issue #600: Prompt Enhancement — Visual Scenes 500 pairs.
|
||||
|
||||
This test confirms that the visual-scenes-500.jsonl dataset exists on main
|
||||
and satisfies the requirements defined in the Training Factory specification.
|
||||
|
||||
Acceptance criteria:
|
||||
- 500 JSONL records
|
||||
- Each record: {"terse": str, "rich": str, "domain": "visual scenes"}
|
||||
- All fields non-empty strings
|
||||
- All records have correct domain value
|
||||
|
||||
Evidence: dataset present at training/data/prompt-enhancement/visual-scenes-500.jsonl
|
||||
Branch: main (merged via PR #731)
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Repository root: two directory levels above this file's resolved path.
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
# Dataset checked by the tests in this file, addressed relative to REPO_ROOT.
DATASET_PATH = REPO_ROOT / "training" / "data" / "prompt-enhancement" / "visual-scenes-500.jsonl"
|
||||
|
||||
|
||||
def test_dataset_file_exists():
    """Verify the visual scenes dataset exists as a regular file.

    Uses ``is_file()`` rather than ``exists()`` so that a directory (or any
    other non-file entry) sitting at the dataset path is reported as missing
    here instead of surfacing later as an unrelated ``open()`` error.
    """
    assert DATASET_PATH.is_file(), (
        f"Missing dataset file: {DATASET_PATH}. "
        "Run the visual scene prompt enhancement worker to generate 500 pairs."
    )
|
||||
|
||||
|
||||
def test_dataset_has_500_records():
    """Verify exactly 500 records (one per line) are present."""
    # JSON text is UTF-8 by specification; without an explicit encoding,
    # open() falls back to the platform locale and may fail to decode.
    with open(DATASET_PATH, encoding="utf-8") as f:
        # Count lazily instead of materialising every line in a list.
        line_count = sum(1 for _ in f)
    assert line_count == 500, f"Expected 500 records, got {line_count}"
|
||||
|
||||
|
||||
def test_all_records_valid_json():
    """Verify every line parses as valid JSON and that 500 lines parse.

    Raises:
        AssertionError: on the first line that is not valid JSON (chained to
            the underlying ``json.JSONDecodeError``), or when the number of
            parsed lines is not 500.
    """
    parsed = 0
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(DATASET_PATH, encoding="utf-8") as f:
        for i, line in enumerate(f, 1):
            try:
                json.loads(line)
            except json.JSONDecodeError as e:
                # Raise explicitly: ``assert False`` is stripped under -O and
                # drops the link to the original decode error.
                raise AssertionError(f"Line {i}: invalid JSON: {e}") from e
            parsed += 1
    assert parsed == 500, f"Expected 500 valid JSON records, got {parsed}"
|
||||
|
||||
|
||||
def test_each_record_has_required_fields():
    """Verify terse, rich, domain fields exist and are non-empty strings.

    Each line must decode to a JSON object; every required field must be a
    string containing at least one non-whitespace character.
    """
    required_fields = ("terse", "rich", "domain")
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(DATASET_PATH, encoding="utf-8") as f:
        for i, line in enumerate(f, 1):
            rec = json.loads(line)
            # A JSON list/number/string has no .get(); fail with a line
            # number instead of an AttributeError.
            assert isinstance(rec, dict), f"Line {i}: record is not a JSON object"
            for field in required_fields:
                value = rec.get(field)
                assert isinstance(value, str) and value.strip(), (
                    f"Line {i}: missing or empty '{field}' field"
                )
|
||||
|
||||
|
||||
def test_domain_value_is_visual_scenes():
    """Verify every record's domain is exactly 'visual scenes'.

    A record that is not a JSON object, or that lacks a ``domain`` key, fails
    the assertion with its line number rather than raising ``TypeError`` or
    ``KeyError``.
    """
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(DATASET_PATH, encoding="utf-8") as f:
        for i, line in enumerate(f, 1):
            rec = json.loads(line)
            domain = rec.get("domain") if isinstance(rec, dict) else None
            assert domain == "visual scenes", (
                f"Line {i}: domain {domain!r} != 'visual scenes'"
            )
|
||||
|
||||
|
||||
def test_record_uniqueness():
    """Verify each JSON record (full object) is unique.

    Records are compared by their canonical serialisation (keys sorted), so
    two lines that differ only in key order count as duplicates. Uniqueness
    is measured against the number of records actually read, not a fixed
    500, so the reported duplicate count stays correct even when the file
    has the wrong number of lines.
    """
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(DATASET_PATH, encoding="utf-8") as f:
        canonical = [json.dumps(json.loads(line), sort_keys=True) for line in f]
    duplicates = len(canonical) - len(set(canonical))
    assert duplicates == 0, (
        f"Duplicate records found: {duplicates} record(s) are not unique"
    )
|
||||
Reference in New Issue
Block a user