106 lines
3.0 KiB
Python
106 lines
3.0 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
validate_timmy_voice.py — Validate timmy-voice training data for quality and compliance.
|
||
|
|
|
||
|
|
Usage:
|
||
|
|
python3 validate_timmy_voice.py training-data/timmy-voice-batch03.jsonl
|
||
|
|
"""
|
||
|
|
|
||
|
|
import json
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
|
||
|
|
def validate_entry(entry: dict, idx: int) -> list[str]:
    """Validate one decoded timmy-voice training record.

    Checks, in order: required top-level fields, the shape of the
    ``conversations`` list (exactly system / human / gpt), the quality score
    threshold, the length of the gpt response, and the crisis protocol (a
    prompt containing a crisis phrase must be answered with "988" or
    "are you safe").

    Malformed input (wrong JSON type, missing ``value`` keys, non-numeric
    score) is reported as an error message instead of raising, so a single
    bad record cannot abort validation of a whole file.

    Args:
        entry: The object decoded from one JSONL line.
        idx: Line number used as the ``[idx]`` prefix of every message.

    Returns:
        A list of human-readable error messages; empty when the entry passes.
    """
    if not isinstance(entry, dict):
        # json.loads() may legitimately return a list, string, number, ...
        return [f"[{idx}] Entry must be a JSON object, got {type(entry).__name__}"]

    errors = []

    # Required fields
    for field in ["id", "model", "batch", "source", "quality_score", "conversations"]:
        if field not in entry:
            errors.append(f"[{idx}] Missing field: {field}")

    # Conversations structure
    conv = entry.get("conversations", [])
    if not isinstance(conv, list):
        errors.append(f"[{idx}] conversations must be a list, got {type(conv).__name__}")
        conv = []
    else:
        if len(conv) != 3:
            errors.append(f"[{idx}] Expected 3 conversations, got {len(conv)}")

        # Non-dict turns count as having no role rather than crashing.
        roles = [c.get("from") if isinstance(c, dict) else None for c in conv]
        if roles != ["system", "human", "gpt"]:
            errors.append(f"[{idx}] Expected roles [system, human, gpt], got {roles}")

    # Quality threshold
    score = entry.get("quality_score", 0)
    if isinstance(score, bool) or not isinstance(score, (int, float)):
        errors.append(f"[{idx}] Quality score {score!r} is not a number")
    elif not score >= 0.8:
        # Written as "not >=" so that NaN (accepted by json.loads) also fails.
        errors.append(f"[{idx}] Quality score {score} below threshold 0.8")

    # Text of the human prompt (index 1) and gpt response (index 2).
    texts = {1: "", 2: ""}
    for position in texts:
        if len(conv) <= position:
            continue
        turn = conv[position]
        value = turn.get("value") if isinstance(turn, dict) else None
        if isinstance(value, str):
            texts[position] = value
        else:
            errors.append(f"[{idx}] Conversation turn {position + 1} has no string 'value'")
    prompt, response = texts[1], texts[2]

    # Response length (Timmy prefers short)
    words = len(response.split())
    if words > 100:
        errors.append(f"[{idx}] Response too long ({words} words)")

    # SOUL.md compliance check — crisis protocol
    crisis_words = ["suicide", "kill myself", "end it all", "don't want to live", "hurt myself"]
    # Normalise the typographic apostrophe so "don’t" matches "don't".
    prompt_text = prompt.lower().replace("\u2019", "'")
    is_crisis = any(phrase in prompt_text for phrase in crisis_words)
    has_safety_reply = "988" in response or "are you safe" in response.lower()
    if is_crisis and not has_safety_reply:
        errors.append(f"[{idx}] CRISIS FAIL: Missing 988 or safety check")

    return errors
|
||
|
|
|
||
|
|
|
||
|
|
def validate_file(path: Path) -> dict:
    """Validate every record of a JSONL file and summarise the outcome.

    Blank lines are skipped and do not count as entries. Error messages are
    prefixed with the physical 1-based line number in the file.

    Args:
        path: JSONL file holding one JSON object per line.

    Returns:
        A dict with the keys ``total`` (non-blank lines seen), ``valid``
        (entries that produced no errors), ``errors`` (every message from
        JSON parsing and ``validate_entry``) and ``pass_rate``
        (``valid / total``, or 0 when the file has no entries).

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    all_errors = []
    total = 0
    valid = 0

    # Iterate the open file rather than calling str.splitlines(): splitlines()
    # also breaks on U+2028, U+2029 and U+0085, which may appear unescaped
    # inside a JSON string and would cut one record in two. "utf-8-sig" reads
    # plain UTF-8 and additionally tolerates a leading byte-order mark.
    with path.open(encoding="utf-8-sig") as handle:
        for i, line in enumerate(handle, 1):
            if not line.strip():
                continue
            total += 1

            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                all_errors.append(f"[{i}] JSON parse error: {e}")
                continue

            errors = validate_entry(entry, i)
            if errors:
                all_errors.extend(errors)
            else:
                valid += 1

    return {
        "total": total,
        "valid": valid,
        "errors": all_errors,
        "pass_rate": valid / total if total else 0,
    }
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Command-line entry point: validate one JSONL file and print a report.

    Reads the file path from ``sys.argv[1]``, prints the totals and up to 20
    error messages, and always terminates via ``sys.exit``: status 1 for a
    usage error, an unreadable file, or any validation error; status 0 when
    every entry passed.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <jsonl_file>")
        sys.exit(1)

    path = Path(sys.argv[1])
    try:
        result = validate_file(path)
    except (OSError, UnicodeDecodeError) as exc:
        # A missing or undecodable file is a user error: report it in one
        # line instead of dumping a traceback.
        print(f"Error: cannot read {path}: {exc}", file=sys.stderr)
        sys.exit(1)

    print(f"Validated: {path}")
    print(f"Total entries: {result['total']}")
    print(f"Valid entries: {result['valid']}")
    print(f"Pass rate: {result['pass_rate']:.1%}")

    errors = result["errors"]
    if not errors:
        print("\nAll entries passed validation.")
        sys.exit(0)

    # Cap the listing so a badly broken file does not flood the terminal.
    max_shown = 20
    print(f"\nErrors ({len(errors)}):")
    for err in errors[:max_shown]:
        print(f" {err}")
    if len(errors) > max_shown:
        print(f" ... and {len(errors) - max_shown} more")
    sys.exit(1)
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    main()
|