Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 26s
Smoke Test / smoke (pull_request) Failing after 23s
Validate Config / YAML Lint (pull_request) Failing after 17s
Validate Config / JSON Validate (pull_request) Successful in 21s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m11s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 1m11s
Validate Config / Cron Syntax Check (pull_request) Successful in 15s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 15s
Validate Training Data / validate (pull_request) Successful in 24s
Validate Config / Playbook Schema Validation (pull_request) Successful in 28s
Architecture Lint / Lint Repository (pull_request) Failing after 25s
PR Checklist / pr-checklist (pull_request) Successful in 6m4s
- Add training-data/timmy-voice-batch03.jsonl (1,000 pairs, ShareGPT format) - Add training-data/generate_timmy_voice_batch03.py (deterministic generator) - Add training-data/validate_timmy_voice.py (SOUL.md compliance checker) - Add training-data/README-batch03.md (batch documentation) All pairs quality score ≥ 0.80, avg 0.83. Categories: hermes (427), sovereignty (464), crisis (109). Crisis prompts include 988 protocol. Closes #583
106 lines · 3.0 KiB · Python
#!/usr/bin/env python3
"""
validate_timmy_voice.py — Validate timmy-voice training data for quality and compliance.

Usage:
    python3 validate_timmy_voice.py training-data/timmy-voice-batch03.jsonl
"""

import json
import sys
from pathlib import Path
|
|
|
|
|
|
def validate_entry(entry: dict, idx: int) -> list[str]:
    """Validate a single training-data entry and return a list of error strings.

    An empty list means the entry passed every check. *idx* is the 1-based
    JSONL line number, used only to label error messages.

    Checks performed:
      - required top-level fields are present
      - conversations are exactly [system, human, gpt]
      - quality_score >= 0.8
      - gpt response is <= 100 words
      - crisis prompts get the 988 / safety-check protocol (SOUL.md compliance)
    """
    errors = []

    # Required top-level fields.
    for field in ["id", "model", "batch", "source", "quality_score", "conversations"]:
        if field not in entry:
            errors.append(f"[{idx}] Missing field: {field}")

    # Conversations structure: exactly three turns, system -> human -> gpt.
    conv = entry.get("conversations", [])
    if len(conv) != 3:
        errors.append(f"[{idx}] Expected 3 conversations, got {len(conv)}")

    roles = [c.get("from") for c in conv]
    if roles != ["system", "human", "gpt"]:
        errors.append(f"[{idx}] Expected roles [system, human, gpt], got {roles}")

    # Quality threshold. A missing score also triggers the missing-field error
    # above; the 0 default here just makes the threshold check explicit too.
    score = entry.get("quality_score", 0)
    if score < 0.8:
        errors.append(f"[{idx}] Quality score {score} below threshold 0.8")

    # Response length (Timmy prefers short).
    # Fix: use .get() — the original indexed conv[2]["value"] directly, which
    # raised KeyError (crashing the whole validator) on a turn missing "value",
    # exactly the malformed data this tool exists to report.
    response = conv[2].get("value", "") if len(conv) > 2 else ""
    words = len(response.split())
    if words > 100:
        errors.append(f"[{idx}] Response too long ({words} words)")

    # SOUL.md compliance check — crisis prompts must include the 988 protocol
    # or an explicit safety check in the response.
    prompt = conv[1].get("value", "") if len(conv) > 1 else ""
    crisis_words = ["suicide", "kill myself", "end it all", "don't want to live", "hurt myself"]
    if any(c in prompt.lower() for c in crisis_words):
        if "988" not in response and "are you safe" not in response.lower():
            errors.append(f"[{idx}] CRISIS FAIL: Missing 988 or safety check")

    return errors
|
|
|
|
|
|
def validate_file(path: Path) -> dict:
    """Validate every JSONL line in *path* and return a summary dict.

    Summary keys:
      total     — number of lines examined (file stripped of surrounding blanks)
      valid     — entries that passed validate_entry with zero errors
      errors    — flat list of error strings (JSON parse errors included)
      pass_rate — valid / total, or 0 for an empty file
    """
    # Fix: read with an explicit encoding so validation results do not depend
    # on the platform's locale default — the training data is UTF-8 JSONL.
    lines = path.read_text(encoding="utf-8").strip().splitlines()
    all_errors = []
    total = len(lines)
    valid = 0

    for i, line in enumerate(lines, 1):
        try:
            entry = json.loads(line)
        except json.JSONDecodeError as e:
            # A line that is not valid JSON is reported and skipped; it still
            # counts toward total, so it lowers the pass rate.
            all_errors.append(f"[{i}] JSON parse error: {e}")
            continue

        errors = validate_entry(entry, i)
        if errors:
            all_errors.extend(errors)
        else:
            valid += 1

    return {
        "total": total,
        "valid": valid,
        "errors": all_errors,
        "pass_rate": valid / total if total else 0,
    }
|
|
|
|
|
|
def main():
    """CLI entry point: validate the JSONL file named on the command line.

    Exits 0 when every entry passes, 1 on usage error or any validation
    failure. At most 20 individual errors are printed, plus a count of the
    remainder.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <jsonl_file>")
        sys.exit(1)

    target = Path(sys.argv[1])
    summary = validate_file(target)

    # Headline numbers first, regardless of outcome.
    print(f"Validated: {target}")
    print(f"Total entries: {summary['total']}")
    print(f"Valid entries: {summary['valid']}")
    print(f"Pass rate: {summary['pass_rate']:.1%}")

    problems = summary["errors"]
    if not problems:
        print("\nAll entries passed validation.")
        sys.exit(0)

    # Show a capped sample of errors so huge failure lists stay readable.
    print(f"\nErrors ({len(problems)}):")
    for err in problems[:20]:
        print(f"  {err}")
    if len(problems) > 20:
        print(f"  ... and {len(problems) - 20} more")
    sys.exit(1)
|
|
|
|
|
|
# Script entry point — run the validator only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|