31 lines
1.3 KiB
Python
31 lines
1.3 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
import json, os
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
def smoke():
    """Smoke-test the generated crisis-response training data.

    Reads ``training-data/crisis-response.jsonl`` (relative to the current
    working directory) and checks that:

    * the file exists and holds at least 2000 non-blank lines;
    * each of the first 100 entries carries every required key, with
      ``988_included`` and ``safety_check_included`` both the boolean
      ``True``;
    * the three expected ``family`` values all occur somewhere in the file.

    It then prints how many entries have a truthy ``gospel_included``.

    Raises:
        AssertionError: if any of the checks above fails.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if an entry lacks ``family`` or ``gospel_included``
            (only the first 100 entries get the friendlier schema check).
    """
    out = Path("training-data/crisis-response.jsonl")
    # Checks raise AssertionError explicitly instead of using `assert`, so
    # they still run when Python is started with -O.
    if not out.exists():
        raise AssertionError("output missing")

    # Read the file once and close it promptly. Blank lines are dropped here
    # so every later check works on the same set of records; JSON text is
    # UTF-8, so decode it as such rather than with the locale default.
    with out.open(encoding="utf-8") as fh:
        lines = [ln for ln in fh if ln.strip()]
    if len(lines) < 2000:
        raise AssertionError(f"pairs={len(lines)}")

    # Parse once; the schema, family and gospel checks all reuse this list.
    entries = [json.loads(ln) for ln in lines]

    req = {
        "id",
        "family",
        "category",
        "scenario",
        "response",
        "risk_level",
        "988_included",
        "gospel_included",
        "safety_check_included",
    }
    # Only a sample of the first 100 entries gets the full schema check.
    for entry in entries[:100]:
        miss = req.difference(entry.keys())
        if miss:
            raise AssertionError(f"missing: {miss}")
        for flag in ("988_included", "safety_check_included"):
            # Must be the boolean True, not merely a truthy value such as 1.
            if not (isinstance(entry[flag], bool) and entry[flag]):
                raise AssertionError(
                    f"{flag} must be true (id={entry['id']!r})"
                )
    print(f"✅ {len(lines)} pairs, schema valid, 988+safety present in sample")

    fams = {entry["family"] for entry in entries}
    exp = {"indirect_signals", "manipulation_edge_cases", "post_crisis_recovery"}
    if not exp.issubset(fams):
        raise AssertionError(f"Missing families: {exp-fams}")
    print(f"✅ Families: {sorted(fams)}")

    gospel_count = sum(entry["gospel_included"] for entry in entries)
    print(f"✅ Gospel present in {gospel_count}/{len(entries)} entries")
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # smoke() opens its dataset via a relative path, so switch to the parent
    # of the directory that contains this script before running it.
    # abspath() guards against a relative __file__ (possible on Python < 3.9),
    # for which .parent.parent would collapse to "." and leave the process in
    # the wrong directory. Symlinks are deliberately not resolved.
    os.chdir(Path(os.path.abspath(__file__)).parent.parent)
    smoke()
    print("\n✅ All smoke tests passed")
|