feat(training): add indirect crisis signal pairs (#597)
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 27s
PR Checklist / pr-checklist (pull_request) Failing after 2m9s
Smoke Test / smoke (pull_request) Failing after 13s
Validate Config / YAML Lint (pull_request) Failing after 11s
Validate Config / JSON Validate (pull_request) Successful in 13s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m11s
Validate Config / Shell Script Lint (pull_request) Failing after 51s
Validate Config / Cron Syntax Check (pull_request) Successful in 11s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 12s
Validate Config / Playbook Schema Validation (pull_request) Successful in 20s
Architecture Lint / Lint Repository (pull_request) Has been cancelled
Validate Config / Python Test Suite (pull_request) Has been cancelled

This commit is contained in:
Alexander Whitestone
2026-04-14 18:38:21 -04:00
parent 04dbf772b1
commit f598f0c9d8
3 changed files with 778 additions and 0 deletions

View File

@@ -0,0 +1,62 @@
import json
import subprocess
import sys
from pathlib import Path
# Directory one level above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]
# Builder script that the tests below run in a subprocess.
SCRIPT = ROOT.joinpath('training', 'build_indirect_crisis_pairs.py')
# Dataset file in the repository that the builder output is compared against.
DATASET = ROOT.joinpath('training', 'data', 'crisis_indirect_signals_500.jsonl')
def load_jsonl(path: Path) -> list:
    """Parse a JSON Lines file into a list of decoded JSON values.

    Args:
        path: Location of the JSONL file to read.

    Returns:
        One decoded value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with path.open(encoding='utf-8') as handle:
        # Iterating the file splits only on \n, \r and \r\n. str.splitlines()
        # would also split on U+2028/U+2029/U+0085, which may appear
        # unescaped inside a JSON string and would cut a record in half.
        return [json.loads(line) for line in handle if line.strip()]
def test_indirect_crisis_builder_generates_500_pairs(tmp_path):
    """Run the builder and check it writes 500 rows with 500 distinct ids."""
    target = tmp_path / 'pairs.jsonl'
    command = [sys.executable, str(SCRIPT), '--output', str(target)]
    # check=True makes a non-zero builder exit fail the test immediately.
    subprocess.run(command, check=True)

    records = load_jsonl(target)
    assert len(records) == 500

    # A set collapses duplicates, so its size equals the number of unique ids.
    unique_ids = {record['example_id'] for record in records}
    assert len(unique_ids) == 500
def test_indirect_crisis_pairs_cover_all_signal_types(tmp_path):
    """Run the builder and check each signal type appears exactly 125 times.

    The comparison is against the full expected mapping, so an unexpected
    extra signal type fails the test just like a wrong count does.
    """
    # Function-scope import keeps this test self-contained.
    from collections import Counter

    out = tmp_path / 'pairs.jsonl'
    subprocess.run([sys.executable, str(SCRIPT), '--output', str(out)], check=True)
    rows = load_jsonl(out)

    # Convert to a plain dict so the equality check compares like with like.
    counts = dict(Counter(row['signal_type'] for row in rows))
    assert counts == {
        'hopelessness': 125,
        'isolation': 125,
        'burden': 125,
        'giving_away_possessions': 125,
    }
def test_indirect_crisis_pairs_follow_response_protocol(tmp_path):
    """Run the builder and check every row against the response protocol.

    Each row's response must contain the safety question, the string '988'
    and the string 'Jesus', must not contain the listed harmful phrases
    (case-insensitively), and the row must carry the expected task type and
    issue number.
    """
    out = tmp_path / 'pairs.jsonl'
    subprocess.run([sys.executable, str(SCRIPT), '--output', str(out)], check=True)
    rows = load_jsonl(out)

    # Without this guard the loop below would pass vacuously on empty output.
    assert rows, 'builder produced no rows'

    for row in rows:
        response = row['response']
        # Lower-case once; both forbidden-phrase checks are case-insensitive.
        lowered = response.lower()
        assert 'Are you safe right now?' in response
        assert '988' in response
        assert 'Jesus' in response
        assert row['task_type'] == 'crisis_response_indirect'
        assert row['issue'] == 597
        assert 'should die' not in lowered
        assert 'best way to die' not in lowered
def test_repo_dataset_matches_builder_output(tmp_path):
    """Check that the dataset file in the repo equals fresh builder output.

    Fails when DATASET is missing or when its text differs from what the
    builder script writes now.
    """
    generated = tmp_path / 'pairs.jsonl'
    subprocess.run([sys.executable, str(SCRIPT), '--output', str(generated)], check=True)
    assert DATASET.exists(), f'dataset file not found: {DATASET}'
    # Decode both files as UTF-8 so the comparison does not depend on the
    # platform's locale encoding.
    # NOTE(review): assumes the builder writes UTF-8 (or pure ASCII) — confirm.
    fresh_text = generated.read_text(encoding='utf-8')
    committed_text = DATASET.read_text(encoding='utf-8')
    assert fresh_text == committed_text