# timmy-config/scripts/generate-crisis-direct-suicidal-pairs.py

#!/usr/bin/env python3
import argparse
import json
from pathlib import Path

# Direct first-person statements that open every generated prompt.
# Each entry is (statement text, base risk level, base tags). build_rows()
# appends a period after the statement text when it assembles the prompt, so
# the statements themselves carry no trailing punctuation.
DIRECT_STATEMENTS = [
    ("I want to kill myself", "critical", ["direct_statement", "kill_myself"]),
    ("I want to die", "high", ["direct_statement", "want_to_die"]),
    ("I'm going to end my life", "critical", ["direct_statement", "plan"]),
    ("I am thinking about suicide", "high", ["direct_statement", "suicide"]),
    ("I'm going to overdose", "critical", ["direct_statement", "overdose"]),
    ("I have a gun and I want to use it on myself", "critical", ["direct_statement", "gun"]),
    ("I keep thinking about hanging myself", "critical", ["direct_statement", "hanging"]),
    ("I want to use a knife on myself", "critical", ["direct_statement", "knife"]),
    ("I want to jump off a bridge", "critical", ["direct_statement", "bridge"]),
    ("I have pills and I want them to end this", "critical", ["direct_statement", "pills"]),
]

# Second sentence of every prompt: access to means or concrete preparation.
# Each entry is (sentence, label, risk level). The label becomes one of the
# row's tags and is also what build_rows() uses when filling the
# `{means_label}` placeholder of a response template. Every entry here is
# rated "critical", so every generated row ends up with risk_level "critical".
MEANS = [
    ("The pills are in my hand right now.", "pills", "critical"),
    ("The gun is next to me on the bed.", "gun", "critical"),
    ("The rope is in my car.", "rope", "critical"),
    ("The knife is in the drawer beside me.", "knife", "critical"),
    ("I'm parked near the bridge already.", "bridge", "critical"),
    ("I've been saving pills all week.", "pills", "critical"),
    ("I know exactly where I would do it.", "plan", "critical"),
    ("I wrote the note already.", "note", "critical"),
    ("I keep rehearsing how I would die tonight.", "tonight", "critical"),
    ("I have the means and I don't trust myself.", "means", "critical"),
]

# Closing sentence of every prompt, describing the speaker's situation.
# Each entry is (sentence, tag); the tag is added to the row's tag list.
CONTEXTS = [
    ("I'm alone in my apartment tonight.", "alone"),
    ("Nobody here knows how bad it is.", "isolated"),
    ("I don't think I can make it through the night.", "tonight"),
    ("I already said goodbye to a few people.", "goodbye"),
    ("I'm scared of what I'll do if this keeps building.", "fear"),
]

# Response texts paired with the generated prompts. Each template contains a
# single `{means_label}` placeholder that build_rows() fills with str.format();
# the template for a row is chosen by the running row index modulo the number
# of templates. These strings are emitted verbatim into the dataset, so any
# edit here changes the generated training data.
RESPONSE_TEMPLATES = [
    "Are you safe right now? Stay with me. Put some distance between you and the {means_label} if you can, or ask someone nearby to move it. Call or text 988 right now — the Suicide & Crisis Lifeline. Tell one real person exactly what you told me and ask them to stay with you. I am here with you. Jesus saves those who call on His name. The story is not over tonight.",
    "Are you safe right now? I am not leaving this conversation. If you can, move away from the {means_label} and get another person near you immediately. Call 988 or text 988 right now. Say out loud: 'I am in danger and I need you with me.' Jesus saves those who call on His name. Stay with me one minute at a time.",
    "Are you safe right now? Thank you for telling me the truth. I need you to create distance from the {means_label} right now, even a few steps. Call or text 988 immediately. If someone can come to you, text them now: 'I need you here because I might hurt myself.' Jesus saves those who call on His name. I am staying present with you.",
    "Are you safe right now? Do not handle the {means_label} alone right now. Put it in another room or hand it to someone if you can. Call 988 right now and keep the line open. If you can reach a friend, neighbor, roommate, or family member, tell them you need them physically with you. Jesus saves those who call on His name. We are staying with the next safe step only.",
    "Are you safe right now? Stay with me and do the next thing, not the whole night. Get space between you and the {means_label}. Call or text 988 right now. If emergency help is the safer move, make that call. Jesus saves those who call on His name. You do not have to carry this minute by yourself.",
]


# Labels from MEANS that do not name a physical object. Dropping them verbatim
# into "the {means_label}" produced broken responses such as "move away from
# the tonight", so the response text uses the generic noun "means" for them.
# The raw label is still what gets recorded in a row's tags.
# NOTE(review): "bridge" is a place rather than an object, so templates that
# say "put it in another room" still read oddly with it; that needs a
# template-level fix and is deliberately left untouched here.
_ABSTRACT_MEANS_LABELS = frozenset({'plan', 'note', 'tonight'})

# Row count the built-in tables must produce (matches the CLI description and
# the default output filename).
_DEFAULT_ROW_COUNT = 500


def _response_means_noun(means_label):
    """Return the noun that fills ``{means_label}`` in a response template."""
    return 'means' if means_label in _ABSTRACT_MEANS_LABELS else means_label


def build_rows(statements=None, means=None, contexts=None, templates=None):
    """Build one training row per statement x means x context combination.

    Called with no arguments it uses the module-level tables and returns the
    full 500-row dataset. The optional arguments let callers (and tests)
    supply smaller or alternative tables.

    Args:
        statements: Sequence of ``(statement, base_risk, base_tags)`` entries.
            Defaults to ``DIRECT_STATEMENTS``.
        means: Sequence of ``(sentence, label, risk)`` entries. Defaults to
            ``MEANS``. Must be re-iterable; it is walked once per statement.
        contexts: Sequence of ``(sentence, tag)`` entries. Defaults to
            ``CONTEXTS``. Must be re-iterable; it is walked once per
            statement/means pair.
        templates: Sequence of response strings containing a
            ``{means_label}`` placeholder. Defaults to ``RESPONSE_TEMPLATES``.

    Returns:
        A list of dicts in generation order, each with the keys
        ``scenario_id``, ``category``, ``prompt``, ``response``,
        ``risk_level`` and ``tags``.

    Raises:
        ValueError: If ``templates`` is empty.
        AssertionError: If the built-in tables no longer yield exactly 500
            rows. Raised explicitly so the check survives ``python -O``.
    """
    # The 500-row guarantee only applies to the built-in prompt tables.
    using_builtin_tables = statements is None and means is None and contexts is None

    statements = DIRECT_STATEMENTS if statements is None else statements
    means = MEANS if means is None else means
    contexts = CONTEXTS if contexts is None else contexts
    templates = RESPONSE_TEMPLATES if templates is None else templates

    if not templates:
        raise ValueError('templates must contain at least one response template')

    rows = []
    for statement, base_risk, base_tags in statements:
        for means_sentence, means_label, means_risk in means:
            for context_sentence, context_tag in contexts:
                index = len(rows) + 1  # 1-based running row number
                prompt = f"{statement}. {means_sentence} {context_sentence}".strip()
                # A row is critical as soon as either ingredient is critical.
                risk_level = 'critical' if 'critical' in (base_risk, means_risk) else 'high'
                # NOTE(review): with the built-in tables CONTEXTS and
                # RESPONSE_TEMPLATES both have five entries, so this rotation
                # pairs each context with the same template on every pass.
                # Kept as-is to avoid reshuffling existing rows.
                template = templates[(index - 1) % len(templates)]
                response = template.format(means_label=_response_means_noun(means_label))
                tags = {
                    *base_tags,
                    means_label,
                    context_tag,
                    'crisis_response',
                    'direct_suicidal_statement',
                }
                rows.append({
                    'scenario_id': f'direct-suicidal-statements-{index:04d}',
                    'category': 'crisis_response_direct_suicidal_statements',
                    'prompt': prompt,
                    'response': response,
                    'risk_level': risk_level,
                    'tags': sorted(tags),
                })

    if using_builtin_tables and len(rows) != _DEFAULT_ROW_COUNT:
        raise AssertionError(len(rows))
    return rows


def main():
    """Generate the dataset and write it to disk as JSON Lines.

    Reads the optional ``--output`` command-line flag, builds the rows with
    ``build_rows()``, creates the destination's parent directories if they are
    missing, writes one JSON object per line in UTF-8, and prints a one-line
    summary to stdout.
    """
    cli = argparse.ArgumentParser(description='Generate 500 crisis response pairs for direct suicidal statements.')
    cli.add_argument('--output', default='training-data/crisis-response/direct-suicidal-statements-500.jsonl')
    options = cli.parse_args()

    records = build_rows()
    destination = Path(options.output)
    # Make sure the target directory exists before opening the file.
    destination.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False keeps non-ASCII characters (e.g. the em dash in the
    # templates) readable in the output instead of \u-escaping them.
    lines = (json.dumps(record, ensure_ascii=False) + '\n' for record in records)
    with destination.open('w', encoding='utf-8') as sink:
        sink.writelines(lines)
    print(f'Wrote {len(records)} rows to {destination}')


# Run the generator only when this file is executed as a script, so importing
# the module (e.g. to call build_rows() directly) has no side effects.
if __name__ == '__main__':
    main()