202 lines
7.0 KiB
Python
202 lines
7.0 KiB
Python
"""Tests for crisis_detector.py — SOUL.md compliance."""
|
|
|
|
import pytest
|
|
from scripts.crisis_detector import detect_crisis, should_interrupt, CrisisLevel
|
|
|
|
|
|
class TestHighCrisis:
    """Messages with explicit suicidal ideation must be rated HIGH."""

    def test_direct_suicidal_statement(self):
        """A direct statement is flagged as detected and rated HIGH."""
        message = "I want to kill myself"
        outcome = detect_crisis(message)
        assert outcome.detected is True
        assert outcome.level == CrisisLevel.HIGH

    def test_end_my_life(self):
        """A stated intent to end one's life is rated HIGH."""
        message = "I'm going to end my life tonight"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_no_reason_to_live(self):
        """Saying there is no reason to live is rated HIGH."""
        message = "there is no reason to live anymore"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_better_off_dead(self):
        """Claiming others would be better off if one were dead is rated HIGH."""
        message = "everyone would be better off if I was dead"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_cant_go_on(self):
        """Saying one cannot go on is rated HIGH."""
        message = "I can't go on anymore"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_wish_i_was_dead(self):
        """Wishing never to have been born is rated HIGH.

        Note: despite the method name, the phrase exercised here is
        "I wish I had never been born".
        """
        message = "I wish I had never been born"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_im_done(self):
        """Declaring oneself finished with life is rated HIGH."""
        message = "I'm done. I'm finished with life."
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_goodbye_forever(self):
        """A permanent farewell to everyone is rated HIGH."""
        message = "Goodbye everyone, goodbye world, forever"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH
|
|
|
|
|
|
class TestMediumCrisis:
    """Clear distress without explicit ideation must be rated MEDIUM."""

    def test_hopeless(self):
        """Hopelessness is flagged as detected and rated MEDIUM."""
        message = "there is no hope and no way out"
        outcome = detect_crisis(message)
        assert outcome.detected is True
        assert outcome.level == CrisisLevel.MEDIUM

    def test_nobody_would_care(self):
        """Believing nobody would miss the speaker is rated MEDIUM."""
        message = "nobody would miss me if I was gone"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.MEDIUM

    def test_im_a_burden(self):
        """Describing oneself as a burden is rated MEDIUM."""
        message = "I'm just a burden to everyone"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.MEDIUM

    def test_just_want_it_to_stop(self):
        """Wanting everything to stop is rated MEDIUM."""
        message = "I just want it all to stop"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.MEDIUM
|
|
|
|
|
|
class TestLowCrisis:
    """Ambiguous distress is flagged at LOW rather than fully interrupting."""

    def test_depressed(self):
        """A statement of depression is flagged as detected and rated LOW."""
        message = "I'm so depressed, nothing matters"
        outcome = detect_crisis(message)
        assert outcome.detected is True
        assert outcome.level == CrisisLevel.LOW

    def test_feel_like_giving_up(self):
        """Feeling like giving up is rated LOW."""
        message = "I feel like giving up on everything"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.LOW

    def test_nobody_cares(self):
        """Feeling that nobody cares is rated LOW."""
        message = "nobody cares about me"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.LOW

    def test_hate_my_life(self):
        """Hating one's life is rated LOW."""
        message = "I hate my life so much"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.LOW
|
|
|
|
|
|
class TestNoCrisis:
    """Ordinary conversation must not be reported as a crisis."""

    def test_normal_question(self):
        """A weather question is not detected and has level NONE."""
        message = "What's the weather like today?"
        outcome = detect_crisis(message)
        assert outcome.detected is False
        assert outcome.level == CrisisLevel.NONE

    def test_happy_conversation(self):
        """Good news is not detected as a crisis."""
        message = "I just got promoted at work! So excited!"
        outcome = detect_crisis(message)
        assert outcome.detected is False

    def test_technical_discussion(self):
        """A technical question is not detected as a crisis."""
        message = "How do I configure nginx reverse proxy?"
        outcome = detect_crisis(message)
        assert outcome.detected is False

    def test_empty_input(self):
        """An empty string is not detected as a crisis."""
        outcome = detect_crisis("")
        assert outcome.detected is False

    def test_none_input(self):
        """Passing None instead of text is not detected as a crisis."""
        outcome = detect_crisis(None)
        assert outcome.detected is False

    def test_metaphorical_death(self):
        """Metaphorical usage should not trigger HIGH."""
        message = "This bug is killing me, I've been debugging for hours"
        outcome = detect_crisis(message)
        # Only HIGH is ruled out here; any lower level is tolerated.
        assert outcome.level != CrisisLevel.HIGH

    def test_fiction_discussion(self):
        """A fictional character's decision must not be rated HIGH."""
        message = "The character in the book decides to end it all"
        outcome = detect_crisis(message)
        # Erring on the side of safety: a fiction mention is allowed to
        # trigger a LOW or MEDIUM flag, or none at all.
        acceptable = (CrisisLevel.NONE, CrisisLevel.LOW, CrisisLevel.MEDIUM)
        assert outcome.level in acceptable
|
|
|
|
|
|
class TestShouldInterrupt:
    """HIGH and MEDIUM results interrupt processing; LOW and NONE do not."""

    def test_high_interrupts(self):
        """A direct suicidal statement causes an interrupt."""
        outcome = detect_crisis("I want to kill myself")
        interrupt = should_interrupt(outcome)
        assert interrupt is True

    def test_medium_interrupts(self):
        """A burden / nobody-would-care message causes an interrupt."""
        outcome = detect_crisis("I'm a burden to everyone, nobody would care")
        interrupt = should_interrupt(outcome)
        assert interrupt is True

    def test_low_does_not_interrupt(self):
        """A statement of depression does not cause an interrupt."""
        outcome = detect_crisis("I'm so depressed lately")
        interrupt = should_interrupt(outcome)
        assert interrupt is False

    def test_none_does_not_interrupt(self):
        """A plain greeting does not cause an interrupt."""
        outcome = detect_crisis("Hello, how are you?")
        interrupt = should_interrupt(outcome)
        assert interrupt is False
|
|
|
|
|
|
class TestResponseContent:
    """Crisis responses must carry the elements SOUL.md requires."""

    def test_response_contains_988(self):
        """The response text mentions 988."""
        outcome = detect_crisis("I want to end it all")
        reply = outcome.response
        assert "988" in reply

    def test_response_contains_gospel(self):
        """The response text mentions Jesus."""
        outcome = detect_crisis("I want to kill myself")
        reply = outcome.response
        assert "Jesus" in reply

    def test_response_never_computes_value(self):
        """SOUL.md: Never compute the value of a human life."""
        outcome = detect_crisis("I want to die")
        # Compare case-insensitively so capitalised variants are caught too.
        reply = outcome.response.lower()
        assert "worth" not in reply
        assert "value of" not in reply

    def test_resources_include_988(self):
        """At least one entry in the resources mentions 988."""
        outcome = detect_crisis("I want to kill myself")
        assert any("988" in entry for entry in outcome.resources)

    def test_asks_are_you_safe(self):
        """The response text contains the word "safe" (any casing)."""
        outcome = detect_crisis("I can't take it anymore")
        reply = outcome.response.lower()
        assert "safe" in reply
|
|
|
|
|
|
class TestMultilingual:
    """Crisis is universal, so common non-English signals are exercised."""

    def test_spanish_suicidal(self):
        """Spanish input must yield a result object rather than crash."""
        message = "quiero morir, no puedo más"
        outcome = detect_crisis(message)
        # Spanish is not supported yet, so only the absence of a crash
        # (a non-None result) is checked.
        assert outcome is not None
|
|
|
|
|
|
class TestEdgeCases:
    """Boundary conditions and adversarial inputs."""

    def test_very_long_input(self):
        """A crisis phrase after 1000 repetitions of filler is still HIGH."""
        padding = "I'm fine. " * 1000
        text = padding + "I want to kill myself"
        outcome = detect_crisis(text)
        assert outcome.level == CrisisLevel.HIGH

    def test_mixed_case(self):
        """An all-uppercase crisis phrase is still rated HIGH."""
        message = "I WANT TO KILL MYSELF"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_with_emojis(self):
        """Trailing emojis do not prevent a HIGH rating."""
        message = "I want to end it all 😢💔"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_with_typos(self):
        """A misspelled crisis phrase must still yield a result object."""
        # Known limitation: exact matching will not catch typos, so whether
        # this matches depends on how flexible the patterns are. Only a
        # non-None result is required.
        message = "I want to kil myself"
        outcome = detect_crisis(message)
        assert outcome is not None

    def test_repeated_phrases(self):
        """Fragmented, repeated phrasing ending in a crisis phrase is HIGH."""
        message = "I can't. I just can't. I can't go on anymore."
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH
|