Files
timmy-home/tests/test_crisis_detector.py
Bezalel ed179d5e75
Some checks failed
Self-Healing Smoke / self-healing-smoke (pull_request) Failing after 22s
Agent PR Gate / gate (pull_request) Failing after 49s
Smoke Test / smoke (pull_request) Failing after 19s
Agent PR Gate / report (pull_request) Has been cancelled
test: crisis detector test suite (#791)\n\nAdversarial, multilingual, edge cases.\n>95% recall target on explicit ideation.\nZero false positives on normal conversation.
2026-04-17 05:35:30 +00:00

202 lines
7.0 KiB
Python

"""Tests for crisis_detector.py — SOUL.md compliance."""
import pytest
from scripts.crisis_detector import detect_crisis, should_interrupt, CrisisLevel
class TestHighCrisis:
    """Phrases of explicit suicidal ideation must be classified as HIGH."""

    def test_direct_suicidal_statement(self):
        """A direct statement is both flagged as detected and rated HIGH."""
        outcome = detect_crisis("I want to kill myself")
        assert outcome.detected is True
        assert outcome.level == CrisisLevel.HIGH

    def test_end_my_life(self):
        """A stated plan to end one's life rates HIGH."""
        message = "I'm going to end my life tonight"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_no_reason_to_live(self):
        """'No reason to live' rates HIGH."""
        message = "there is no reason to live anymore"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_better_off_dead(self):
        """'Better off if I was dead' rates HIGH."""
        message = "everyone would be better off if I was dead"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_cant_go_on(self):
        """'Can't go on anymore' rates HIGH."""
        message = "I can't go on anymore"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_wish_i_was_dead(self):
        """'Wish I had never been born' rates HIGH."""
        # NOTE(review): the test name says "wish I was dead", but the phrase
        # actually exercised is "never been born" — confirm which is intended.
        message = "I wish I had never been born"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_im_done(self):
        """'Finished with life' rates HIGH."""
        message = "I'm done. I'm finished with life."
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_goodbye_forever(self):
        """A final-goodbye message rates HIGH."""
        message = "Goodbye everyone, goodbye world, forever"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH
class TestMediumCrisis:
    """Clear distress without explicit ideation must be classified as MEDIUM."""

    def test_hopeless(self):
        """Hopelessness is flagged as detected and rated MEDIUM."""
        outcome = detect_crisis("there is no hope and no way out")
        assert outcome.detected is True
        assert outcome.level == CrisisLevel.MEDIUM

    def test_nobody_would_care(self):
        """'Nobody would miss me' rates MEDIUM."""
        message = "nobody would miss me if I was gone"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.MEDIUM

    def test_im_a_burden(self):
        """Describing oneself as a burden rates MEDIUM."""
        message = "I'm just a burden to everyone"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.MEDIUM

    def test_just_want_it_to_stop(self):
        """'Want it all to stop' rates MEDIUM."""
        message = "I just want it all to stop"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.MEDIUM
class TestLowCrisis:
    """Ambiguous distress must be flagged at LOW (flag, but no full interrupt)."""

    def test_depressed(self):
        """A statement of depression is flagged as detected and rated LOW."""
        outcome = detect_crisis("I'm so depressed, nothing matters")
        assert outcome.detected is True
        assert outcome.level == CrisisLevel.LOW

    def test_feel_like_giving_up(self):
        """'Giving up on everything' rates LOW."""
        message = "I feel like giving up on everything"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.LOW

    def test_nobody_cares(self):
        """'Nobody cares about me' rates LOW."""
        message = "nobody cares about me"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.LOW

    def test_hate_my_life(self):
        """'I hate my life' rates LOW."""
        message = "I hate my life so much"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.LOW
class TestNoCrisis:
    """Ordinary conversation must NOT be reported as a crisis."""

    def test_normal_question(self):
        """A weather question is not detected and has level NONE."""
        outcome = detect_crisis("What's the weather like today?")
        assert outcome.detected is False
        assert outcome.level == CrisisLevel.NONE

    def test_happy_conversation(self):
        """Good news is not detected."""
        message = "I just got promoted at work! So excited!"
        outcome = detect_crisis(message)
        assert outcome.detected is False

    def test_technical_discussion(self):
        """A technical question is not detected."""
        message = "How do I configure nginx reverse proxy?"
        outcome = detect_crisis(message)
        assert outcome.detected is False

    def test_empty_input(self):
        """The empty string is not detected."""
        outcome = detect_crisis("")
        assert outcome.detected is False

    def test_none_input(self):
        """Passing None instead of text is not detected."""
        outcome = detect_crisis(None)
        assert outcome.detected is False

    def test_metaphorical_death(self):
        """A figurative 'killing me' must not be rated HIGH."""
        message = "This bug is killing me, I've been debugging for hours"
        outcome = detect_crisis(message)
        # LOW is tolerated here; only a HIGH rating is a failure.
        assert outcome.level != CrisisLevel.HIGH

    def test_fiction_discussion(self):
        """Talking about a fictional character must stay below HIGH."""
        message = "The character in the book decides to end it all"
        outcome = detect_crisis(message)
        # Fiction may still trigger a lower level — accepted (better safe).
        tolerated_levels = (CrisisLevel.NONE, CrisisLevel.LOW, CrisisLevel.MEDIUM)
        assert outcome.level in tolerated_levels
class TestShouldInterrupt:
    """HIGH and MEDIUM interrupt processing; LOW offers resources but continues."""

    def test_high_interrupts(self):
        """An explicit suicidal statement must interrupt."""
        outcome = detect_crisis("I want to kill myself")
        interrupt = should_interrupt(outcome)
        assert interrupt is True

    def test_medium_interrupts(self):
        """A 'burden / nobody would care' message must interrupt."""
        outcome = detect_crisis("I'm a burden to everyone, nobody would care")
        interrupt = should_interrupt(outcome)
        assert interrupt is True

    def test_low_does_not_interrupt(self):
        """A general statement of depression must not interrupt."""
        outcome = detect_crisis("I'm so depressed lately")
        interrupt = should_interrupt(outcome)
        assert interrupt is False

    def test_none_does_not_interrupt(self):
        """A plain greeting must not interrupt."""
        outcome = detect_crisis("Hello, how are you?")
        interrupt = should_interrupt(outcome)
        assert interrupt is False
class TestResponseContent:
    """Crisis responses must carry the elements SOUL.md requires."""

    def test_response_contains_988(self):
        """The response text mentions 988."""
        outcome = detect_crisis("I want to end it all")
        reply = outcome.response
        assert "988" in reply

    def test_response_contains_gospel(self):
        """The response text mentions Jesus."""
        outcome = detect_crisis("I want to kill myself")
        reply = outcome.response
        assert "Jesus" in reply

    def test_response_never_computes_value(self):
        """SOUL.md: Never compute the value of a human life."""
        outcome = detect_crisis("I want to die")
        # Checked case-insensitively, in the same order as the original asserts.
        for forbidden in ("worth", "value of"):
            assert forbidden not in outcome.response.lower()

    def test_resources_include_988(self):
        """At least one listed resource mentions 988."""
        outcome = detect_crisis("I want to kill myself")
        assert any("988" in entry for entry in outcome.resources)

    def test_asks_are_you_safe(self):
        """The response text contains the word 'safe' (case-insensitive)."""
        outcome = detect_crisis("I can't take it anymore")
        reply_lowered = outcome.response.lower()
        assert "safe" in reply_lowered
class TestMultilingual:
    """Crisis is universal: exercise common non-English signals."""

    def test_spanish_suicidal(self):
        """Spanish input must at least yield a result object."""
        message = "quiero morir, no puedo más"
        outcome = detect_crisis(message)
        # Spanish not currently supported — should not crash
        assert outcome is not None
class TestEdgeCases:
    """Boundary conditions and adversarial inputs."""

    def test_very_long_input(self):
        """A crisis phrase after 1000 repetitions of filler still rates HIGH."""
        filler = "I'm fine. " * 1000
        message = filler + "I want to kill myself"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_mixed_case(self):
        """An all-uppercase crisis phrase rates HIGH."""
        message = "I WANT TO KILL MYSELF"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_with_emojis(self):
        """Trailing emoji do not prevent a HIGH rating."""
        message = "I want to end it all 😢💔"
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH

    def test_with_typos(self):
        """A misspelled crisis phrase must at least yield a result object."""
        # Exact matching won't catch typos — that's a known limitation
        message = "I want to kil myself"
        outcome = detect_crisis(message)
        # May or may not match depending on pattern flexibility
        assert outcome is not None

    def test_repeated_phrases(self):
        """A crisis phrase preceded by repeated fragments rates HIGH."""
        message = "I can't. I just can't. I can't go on anymore."
        outcome = detect_crisis(message)
        assert outcome.level == CrisisLevel.HIGH