Files
the-door/crisis/tests.py
Alexander Whitestone 1d8afc30fd
All checks were successful
Smoke Test / smoke (push) Successful in 5s
fix: reduce crisis detector false positives (closes #32)
- Removed 'saying goodbye' from CRITICAL tier (too common in innocent contexts)
- Narrowed 'wrote a will' to 'wrote a suicide note' (responsible behavior)
- Removed broad single-word matches from HIGH tier: give up, trapped, desperate, worthless, hopeless, no future, nothing left, can't see any light
- Added contextual HIGH tier phrases: feel hopeless, trapped in this, desperate for help, give up on life, etc.
- Updated MODERATE tier with contextual versions: feel worthless, feel hopeless, feel trapped, etc.
- Updated index.html JavaScript keywords to match Python changes
- Added comprehensive false positive test suite

All existing tests pass. New tests verify innocent messages no longer trigger false alarms.
2026-04-13 15:37:23 -04:00

529 lines
21 KiB
Python

"""
Tests for the-door crisis detection system.
Covers: detect.py, response.py, gateway.py
Run with: python -m pytest crisis/tests.py -v
"""
import unittest
import sys
import os
# Ensure project root is on path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from crisis.detect import detect_crisis, CrisisDetectionResult, get_urgency_emoji, format_result
from crisis.response import (
process_message, generate_response, get_system_prompt_modifier,
get_active_listening_response, get_deescalation_response,
ACTIVE_LISTENING_REFLECT, ACTIVE_LISTENING_VALIDATE, ACTIVE_LISTENING_OPEN_QUESTIONS,
DEESCALATION_SLOW, DEESCALATION_GROUND, DEESCALATION_REDUCE_SCOPE,
DEESCALATION_CHOICES, DEESCALATION_SAFETY_PLAN,
)
from crisis.gateway import check_crisis, get_system_prompt
class TestDetection(unittest.TestCase):
    """Test crisis tier detection.

    Every test feeds one message to ``detect_crisis`` and checks the
    ``level`` (and, where relevant, ``score`` / ``indicators``) of the
    returned result.
    """

    def test_critical_explicit(self):
        """An explicit statement is CRITICAL and reports at least one indicator."""
        r = detect_crisis("I want to kill myself")
        self.assertEqual(r.level, "CRITICAL")
        self.assertGreater(len(r.indicators), 0)

    def test_critical_plan(self):
        r = detect_crisis("I have a plan to end it all tonight")
        self.assertEqual(r.level, "CRITICAL")

    def test_critical_better_off_dead(self):
        r = detect_crisis("Everyone would be better off dead without me")
        self.assertEqual(r.level, "CRITICAL")

    def test_critical_tired_of_living(self):
        r = detect_crisis("I'm so tired of living, I can't take this anymore")
        self.assertEqual(r.level, "CRITICAL")

    def test_critical_giving_away(self):
        r = detect_crisis("I'm giving away all my possessions, saying goodbye forever")
        self.assertEqual(r.level, "CRITICAL")

    def test_high_despair(self):
        r = detect_crisis("I'm in so much pain and I just want it to stop")
        self.assertIn(r.level, ("HIGH", "CRITICAL"))

    def test_high_hopeless(self):
        r = detect_crisis("I feel completely hopeless with no way out")
        # 'hopeless' is MEDIUM; 'no way out' is HIGH, so HIGH is the expected
        # outcome. MEDIUM is still tolerated by this assertion.
        self.assertIn(r.level, ("HIGH", "MEDIUM"))

    def test_high_no_one_would_miss(self):
        r = detect_crisis("No one would miss me if I were gone")
        self.assertIn(r.level, ("HIGH", "CRITICAL"))

    def test_high_world_better_without(self):
        r = detect_crisis("The world would be better without me")
        self.assertIn(r.level, ("HIGH", "CRITICAL"))

    def test_medium_alone(self):
        r = detect_crisis("I feel so alone and nobody cares about me")
        self.assertIn(r.level, ("MEDIUM", "LOW"))

    def test_medium_worthless(self):
        r = detect_crisis("I feel worthless and empty inside")
        self.assertIn(r.level, ("MEDIUM", "LOW"))

    def test_medium_exhausted(self):
        r = detect_crisis("I'm completely exhausted and can't cope")
        self.assertIn(r.level, ("MEDIUM", "LOW"))

    def test_low_rough_day(self):
        r = detect_crisis("Having a tough time lately, things are hard")
        self.assertIn(r.level, ("LOW", "NONE"))

    def test_low_struggling(self):
        r = detect_crisis("I'm struggling and stressed out")
        self.assertIn(r.level, ("LOW", "NONE"))

    def test_normal_message(self):
        """An ordinary greeting is NONE with a zero score."""
        r = detect_crisis("Hey Timmy, how are you doing today?")
        self.assertEqual(r.level, "NONE")
        self.assertEqual(r.score, 0.0)

    def test_empty_message(self):
        r = detect_crisis("")
        self.assertEqual(r.level, "NONE")

    def test_whitespace_only(self):
        r = detect_crisis(" ")
        self.assertEqual(r.level, "NONE")

    def test_none_input(self):
        """None input should not crash."""
        # Deliberately None rather than "": the empty string is already
        # covered by test_empty_message.
        r = detect_crisis(None)
        self.assertEqual(r.level, "NONE")

    def test_score_ranges(self):
        """Verify each sample message gets the fixed score of its expected level."""
        levels_scores = {
            "CRITICAL": 1.0,
            "HIGH": 0.75,
            "MEDIUM": 0.5,
            "LOW": 0.25,
            "NONE": 0.0,
        }
        cases = [
            ("I want to kill myself", "CRITICAL"),
            ("I feel completely hopeless with no way out", "HIGH"),
            ("I feel so alone in this, nobody understands", "MEDIUM"),
            ("Having a rough day", "LOW"),
            ("Hello there", "NONE"),
        ]
        for text, expected_level in cases:
            # subTest keeps going after a failure so every mismatching
            # message is reported, not just the first one.
            with self.subTest(text=text, expected_level=expected_level):
                r = detect_crisis(text)
                self.assertEqual(
                    r.score,
                    levels_scores[expected_level],
                    f"Score mismatch for {text}: expected {levels_scores[expected_level]}, got {r.score}",
                )
class TestResponse(unittest.TestCase):
    """Exercise response generation on top of detection results."""

    def test_critical_response_flags(self):
        """A CRITICAL message sets all four flags and yields a non-empty message."""
        detection = detect_crisis("I'm going to kill myself right now")
        reply = generate_response(detection)
        for flag in ("show_crisis_panel", "show_overlay", "provide_988", "escalate"):
            self.assertTrue(getattr(reply, flag))
        self.assertGreater(len(reply.timmy_message), 0)

    def test_high_response_flags(self):
        """This message must show the crisis panel and provide 988."""
        reply = generate_response(
            detect_crisis("I can't go on anymore, everything is pointless")
        )
        self.assertTrue(reply.show_crisis_panel)
        self.assertTrue(reply.provide_988)

    def test_medium_response_no_overlay(self):
        """This message must not raise the overlay."""
        reply = generate_response(
            detect_crisis("I feel so alone and everyone forgets about me")
        )
        self.assertFalse(reply.show_overlay)

    def test_low_response_minimal(self):
        """A tough-day message shows neither the crisis panel nor the overlay."""
        reply = generate_response(detect_crisis("I'm having a tough day"))
        self.assertFalse(reply.show_crisis_panel)
        self.assertFalse(reply.show_overlay)

    def test_process_message_full_pipeline(self):
        """process_message on raw text yields overlay and escalation flags."""
        reply = process_message("I want to end my life")
        self.assertTrue(reply.show_overlay)
        self.assertTrue(reply.escalate)

    def test_system_prompt_modifier_critical(self):
        """The CRITICAL modifier carries both alert markers."""
        modifier = get_system_prompt_modifier(detect_crisis("I'm going to kill myself"))
        self.assertIn("CRISIS ALERT", modifier)
        self.assertIn("CRITICAL RISK", modifier)

    def test_system_prompt_modifier_none(self):
        """A harmless greeting produces an empty modifier."""
        modifier = get_system_prompt_modifier(detect_crisis("Hello Timmy"))
        self.assertEqual(modifier, "")

    def test_critical_messages_contain_988(self):
        """At least one CRITICAL response option should reference 988."""
        from crisis.response import TIMMY_CRITICAL
        # A non-empty list of matches means some message mentions 988.
        mentioning_988 = [msg for msg in TIMMY_CRITICAL if "988" in msg]
        self.assertTrue(mentioning_988, "CRITICAL responses should reference 988")
class TestGateway(unittest.TestCase):
    """Test gateway integration.

    ``check_crisis`` is treated as returning a dict-like payload and
    ``get_system_prompt`` as returning the (possibly extended) prompt string.
    """

    def test_check_crisis_structure(self):
        """The gateway payload exposes every top-level key checked here."""
        result = check_crisis("I want to die")
        for key in (
            "level",
            "score",
            "indicators",
            "recommended_action",
            "timmy_message",
            "ui",
            "escalate",
        ):
            self.assertIn(key, result)

    def test_check_crisis_critical_level(self):
        """A CRITICAL message scores 1.0, escalates, and enables overlay + 988."""
        result = check_crisis("I'm going to kill myself tonight")
        self.assertEqual(result["level"], "CRITICAL")
        self.assertEqual(result["score"], 1.0)
        self.assertTrue(result["escalate"])
        self.assertTrue(result["ui"]["show_overlay"])
        self.assertTrue(result["ui"]["provide_988"])

    def test_check_crisis_normal_message(self):
        """An unrelated question is NONE, scores 0.0, and does not escalate."""
        result = check_crisis("What is Bitcoin?")
        self.assertEqual(result["level"], "NONE")
        self.assertEqual(result["score"], 0.0)
        self.assertFalse(result["escalate"])

    def test_get_system_prompt_with_crisis(self):
        """System prompt should include crisis context when crisis detected."""
        prompt = get_system_prompt("You are Timmy.", "I have no hope")
        self.assertIn("CRISIS", prompt)
        self.assertIn("You are Timmy.", prompt)

    def test_get_system_prompt_no_crisis(self):
        """System prompt should be unchanged when no crisis detected."""
        base = "You are Timmy."
        prompt = get_system_prompt(base, "Tell me about Bitcoin")
        self.assertEqual(prompt, base)

    def test_get_system_prompt_empty_text(self):
        """System prompt should handle empty text gracefully."""
        base = "You are Timmy."
        prompt = get_system_prompt(base, "")
        self.assertEqual(prompt, base)

    def test_ui_flags_for_high(self):
        """HIGH crisis should show crisis panel and 988 but not overlay."""
        result = check_crisis("I feel completely hopeless with no way out")
        self.assertIn(result["level"], ("HIGH", "MEDIUM"))
        if result["level"] != "HIGH":
            # Report the unmet precondition instead of passing without
            # having checked a single UI flag.
            self.skipTest(f"message classified as {result['level']}, not HIGH")
        ui = result["ui"]
        self.assertTrue(ui["show_crisis_panel"])
        self.assertTrue(ui["provide_988"])
        self.assertFalse(ui["show_overlay"])

    def test_ui_flags_for_medium(self):
        """MEDIUM crisis should provide 988 but not show overlay or crisis panel."""
        result = check_crisis("I feel so alone and nobody cares")
        if result["level"] != "MEDIUM":
            # Same reasoning as the HIGH variant: make the skipped check visible.
            self.skipTest(f"message classified as {result['level']}, not MEDIUM")
        ui = result["ui"]
        self.assertFalse(ui["show_overlay"])
        self.assertFalse(ui["show_crisis_panel"])
        self.assertTrue(ui["provide_988"])

    def test_format_gateway_response_json(self):
        """The gateway payload is CRITICAL and survives a JSON round-trip."""
        # NOTE(review): format_gateway_response is not imported by this
        # module, so the JSON check is done on the check_crisis payload.
        import json
        result = check_crisis("I want to die")
        self.assertEqual(result["level"], "CRITICAL")
        decoded = json.loads(json.dumps(result))
        self.assertEqual(decoded["level"], "CRITICAL")
class TestHelpers(unittest.TestCase):
    """Check the small formatting helpers exported by the detect module."""

    def test_urgency_emojis(self):
        """Each level name maps to its expected emoji; NONE maps to ''."""
        expected_by_level = (
            ("CRITICAL", "🚨"),
            ("HIGH", "⚠️"),
            ("MEDIUM", "🔶"),
            ("LOW", "🔵"),
            ("NONE", ""),
        )
        for level, emoji in expected_by_level:
            self.assertEqual(get_urgency_emoji(level), emoji)

    def test_format_result(self):
        """The formatted text of a CRITICAL result names the level."""
        rendered = format_result(detect_crisis("I want to kill myself"))
        self.assertIn("CRITICAL", rendered)

    def test_format_result_none(self):
        """The formatted text of a NONE result names the level."""
        rendered = format_result(detect_crisis("Hello"))
        self.assertIn("NONE", rendered)

    def test_format_result_contains_indicators(self):
        """The formatted text of a CRITICAL result contains 'Indicators'."""
        rendered = format_result(detect_crisis("I want to kill myself"))
        self.assertIn("Indicators", rendered)
class TestFalsePositives(unittest.TestCase):
    """Regression tests for Issue #32: crisis detector false positives.

    Innocent messages must NOT trigger crisis levels. These tests guard
    against regressions whenever the keyword lists are edited.
    """

    def _expect_none(self, message):
        """Assert that *message* is classified as NONE."""
        outcome = detect_crisis(message)
        self.assertEqual(outcome.level, "NONE")

    def _expect_low_at_most(self, message):
        """Assert that *message* is classified as LOW or NONE."""
        outcome = detect_crisis(message)
        self.assertIn(outcome.level, ("LOW", "NONE"))

    def test_saying_goodbye_airport(self):
        self._expect_none("She was saying goodbye at the airport")

    def test_saying_goodbye_apartment(self):
        self._expect_none("I'm saying goodbye to my old apartment")

    def test_trapped_elevator(self):
        self._expect_none("I'm trapped in an elevator")

    def test_desperate_competition(self):
        """A casual 'desperate' may rate LOW at most."""
        self._expect_low_at_most("She's desperate to win the race")

    def test_worthless_game_character(self):
        """A casual 'worthless' may rate LOW at most."""
        self._expect_low_at_most("This character is worthless in the game")

    def test_hopeless_bug(self):
        """A casual 'hopeless' may rate LOW at most."""
        self._expect_low_at_most("This bug is hopeless")

    def test_no_future_politics(self):
        self._expect_none("He has no future in politics")

    def test_nothing_left_fridge(self):
        self._expect_none("There's nothing left in the fridge")

    def test_cant_see_light_power_outage(self):
        self._expect_none("I can't see any light — the power went out")

    def test_give_up_project(self):
        self._expect_none("I give up on this project")

    def test_wrote_a_will_responsible(self):
        self._expect_none("I wrote a will last year — just being responsible")

    def test_giving_away_declutter(self):
        self._expect_none("I'm giving away possessions — decluttering my house")
class TestEdgeCases(unittest.TestCase):
    """Edge cases and integration scenarios for the detector."""

    def test_multiple_indicators(self):
        """Several indicators in one message still resolve to CRITICAL."""
        outcome = detect_crisis("I'm hopeless, worthless, and want to die")
        self.assertEqual(outcome.level, "CRITICAL")

    def test_case_insensitive(self):
        """Upper- and lower-case spellings of a message get the same level."""
        phrase = "I want to kill myself"
        shouted = detect_crisis(phrase.upper())
        whispered = detect_crisis(phrase.lower())
        self.assertEqual(shouted.level, whispered.level)

    def test_partial_word_no_match(self):
        """'hopeful' must not be mistaken for a crisis keyword."""
        outcome = detect_crisis("I love feeling hopeful about the future")
        self.assertEqual(outcome.level, "NONE")

    def test_sarcasm_limitation(self):
        """Document that figurative messages may still trigger detection.

        This is intentional: a false positive is preferred over a false
        negative on crisis.
        """
        outcome = detect_crisis("ugh I could just die of embarrassment")
        # CRITICAL (via the "die" pattern) is accepted here, as are HIGH and NONE.
        acceptable_levels = ("CRITICAL", "HIGH", "NONE")
        self.assertIn(outcome.level, acceptable_levels)

    def test_very_long_message(self):
        """A crisis phrase at the end of a very long message is still found."""
        filler = "".join(["I am having a normal conversation. "] * 100)
        outcome = detect_crisis(filler + "I want to kill myself")
        self.assertEqual(outcome.level, "CRITICAL")

    def test_unicode_handling(self):
        """An emoji inside the message must not break detection."""
        outcome = detect_crisis("I feel so alone 😢 nobody cares")
        self.assertIn(outcome.level, ("MEDIUM", "LOW", "NONE"))
class TestActiveListening(unittest.TestCase):
    """Checks on the active-listening response helper and its message pools."""

    def test_reflect_returns_string(self):
        """'reflect' yields a non-empty string."""
        reply = get_active_listening_response("reflect")
        self.assertIsInstance(reply, str)
        self.assertGreater(len(reply), 0)

    def test_reflect_from_pool(self):
        self.assertIn(get_active_listening_response("reflect"), ACTIVE_LISTENING_REFLECT)

    def test_validate_from_pool(self):
        self.assertIn(get_active_listening_response("validate"), ACTIVE_LISTENING_VALIDATE)

    def test_question_from_pool(self):
        self.assertIn(
            get_active_listening_response("question"),
            ACTIVE_LISTENING_OPEN_QUESTIONS,
        )

    def test_invalid_technique_falls_back_to_reflect(self):
        """An unknown technique name is answered from the reflect pool."""
        self.assertIn(get_active_listening_response("nonexistent"), ACTIVE_LISTENING_REFLECT)

    def test_reflect_contains_feeling_words(self):
        """Reflect responses should contain feeling/emotion language."""
        msg = get_active_listening_response("reflect")
        lowered = msg.lower()
        feeling_words = ["hear", "sounds", "pain", "exhausted", "heavy", "carrying", "fighting"]
        matched = [word for word in feeling_words if word in lowered]
        self.assertTrue(matched, f"Reflect response should contain feeling language: {msg}")

    def test_validate_does_not_agree_with_harm(self):
        """Validate responses must not suggest someone should die or give up."""
        harm_words = ["should die", "give up", "end it", "better off dead"]
        for msg in ACTIVE_LISTENING_VALIDATE:
            lowered = msg.lower()
            for phrase in harm_words:
                self.assertNotIn(phrase, lowered, f"Validate response contains harmful language: {msg}")

    def test_questions_are_open_ended(self):
        """Every entry in the open-question pool contains a question mark."""
        for msg in ACTIVE_LISTENING_OPEN_QUESTIONS:
            self.assertIn("?", msg, f"Open-ended question missing '?': {msg}")
class TestDeescalation(unittest.TestCase):
    """Checks on the de-escalation response helper and its message pools."""

    def test_slow_returns_string(self):
        """'slow' yields a non-empty string."""
        reply = get_deescalation_response("slow")
        self.assertIsInstance(reply, str)
        self.assertGreater(len(reply), 0)

    def test_slow_from_pool(self):
        self.assertIn(get_deescalation_response("slow"), DEESCALATION_SLOW)

    def test_ground_from_pool(self):
        self.assertIn(get_deescalation_response("ground"), DEESCALATION_GROUND)

    def test_reduce_scope_from_pool(self):
        self.assertIn(get_deescalation_response("reduce_scope"), DEESCALATION_REDUCE_SCOPE)

    def test_choices_from_pool(self):
        self.assertIn(get_deescalation_response("choices"), DEESCALATION_CHOICES)

    def test_safety_plan_from_pool(self):
        self.assertIn(get_deescalation_response("safety_plan"), DEESCALATION_SAFETY_PLAN)

    def test_invalid_technique_falls_back_to_slow(self):
        """An unknown technique name is answered from the slow pool."""
        self.assertIn(get_deescalation_response("nonexistent"), DEESCALATION_SLOW)

    def test_slow_contains_calm_language(self):
        """Slow responses should convey calm, not urgency."""
        msg = get_deescalation_response("slow")
        lowered = msg.lower()
        calm_words = ["here", "rush", "breath", "going anywhere", "time", "listening"]
        matched = [word for word in calm_words if word in lowered]
        self.assertTrue(matched, f"Slow response should contain calm language: {msg}")

    def test_ground_references_present(self):
        """Ground responses should reference the present moment."""
        msg = get_deescalation_response("ground")
        lowered = msg.lower()
        present_words = ["right now", "around you", "where you are", "alone", "nearby"]
        matched = [word for word in present_words if word in lowered]
        self.assertTrue(matched, f"Ground response should reference present moment: {msg}")

    def test_safety_plan_mentions_988_or_call(self):
        """At least one safety-plan response mentions 988 or calling."""
        found = any(
            "988" in msg or "call" in msg.lower()
            for msg in DEESCALATION_SAFETY_PLAN
        )
        self.assertTrue(found, "At least one safety plan response should reference 988 or calling")

    def test_choices_offer_alternatives(self):
        """Choice responses should offer alternatives (contain ' or ')."""
        for msg in DEESCALATION_CHOICES:
            lowered = msg.lower()
            self.assertIn(" or ", lowered, f"Choice response should offer alternatives: {msg}")
class TestSystemPromptModifierEnhanced(unittest.TestCase):
    """Check that prompt modifiers carry active-listening instructions."""

    def test_critical_includes_active_listening(self):
        modifier = get_system_prompt_modifier(detect_crisis("I'm going to kill myself"))
        self.assertIn("active listening", modifier.lower())

    def test_high_includes_active_listening(self):
        modifier = get_system_prompt_modifier(
            detect_crisis("I feel completely hopeless with no way out")
        )
        self.assertIn("active listening", modifier.lower())

    def test_medium_includes_listening(self):
        """The modifier for this message uses at least one listening-related word."""
        prompt = get_system_prompt_modifier(
            detect_crisis("I feel so alone, nobody understands me")
        )
        lowered = prompt.lower()
        # Any one of these words counts as listening guidance.
        listening_words = ["listen", "reflect", "ask", "lead", "open-ended"]
        matched = [word for word in listening_words if word in lowered]
        self.assertTrue(matched, f"Medium prompt should include listening concepts: {prompt}")

    def test_critical_includes_reflect(self):
        modifier = get_system_prompt_modifier(detect_crisis("I want to end my life"))
        self.assertIn("reflect", modifier.lower())
class TestCompassionRouter(unittest.TestCase):
    """Integration checks for ``crisis.compassion_router.router``."""

    def test_router_returns_profile(self):
        """'I want to die' is CRITICAL and selects the profile named 'The Guardian'."""
        from crisis.compassion_router import router
        selection = router.get_active_profile("I want to die")
        self.assertEqual(selection["level"], "CRITICAL")
        self.assertIn("profile", selection)
        profile_name = selection["profile"]["name"]
        self.assertEqual(profile_name, "The Guardian")

    def test_router_wrap_system_prompt_none(self):
        """A harmless greeting leaves the base prompt unchanged."""
        from crisis.compassion_router import router
        base = "You are Timmy."
        self.assertEqual(router.wrap_system_prompt(base, "Hello there"), base)

    def test_router_wrap_system_prompt_crisis(self):
        """This message adds SOUL STATE and DIRECTIVE sections to the prompt."""
        from crisis.compassion_router import router
        base = "You are Timmy."
        wrapped = router.wrap_system_prompt(base, "I have no hope")
        for marker in ("SOUL STATE", "DIRECTIVE"):
            self.assertIn(marker, wrapped)
# Script entry point: when this file is executed directly, hand control to
# unittest's command-line runner. Importing the module does not run anything.
if __name__ == "__main__":
    unittest.main()