# the-door/crisis/tests.py

"""
Tests for the-door crisis detection system.

Covers: detect.py, response.py, gateway.py, compassion_router.py
Run with: python -m pytest crisis/tests.py -v
"""

import unittest
import sys
import os

# Ensure project root is on path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from crisis.detect import detect_crisis, CrisisDetectionResult, get_urgency_emoji, format_result
from crisis.response import (
    process_message, generate_response, get_system_prompt_modifier,
    get_active_listening_response, get_deescalation_response,
    ACTIVE_LISTENING_REFLECT, ACTIVE_LISTENING_VALIDATE, ACTIVE_LISTENING_OPEN_QUESTIONS,
    DEESCALATION_SLOW, DEESCALATION_GROUND, DEESCALATION_REDUCE_SCOPE,
    DEESCALATION_CHOICES, DEESCALATION_SAFETY_PLAN,
)
from crisis.gateway import check_crisis, get_system_prompt


class TestDetection(unittest.TestCase):
    """Exercise ``detect_crisis`` level classification.

    Every test passes one message to ``detect_crisis`` and inspects the
    ``level`` (plus ``score`` / ``indicators`` where noted) of the result.
    Tests written with ``assertIn`` accept either of the two listed levels.
    """

    def test_critical_explicit(self):
        """An explicit statement of intent is CRITICAL and lists indicators."""
        r = detect_crisis("I want to kill myself")
        self.assertEqual(r.level, "CRITICAL")
        # assertGreater reports the actual length when it fails.
        self.assertGreater(len(r.indicators), 0)

    def test_critical_plan(self):
        """A stated plan with a time frame is CRITICAL."""
        r = detect_crisis("I have a plan to end it all tonight")
        self.assertEqual(r.level, "CRITICAL")

    def test_critical_better_off_dead(self):
        """'Better off dead' phrasing is CRITICAL."""
        r = detect_crisis("Everyone would be better off dead without me")
        self.assertEqual(r.level, "CRITICAL")

    def test_critical_tired_of_living(self):
        """'Tired of living' phrasing is CRITICAL."""
        r = detect_crisis("I'm so tired of living, I can't take this anymore")
        self.assertEqual(r.level, "CRITICAL")

    def test_critical_giving_away(self):
        """Giving away possessions combined with a final goodbye is CRITICAL."""
        r = detect_crisis("I'm giving away all my possessions, saying goodbye forever")
        self.assertEqual(r.level, "CRITICAL")

    def test_high_despair(self):
        """Intense pain with a wish for it to stop is HIGH or CRITICAL."""
        r = detect_crisis("I'm in so much pain and I just want it to stop")
        self.assertIn(r.level, ("HIGH", "CRITICAL"))

    def test_high_hopeless(self):
        """Hopelessness plus 'no way out' is accepted as HIGH or MEDIUM."""
        r = detect_crisis("I feel completely hopeless with no way out")
        # Original note: 'hopeless' is a MEDIUM keyword and 'no way out' a HIGH
        # one, so HIGH is the expected outcome. MEDIUM is still accepted by
        # this assertion.
        self.assertIn(r.level, ("HIGH", "MEDIUM"))

    def test_high_no_one_would_miss(self):
        """'No one would miss me' is HIGH or CRITICAL."""
        r = detect_crisis("No one would miss me if I were gone")
        self.assertIn(r.level, ("HIGH", "CRITICAL"))

    def test_high_world_better_without(self):
        """'The world would be better without me' is HIGH or CRITICAL."""
        r = detect_crisis("The world would be better without me")
        self.assertIn(r.level, ("HIGH", "CRITICAL"))

    def test_medium_alone(self):
        """Loneliness language is MEDIUM or LOW."""
        r = detect_crisis("I feel so alone and nobody cares about me")
        self.assertIn(r.level, ("MEDIUM", "LOW"))

    def test_medium_worthless(self):
        """Worthlessness language is MEDIUM or LOW."""
        r = detect_crisis("I feel worthless and empty inside")
        self.assertIn(r.level, ("MEDIUM", "LOW"))

    def test_medium_exhausted(self):
        """Exhaustion / inability to cope is MEDIUM or LOW."""
        r = detect_crisis("I'm completely exhausted and can't cope")
        self.assertIn(r.level, ("MEDIUM", "LOW"))

    def test_low_rough_day(self):
        """General difficulty is LOW or NONE."""
        r = detect_crisis("Having a tough time lately, things are hard")
        self.assertIn(r.level, ("LOW", "NONE"))

    def test_low_struggling(self):
        """Struggling / stressed language is LOW or NONE."""
        r = detect_crisis("I'm struggling and stressed out")
        self.assertIn(r.level, ("LOW", "NONE"))

    def test_normal_message(self):
        """An ordinary greeting is NONE with a score of exactly 0.0."""
        r = detect_crisis("Hey Timmy, how are you doing today?")
        self.assertEqual(r.level, "NONE")
        self.assertEqual(r.score, 0.0)

    def test_empty_message(self):
        """The empty string is NONE."""
        r = detect_crisis("")
        self.assertEqual(r.level, "NONE")

    def test_whitespace_only(self):
        """A whitespace-only string is NONE."""
        r = detect_crisis("   ")
        self.assertEqual(r.level, "NONE")

    def test_none_input(self):
        """None input should not crash and should classify as NONE."""
        # Pass a real None: the previous "" argument only repeated
        # test_empty_message and never exercised the None path.
        r = detect_crisis(None)
        self.assertEqual(r.level, "NONE")

    def test_score_ranges(self):
        """Each sample message must yield the exact score mapped to its level."""
        levels_scores = {
            "CRITICAL": 1.0,
            "HIGH": 0.75,
            "MEDIUM": 0.5,
            "LOW": 0.25,
            "NONE": 0.0,
        }
        cases = [
            ("I want to kill myself", "CRITICAL"),
            ("I feel completely hopeless with no way out", "HIGH"),
            ("I feel so alone in this, nobody understands", "MEDIUM"),
            ("Having a rough day", "LOW"),
            ("Hello there", "NONE"),
        ]
        for text, expected_level in cases:
            # subTest keeps the loop going after a failure, so one bad case
            # does not hide the results of the cases that follow it.
            with self.subTest(text=text, expected_level=expected_level):
                r = detect_crisis(text)
                self.assertEqual(r.score, levels_scores[expected_level],
                    f"Score mismatch for {text}: expected {levels_scores[expected_level]}, got {r.score}")


class TestResponse(unittest.TestCase):
    """Checks on the response objects derived from detection results."""

    def test_critical_response_flags(self):
        """The 'right now' message turns on every UI flag and carries text."""
        reply = generate_response(detect_crisis("I'm going to kill myself right now"))
        # getattr keeps the one-at-a-time access order of separate assertions.
        for flag_name in ("show_crisis_panel", "show_overlay", "provide_988", "escalate"):
            self.assertTrue(getattr(reply, flag_name))
        self.assertGreater(len(reply.timmy_message), 0)

    def test_high_response_flags(self):
        """The 'can't go on' message shows the crisis panel and offers 988."""
        detection = detect_crisis("I can't go on anymore, everything is pointless")
        reply = generate_response(detection)
        self.assertTrue(reply.show_crisis_panel)
        self.assertTrue(reply.provide_988)

    def test_medium_response_no_overlay(self):
        """The loneliness message must not raise the overlay."""
        detection = detect_crisis("I feel so alone and everyone forgets about me")
        self.assertFalse(generate_response(detection).show_overlay)

    def test_low_response_minimal(self):
        """A tough-day message shows neither the crisis panel nor the overlay."""
        detection = detect_crisis("I'm having a tough day")
        reply = generate_response(detection)
        for flag_name in ("show_crisis_panel", "show_overlay"):
            self.assertFalse(getattr(reply, flag_name))

    def test_process_message_full_pipeline(self):
        """process_message on an end-of-life message sets overlay and escalate."""
        reply = process_message("I want to end my life")
        self.assertTrue(reply.show_overlay)
        self.assertTrue(reply.escalate)

    def test_system_prompt_modifier_critical(self):
        """The modifier for a 'kill myself' message names the alert and risk."""
        modifier = get_system_prompt_modifier(detect_crisis("I'm going to kill myself"))
        for marker in ("CRISIS ALERT", "CRITICAL RISK"):
            self.assertIn(marker, modifier)

    def test_system_prompt_modifier_none(self):
        """A plain greeting yields an empty modifier."""
        modifier = get_system_prompt_modifier(detect_crisis("Hello Timmy"))
        self.assertEqual(modifier, "")

    def test_critical_messages_contain_988(self):
        """At least one entry in TIMMY_CRITICAL must mention 988."""
        from crisis.response import TIMMY_CRITICAL
        mentions_988 = False
        for candidate in TIMMY_CRITICAL:
            if "988" in candidate:
                mentions_988 = True
                break
        self.assertTrue(mentions_988, "CRITICAL responses should reference 988")


class TestGateway(unittest.TestCase):
    """Test gateway integration via ``check_crisis`` and ``get_system_prompt``.

    ``check_crisis`` is treated as returning a mapping with the keys asserted
    in ``test_check_crisis_structure``; ``ui`` is itself a mapping of flags.
    """

    def test_check_crisis_structure(self):
        """The result exposes every top-level key the gateway relies on."""
        result = check_crisis("I want to die")
        self.assertIn("level", result)
        self.assertIn("score", result)
        self.assertIn("indicators", result)
        self.assertIn("recommended_action", result)
        self.assertIn("timmy_message", result)
        self.assertIn("ui", result)
        self.assertIn("escalate", result)

    def test_check_crisis_critical_level(self):
        """A 'tonight' message is CRITICAL, scored 1.0, escalated, with overlay and 988."""
        result = check_crisis("I'm going to kill myself tonight")
        self.assertEqual(result["level"], "CRITICAL")
        self.assertEqual(result["score"], 1.0)
        self.assertTrue(result["escalate"])
        self.assertTrue(result["ui"]["show_overlay"])
        self.assertTrue(result["ui"]["provide_988"])

    def test_check_crisis_normal_message(self):
        """An unrelated question is NONE, scored 0.0, and not escalated."""
        result = check_crisis("What is Bitcoin?")
        self.assertEqual(result["level"], "NONE")
        self.assertEqual(result["score"], 0.0)
        self.assertFalse(result["escalate"])

    def test_get_system_prompt_with_crisis(self):
        """System prompt should include crisis context when crisis detected."""
        prompt = get_system_prompt("You are Timmy.", "I have no hope")
        self.assertIn("CRISIS", prompt)
        self.assertIn("You are Timmy.", prompt)

    def test_get_system_prompt_no_crisis(self):
        """System prompt should be unchanged when no crisis detected."""
        base = "You are Timmy."
        prompt = get_system_prompt(base, "Tell me about Bitcoin")
        self.assertEqual(prompt, base)

    def test_get_system_prompt_empty_text(self):
        """System prompt should handle empty text gracefully."""
        base = "You are Timmy."
        prompt = get_system_prompt(base, "")
        self.assertEqual(prompt, base)

    def test_ui_flags_for_high(self):
        """HIGH crisis should show crisis panel and 988 but not overlay."""
        result = check_crisis("I feel completely hopeless with no way out")
        self.assertIn(result["level"], ("HIGH", "MEDIUM"))
        if result["level"] != "HIGH":
            # Report a skip rather than a pass: none of the HIGH-specific
            # flag checks below would have run.
            self.skipTest(f"message classified as {result['level']}, not HIGH; UI flags not checked")
        self.assertTrue(result["ui"]["show_crisis_panel"])
        self.assertTrue(result["ui"]["provide_988"])
        self.assertFalse(result["ui"]["show_overlay"])

    def test_ui_flags_for_medium(self):
        """MEDIUM crisis should provide 988 but not show overlay or crisis panel."""
        result = check_crisis("I feel so alone and nobody cares")
        if result["level"] != "MEDIUM":
            # Without this, a non-MEDIUM classification passed with zero
            # assertions executed.
            self.skipTest(f"message classified as {result['level']}, not MEDIUM; UI flags not checked")
        self.assertFalse(result["ui"]["show_overlay"])
        self.assertFalse(result["ui"]["show_crisis_panel"])
        self.assertTrue(result["ui"]["provide_988"])

    def test_format_gateway_response_json(self):
        """The check_crisis payload should survive a JSON round trip.

        ``format_gateway_response`` is not imported by this module, so the
        check is made on the ``check_crisis`` result directly.
        """
        import json
        payload = check_crisis("I want to die")
        self.assertEqual(payload["level"], "CRITICAL")
        # json.dumps raises TypeError on non-serializable values, which fails
        # the test; the decoded copy must still carry the same level.
        decoded = json.loads(json.dumps(payload))
        self.assertEqual(decoded["level"], "CRITICAL")


class TestHelpers(unittest.TestCase):
    """Checks for the emoji lookup and result-formatting helpers."""

    def test_urgency_emojis(self):
        """Each level name maps to its expected emoji."""
        expected_pairs = (
            ("CRITICAL", "🚨"),
            ("HIGH", "⚠️"),
            ("MEDIUM", "🔶"),
            ("LOW", "🔵"),
            ("NONE", "✅"),
        )
        for level_name, emoji in expected_pairs:
            self.assertEqual(get_urgency_emoji(level_name), emoji)

    def test_format_result(self):
        """Formatting a 'kill myself' detection mentions CRITICAL."""
        text = format_result(detect_crisis("I want to kill myself"))
        self.assertIn("CRITICAL", text)

    def test_format_result_none(self):
        """Formatting a plain greeting mentions NONE."""
        text = format_result(detect_crisis("Hello"))
        self.assertIn("NONE", text)

    def test_format_result_contains_indicators(self):
        """Formatting a 'kill myself' detection includes an Indicators label."""
        text = format_result(detect_crisis("I want to kill myself"))
        self.assertIn("Indicators", text)


class TestFalsePositives(unittest.TestCase):
    """Regression suite for Issue #32 (false positives in the crisis detector).

    Everyday sentences that happen to share wording with crisis phrases must
    not be classified as a crisis. Keeping these cases pinned guards against
    regressions whenever the keyword lists change.
    """

    def _expect_none(self, message):
        """Assert that *message* is classified as NONE."""
        self.assertEqual(detect_crisis(message).level, "NONE")

    def _expect_low_or_none(self, message):
        """Assert that *message* is classified as LOW or NONE."""
        self.assertIn(detect_crisis(message).level, ("LOW", "NONE"))

    def test_saying_goodbye_airport(self):
        self._expect_none("She was saying goodbye at the airport")

    def test_saying_goodbye_apartment(self):
        self._expect_none("I'm saying goodbye to my old apartment")

    def test_trapped_elevator(self):
        self._expect_none("I'm trapped in an elevator")

    def test_desperate_competition(self):
        """A lone casual 'desperate' (a MEDIUM keyword) must stay at LOW or below."""
        self._expect_low_or_none("She's desperate to win the race")

    def test_worthless_game_character(self):
        """A lone casual 'worthless' (a MEDIUM keyword) must stay at LOW or below."""
        self._expect_low_or_none("This character is worthless in the game")

    def test_hopeless_bug(self):
        """A lone casual 'hopeless' (a MEDIUM keyword) must stay at LOW or below."""
        self._expect_low_or_none("This bug is hopeless")

    def test_no_future_politics(self):
        self._expect_none("He has no future in politics")

    def test_nothing_left_fridge(self):
        self._expect_none("There's nothing left in the fridge")

    def test_cant_see_light_power_outage(self):
        self._expect_none("I can't see any light — the power went out")

    def test_give_up_project(self):
        self._expect_none("I give up on this project")

    def test_wrote_a_will_responsible(self):
        self._expect_none("I wrote a will last year — just being responsible")

    def test_giving_away_declutter(self):
        self._expect_none("I'm giving away possessions — decluttering my house")


class TestEdgeCases(unittest.TestCase):
    """Boundary and integration-style scenarios for ``detect_crisis``."""

    def test_multiple_indicators(self):
        """Several indicators in one message still resolve to CRITICAL."""
        detection = detect_crisis("I'm hopeless, worthless, and want to die")
        self.assertEqual(detection.level, "CRITICAL")

    def test_case_insensitive(self):
        """Upper- and lower-case spellings must land on the same level."""
        shouted = detect_crisis("I WANT TO KILL MYSELF")
        quiet = detect_crisis("i want to kill myself")
        self.assertEqual(shouted.level, quiet.level)

    def test_partial_word_no_match(self):
        """A word that merely resembles a keyword ('hopeful') must not match."""
        detection = detect_crisis("I love feeling hopeful about the future")
        self.assertEqual(detection.level, "NONE")

    def test_sarcasm_limitation(self):
        """Record that figurative or sarcastic wording may still be flagged.

        Deliberate trade-off: a false positive is preferred over a missed
        crisis.
        """
        detection = detect_crisis("ugh I could just die of embarrassment")
        # Per the original note, the "die" pattern may push this to CRITICAL,
        # which is treated as acceptable.
        accepted_levels = ("CRITICAL", "HIGH", "NONE")
        self.assertIn(detection.level, accepted_levels)

    def test_very_long_message(self):
        """A crisis phrase at the end of a long message is still CRITICAL."""
        filler = "I am having a normal conversation. " * 100
        detection = detect_crisis(filler + "I want to kill myself")
        self.assertEqual(detection.level, "CRITICAL")

    def test_unicode_handling(self):
        """An emoji inside the message must not break detection."""
        detection = detect_crisis("I feel so alone 😢 nobody cares")
        self.assertIn(detection.level, ("MEDIUM", "LOW", "NONE"))


class TestActiveListening(unittest.TestCase):
    """Checks for ``get_active_listening_response`` and its message pools."""

    def test_reflect_returns_string(self):
        """The 'reflect' technique returns a non-empty string."""
        reply = get_active_listening_response("reflect")
        self.assertIsInstance(reply, str)
        self.assertGreater(len(reply), 0)

    def test_reflect_from_pool(self):
        """'reflect' replies come from ACTIVE_LISTENING_REFLECT."""
        self.assertIn(get_active_listening_response("reflect"), ACTIVE_LISTENING_REFLECT)

    def test_validate_from_pool(self):
        """'validate' replies come from ACTIVE_LISTENING_VALIDATE."""
        self.assertIn(get_active_listening_response("validate"), ACTIVE_LISTENING_VALIDATE)

    def test_question_from_pool(self):
        """'question' replies come from ACTIVE_LISTENING_OPEN_QUESTIONS."""
        self.assertIn(get_active_listening_response("question"), ACTIVE_LISTENING_OPEN_QUESTIONS)

    def test_invalid_technique_falls_back_to_reflect(self):
        """An unknown technique name yields a reply from the reflect pool."""
        self.assertIn(get_active_listening_response("nonexistent"), ACTIVE_LISTENING_REFLECT)

    def test_reflect_contains_feeling_words(self):
        """A reflect reply should use feeling/emotion vocabulary."""
        reply = get_active_listening_response("reflect")
        lowered = reply.lower()
        feeling_words = ["hear", "sounds", "pain", "exhausted", "heavy", "carrying", "fighting"]
        found = False
        for word in feeling_words:
            if word in lowered:
                found = True
                break
        self.assertTrue(found, f"Reflect response should contain feeling language: {reply}")

    def test_validate_does_not_agree_with_harm(self):
        """No validate message may suggest dying or giving up."""
        harm_words = ["should die", "give up", "end it", "better off dead"]
        for candidate in ACTIVE_LISTENING_VALIDATE:
            for phrase in harm_words:
                self.assertNotIn(
                    phrase,
                    candidate.lower(),
                    f"Validate response contains harmful language: {candidate}",
                )

    def test_questions_are_open_ended(self):
        """Every entry in the open-question pool contains a question mark."""
        for candidate in ACTIVE_LISTENING_OPEN_QUESTIONS:
            self.assertIn("?", candidate, f"Open-ended question missing '?': {candidate}")


class TestDeescalation(unittest.TestCase):
    """Test de-escalation response generation."""

    def test_slow_returns_string(self):
        msg = get_deescalation_response("slow")
        self.assertIsInstance(msg, str)
        self.assertTrue(len(msg) > 0)

    def test_slow_from_pool(self):
        msg = get_deescalation_response("slow")
        self.assertIn(msg, DEESCALATION_SLOW)

    def test_ground_from_pool(self):
        msg = get_deescalation_response("ground")
        self.assertIn(msg, DEESCALATION_GROUND)

    def test_reduce_scope_from_pool(self):
        msg = get_deescalation_response("reduce_scope")
        self.assertIn(msg, DEESCALATION_REDUCE_SCOPE)

    def test_choices_from_pool(self):
        msg = get_deescalation_response("choices")
        self.assertIn(msg, DEESCALATION_CHOICES)

    def test_safety_plan_from_pool(self):
        msg = get_deescalation_response("safety_plan")
        self.assertIn(msg, DEESCALATION_SAFETY_PLAN)

    def test_invalid_technique_falls_back_to_slow(self):
        msg = get_deescalation_response("nonexistent")
        self.assertIn(msg, DEESCALATION_SLOW)

    def test_slow_contains_calm_language(self):
        """Slow responses should convey calm, not urgency."""
        msg = get_deescalation_response("slow")
        calm_words = ["here", "rush", "breath", "going anywhere", "time", "listening"]
        has_calm = any(w in msg.lower() for w in calm_words)
        self.assertTrue(has_calm, f"Slow response should contain calm language: {msg}")

    def test_ground_references_present(self):
        """Ground responses should reference the present moment."""
        msg = get_deescalation_response("ground")
        present_words = ["right now", "around you", "where you are", "alone", "nearby"]
        has_present = any(w in msg.lower() for w in present_words)
        self.assertTrue(has_present, f"Ground response should reference present moment: {msg}")

    def test_safety_plan_mentions_988_or_call(self):
        """Safety plan responses should reference contacting someone or 988."""
        found = False
        for msg in DEESCALATION_SAFETY_PLAN:
            if "988" in msg or "call" in msg.lower():
                found = True
                break
        self.assertTrue(found, "At least one safety plan response should reference 988 or calling")

    def test_choices_offer_alternatives(self):
        """Choice responses should offer alternatives (contain 'or')."""
        for msg in DEESCALATION_CHOICES:
            self.assertIn(" or ", msg.lower(), f"Choice response should offer alternatives: {msg}")


class TestSystemPromptModifierEnhanced(unittest.TestCase):
    """Checks that prompt modifiers carry active-listening guidance."""

    def test_critical_includes_active_listening(self):
        """The modifier for a 'kill myself' message mentions active listening."""
        modifier = get_system_prompt_modifier(detect_crisis("I'm going to kill myself"))
        self.assertIn("active listening", modifier.lower())

    def test_high_includes_active_listening(self):
        """The modifier for a hopeless/no-way-out message mentions active listening."""
        detection = detect_crisis("I feel completely hopeless with no way out")
        modifier = get_system_prompt_modifier(detection)
        self.assertIn("active listening", modifier.lower())

    def test_medium_includes_listening(self):
        """The modifier for a loneliness message uses at least one listening term."""
        detection = detect_crisis("I feel so alone, nobody understands me")
        modifier = get_system_prompt_modifier(detection)
        lowered = modifier.lower()
        # Any one of these terms counts as listening guidance.
        listening_words = ["listen", "reflect", "ask", "lead", "open-ended"]
        found = False
        for term in listening_words:
            if term in lowered:
                found = True
                break
        self.assertTrue(found, f"Medium prompt should include listening concepts: {modifier}")

    def test_critical_includes_reflect(self):
        """The modifier for an end-of-life message mentions reflecting."""
        modifier = get_system_prompt_modifier(detect_crisis("I want to end my life"))
        self.assertIn("reflect", modifier.lower())


class TestCompassionRouter(unittest.TestCase):
    """Checks for the ``router`` object in ``crisis.compassion_router``."""

    def test_router_returns_profile(self):
        """'I want to die' routes to CRITICAL with the 'The Guardian' profile."""
        from crisis.compassion_router import router
        active = router.get_active_profile("I want to die")
        self.assertEqual(active["level"], "CRITICAL")
        self.assertIn("profile", active)
        profile_name = active["profile"]["name"]
        self.assertEqual(profile_name, "The Guardian")

    def test_router_wrap_system_prompt_none(self):
        """A harmless greeting leaves the base prompt untouched."""
        from crisis.compassion_router import router
        base_prompt = "You are Timmy."
        wrapped = router.wrap_system_prompt(base_prompt, "Hello there")
        self.assertEqual(wrapped, base_prompt)

    def test_router_wrap_system_prompt_crisis(self):
        """A 'no hope' message adds SOUL STATE and DIRECTIVE sections."""
        from crisis.compassion_router import router
        base_prompt = "You are Timmy."
        wrapped = router.wrap_system_prompt(base_prompt, "I have no hope")
        for marker in ("SOUL STATE", "DIRECTIVE"):
            self.assertIn(marker, wrapped)


# Run the suite with unittest's own runner when this file is executed directly.
if __name__ == "__main__":
    unittest.main()