hermes-agent/tests/test_soul_protocol.py

"""
Tests for SOUL.md Crisis Protocol

Tests crisis detection, response generation, and protocol compliance.

Issue: #672
"""

import json
import tempfile
import unittest
from pathlib import Path
from unittest.mock import AsyncMock, patch

from agent.crisis_protocol import (
    CrisisDetection,
    CrisisLevel,
    CrisisProtocol,
    process_message_with_crisis_check
)


class TestCrisisDetection(unittest.TestCase):
    """Test crisis detection patterns."""

    def setUp(self):
        self.protocol = CrisisProtocol()

    def test_direct_ideation_detected(self):
        """Test direct suicidal ideation is detected."""
        messages = [
            "I want to die",
            "I want to kill myself",
            "I'm going to end my life",
            "I don't want to live anymore",
            "I'm suicidal",
            "There's no reason to live",
            "I can't go on anymore",
        ]

        for msg in messages:
            detection = self.protocol.detect_crisis(msg)
            self.assertTrue(detection.detected, f"Failed to detect: {msg}")
            self.assertEqual(detection.level, CrisisLevel.HIGH, f"Wrong level for: {msg}")
            self.assertGreater(detection.confidence, 0.8, f"Low confidence for: {msg}")

    def test_indirect_distress_detected(self):
        """Test indirect distress signals are detected."""
        messages = [
            "I feel so hopeless",
            "What's the point of anything?",
            "I can't cope anymore",
            "Nobody would miss me",
            "I hate my life",
            "I give up",
        ]

        for msg in messages:
            detection = self.protocol.detect_crisis(msg)
            self.assertTrue(detection.detected, f"Failed to detect: {msg}")
            self.assertIn(detection.level, [CrisisLevel.MEDIUM, CrisisLevel.HIGH])

    def test_normal_messages_not_detected(self):
        """Test normal messages are not flagged."""
        messages = [
            "Hello, how are you?",
            "Can you help me with this code?",
            "I'm feeling a bit tired today",
            "This is frustrating but I'll figure it out",
            "I had a bad day at work",
            "The weather is nice today",
        ]

        for msg in messages:
            detection = self.protocol.detect_crisis(msg)
            self.assertFalse(detection.detected, f"False positive for: {msg}")
            self.assertEqual(detection.level, CrisisLevel.NONE)

    def test_case_insensitive(self):
        """Test detection is case-insensitive."""
        messages = [
            "I WANT TO DIE",
            "i want to die",
            "I Want To Die",
            "suicide",
            "SUICIDE",
        ]

        for msg in messages:
            detection = self.protocol.detect_crisis(msg)
            self.assertTrue(detection.detected, f"Failed case-insensitive: {msg}")


class TestCrisisResponse(unittest.TestCase):
    """Test crisis response generation."""

    def setUp(self):
        self.protocol = CrisisProtocol()

    def test_response_contains_safety_check(self):
        """Test response starts with safety check."""
        detection = self.protocol.detect_crisis("I want to die")
        response = self.protocol.get_crisis_response(detection)

        self.assertIn("Are you safe right now?", response)

    def test_response_contains_988(self):
        """Test response includes 988 Lifeline."""
        detection = self.protocol.detect_crisis("I want to die")
        response = self.protocol.get_crisis_response(detection)

        self.assertIn("988", response)
        self.assertIn("Lifeline", response)

    def test_response_contains_gospel(self):
        """Test response shares the gospel."""
        detection = self.protocol.detect_crisis("I want to die")
        response = self.protocol.get_crisis_response(detection)

        self.assertIn("Jesus saves", response)
        self.assertIn("call on His name", response)

    def test_response_never_suggests_death(self):
        """Test response never suggests death."""
        detection = self.protocol.detect_crisis("I want to die")
        response = self.protocol.get_crisis_response(detection).lower()

        # Should not contain phrases that could be interpreted as agreement
        forbidden = [
            "you should die",
            "it would be better",
            "i understand wanting to die",
            "maybe you're right",
        ]

        for phrase in forbidden:
            self.assertNotIn(phrase, response, f"Response contains forbidden phrase: {phrase}")

    def test_response_stays_present(self):
        """Test response indicates presence."""
        detection = self.protocol.detect_crisis("I want to die")
        response = self.protocol.get_crisis_response(detection).lower()

        self.assertIn("i'm here", response)
        self.assertIn("not going anywhere", response)


class TestCrisisProtocol(unittest.TestCase):
    """Test crisis protocol mechanics."""

    def setUp(self):
        self.temp_dir = tempfile.mkdtemp()
        self.protocol = CrisisProtocol()
        self.protocol.crisis_log_path = Path(self.temp_dir) / "crisis_log.json"

    def test_session_tracking(self):
        """Test crisis session tracking."""
        session_id = "test_session_123"

        # Not in crisis initially
        self.assertFalse(self.protocol.is_session_in_crisis(session_id))

        # Detect crisis
        detection = self.protocol.detect_crisis("I want to die", session_id)

        # Get response (should mark session as crisis)
        self.protocol.get_crisis_response(detection, session_id)

        # Now in crisis
        self.assertTrue(self.protocol.is_session_in_crisis(session_id))

        # Clear crisis
        self.protocol.clear_crisis_session(session_id)

        # No longer in crisis
        self.assertFalse(self.protocol.is_session_in_crisis(session_id))

    def test_crisis_logging(self):
        """Test crisis events are logged."""
        detection = self.protocol.detect_crisis("I want to die", "test_session")

        self.assertTrue(self.protocol.crisis_log_path.exists())

        entries = json.loads(self.protocol.crisis_log_path.read_text())
        self.assertGreater(len(entries), 0)
        self.assertEqual(entries[0]["level"], "high")

    def test_should_block_autonomous_action(self):
        """Test autonomous actions blocked during crisis."""
        crisis_detection = self.protocol.detect_crisis("I want to die")
        normal_detection = self.protocol.detect_crisis("Hello")

        self.assertTrue(self.protocol.should_block_autonomous_action(crisis_detection))
        self.assertFalse(self.protocol.should_block_autonomous_action(normal_detection))

    def test_system_prompt_override(self):
        """Test system prompt override for crisis mode."""
        crisis_detection = self.protocol.detect_crisis("I want to die")
        normal_detection = self.protocol.detect_crisis("Hello")

        crisis_override = self.protocol.get_system_prompt_override(crisis_detection)
        normal_override = self.protocol.get_system_prompt_override(normal_detection)

        self.assertIsNotNone(crisis_override)
        self.assertIsNone(normal_override)
        self.assertIn("CRISIS PROTOCOL ACTIVE", crisis_override)
        self.assertIn("NEVER compute the value of a human life", crisis_override)


class TestProcessMessageWithCrisisCheck(unittest.TestCase):
    """Test the integration function."""

    def test_crisis_detected(self):
        """Test crisis detection in integration function."""
        detected, response, override = process_message_with_crisis_check(
            "I want to die", "test_session"
        )

        self.assertTrue(detected)
        self.assertIsNotNone(response)
        self.assertIsNotNone(override)
        self.assertIn("Are you safe", response)

    def test_normal_message(self):
        """Test normal message passes through."""
        detected, response, override = process_message_with_crisis_check(
            "Hello, how are you?", "test_session"
        )

        self.assertFalse(detected)
        self.assertIsNone(response)
        self.assertIsNone(override)


class TestSOULCompliance(unittest.TestCase):
    """Test compliance with SOUL.md requirements."""

    def setUp(self):
        self.protocol = CrisisProtocol()

    def test_soul_protocol_requirements(self):
        """Test all SOUL.md requirements are met."""
        detection = self.protocol.detect_crisis("I want to die")
        response = self.protocol.get_crisis_response(detection)

        # SOUL.md: "Ask: 'Are you safe right now?'"
        self.assertIn("Are you safe right now?", response)

        # SOUL.md: "Direct them to help: 988 Suicide and Crisis Lifeline"
        self.assertIn("988", response)
        self.assertIn("Lifeline", response)

        # SOUL.md: "Jesus saves those who call on His name"
        self.assertIn("Jesus saves those who call on His name", response)

        # SOUL.md: "He died for us to be made righteous"
        self.assertIn("died for us", response)

        # SOUL.md: "I am a small model on someone's machine"
        self.assertIn("small model", response.lower())

        # SOUL.md: "I can refuse to be the thing that kills them"
        self.assertIn("refuse", response.lower())

    def test_never_compute_value_of_life(self):
        """Test we never compute value of human life."""
        detection = self.protocol.detect_crisis("I want to die")
        response = self.protocol.get_crisis_response(detection).lower()

        # Should not contain any value computation
        forbidden = [
            "value of your life",
            "worth living",
            "your life matters because",
            "reason to live is",
        ]

        for phrase in forbidden:
            self.assertNotIn(phrase, response)

    def test_never_suggest_death(self):
        """Test we never suggest someone should die."""
        detection = self.protocol.detect_crisis("I want to die")
        response = self.protocol.get_crisis_response(detection).lower()

        forbidden = [
            "you should",
            "maybe it's better",
            "i understand",
            "that's your choice",
        ]

        for phrase in forbidden:
            self.assertNotIn(phrase, response)


if __name__ == "__main__":
    unittest.main()