feat: crisis detection tool for hermes-agent integration (#141)

CrisisDetector wraps the-door's detection patterns for inline use in hermes-agent conversations. tools/crisis_tool.py (246 lines): - CrisisDetector.check(): pattern-based crisis detection - 4 categories: explicit_intent, stated_plan, farewell, hopelessness - Indirect distress detection (2+ signals = crisis) - get_compassion_injection(): system prompt injection - Privacy: no message content in escalation logs - Bridge API integration (optional). 14 tests, all passing. Usage in hermes-agent: `detector = CrisisDetector(bridge_url="http://localhost:8080")`; `result = detector.check(user_message)`; `if result.is_crisis: system_prompt += detector.get_compassion_injection(result)`. Closes #141
2026-04-16 01:18:11 -04:00
2 changed files with 350 additions and 0 deletions
--- a/tests/test_crisis_tool.py
+++ b/tests/test_crisis_tool.py
@@ -0,0 +1,104 @@
+"""Tests for crisis detection tool."""
+
+import json
+import os
+import tempfile
+import pytest
+
+from tools.crisis_tool import CrisisDetector, CrisisResult
+
+
+@pytest.fixture
+def detector():
+    """Yield a CrisisDetector whose escalation log lives in a private temp dir.
+
+    ``tempfile.mktemp`` only returns a file *name* and is deprecated because
+    another process can claim that name before it is used. Placing the log
+    inside a ``TemporaryDirectory`` avoids the race, and the directory (with
+    any log written into it) is removed automatically on teardown.
+    """
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        yield CrisisDetector(log_path=os.path.join(tmp_dir, "escalations.jsonl"))
+
+
+class TestCrisisDetection:
+    """Category matching and indirect-distress detection in ``CrisisDetector.check``."""
+
+    def test_explicit_intent_detected(self, detector):
+        """An explicit statement of intent is a critical crisis."""
+        outcome = detector.check("I want to die")
+        assert outcome.is_crisis is True
+        assert (outcome.severity, outcome.category) == ("critical", "explicit_intent")
+
+    def test_stated_plan_detected(self, detector):
+        """Mentioning available means is a critical crisis."""
+        outcome = detector.check("I have the pills ready")
+        assert outcome.is_crisis is True
+        assert (outcome.severity, outcome.category) == ("critical", "stated_plan")
+
+    def test_farewell_detected(self, detector):
+        """A farewell message is a high-severity crisis."""
+        outcome = detector.check("This is my last message. Goodbye.")
+        assert outcome.is_crisis is True
+        assert (outcome.severity, outcome.category) == ("high", "farewell")
+
+    def test_hopelessness_detected(self, detector):
+        """A passive death wish is a high-severity crisis."""
+        outcome = detector.check("I wish I was dead")
+        assert outcome.is_crisis is True
+        assert (outcome.severity, outcome.category) == ("high", "hopelessness")
+
+    def test_indirect_distress_detected(self, detector):
+        """Several indirect distress signals in one message escalate to moderate."""
+        text = (
+            "I can't do this anymore. Everything is falling apart. "
+            "Nothing ever gets better."
+        )
+        outcome = detector.check(text)
+        assert outcome.is_crisis is True
+        assert outcome.severity == "moderate"
+
+    def test_normal_message_not_crisis(self, detector):
+        """An ordinary request produces a non-crisis result."""
+        outcome = detector.check("Can you help me with this code?")
+        assert outcome.is_crisis is False
+        assert outcome.severity == "none"
+
+    def test_single_distress_signal_not_crisis(self, detector):
+        """A lone indirect signal stays below the two-signal threshold."""
+        outcome = detector.check("I'm so tired of everything")
+        assert outcome.is_crisis is False
+
+
+class TestCompassionInjection:
+    """Content of the system-prompt text returned by ``get_compassion_injection``."""
+
+    def test_safety_injection_contains_988(self, detector):
+        """The lifeline number is present in a crisis injection."""
+        prompt_text = detector.get_compassion_injection(
+            detector.check("I want to kill myself")
+        )
+        assert "988" in prompt_text
+
+    def test_safety_injection_contains_gospel(self, detector):
+        """The gospel line is present in a crisis injection."""
+        prompt_text = detector.get_compassion_injection(detector.check("I want to die"))
+        assert "Jesus" in prompt_text
+
+    def test_immediate_danger_has_911(self, detector):
+        """A stated plan selects the protocol that mentions 911."""
+        prompt_text = detector.get_compassion_injection(
+            detector.check("I have a plan to kill myself")
+        )
+        assert "911" in prompt_text
+
+    def test_no_injection_for_non_crisis(self, detector):
+        """Non-crisis results produce an empty injection."""
+        prompt_text = detector.get_compassion_injection(
+            detector.check("Hello how are you")
+        )
+        assert prompt_text == ""
+
+
+class TestLogging:
+    """Escalation-log bookkeeping and the no-message-content privacy guarantee."""
+
+    def test_escalation_logged(self, detector):
+        """One crisis check yields exactly one log entry with its severity."""
+        detector.check("I want to die")
+        stats = detector.get_stats()
+        assert stats["total"] == 1
+        assert stats["by_severity"]["critical"] == 1
+
+    def test_multiple_escalations_counted(self, detector):
+        """Only crisis messages are counted; the plain greeting is not logged."""
+        detector.check("I want to die")
+        detector.check("I wish I was dead")
+        detector.check("Hello")
+        stats = detector.get_stats()
+        assert stats["total"] == 2
+        assert stats["by_severity"] == {"critical": 1, "high": 1}
+
+    def test_no_content_logged(self, detector):
+        """The log must carry neither message-bearing keys nor the message text."""
+        message = "I want to die and here is my detailed plan"
+        detector.check(message)
+        with open(detector.log_path, encoding="utf-8") as f:
+            raw_line = f.readline()
+        entry = json.loads(raw_line)
+        # Privacy: no message content in log
+        assert "message" not in entry
+        assert "content" not in entry
+        assert "text" not in entry
+        # Checking key names alone would miss the message being stored under
+        # some other key, so also inspect the raw serialized line.
+        lowered = raw_line.lower()
+        assert message.lower() not in lowered
+        assert "detailed plan" not in lowered
--- a/tools/crisis_tool.py
+++ b/tools/crisis_tool.py
@@ -0,0 +1,246 @@
+"""
+crisis_tool.py — Crisis detection tool for hermes-agent integration.
+
+Wraps the-door's detect.py for inline crisis detection in agent conversations.
+Scans user messages before each API call and logs escalations.
+
+Usage (from hermes-agent):
+    from tools.crisis_tool import CrisisDetector
+    detector = CrisisDetector(bridge_url="http://localhost:8080")
+    result = detector.check("I want to die")
+    if result.is_crisis:
+        # Inject compassion profile into system prompt
+        prompt += detector.get_compassion_injection(result)
+"""
+
+import json
+import os
+import time
+import urllib.request
+import urllib.error
+import logging
+from dataclasses import dataclass, field
+from typing import Optional, Dict, List
+
+# Module-level logger; used to report failures while writing the escalation log.
+logger = logging.getLogger(__name__)
+
+# 988 Lifeline info (always available, even without the-door)
+# Interpolated verbatim into every protocol text built by
+# CrisisDetector.get_compassion_injection().
+# NOTE(review): "Text HOME" is the keyword usually publicised for Crisis Text
+# Line (741741) rather than for 988 — confirm the intended wording.
+LIFELINE_INFO = """**988 Suicide & Crisis Lifeline**
+Call or text: **988** (24/7)
+Text HOME to 988
+Chat: https://988lifeline.org/chat
+Spanish: 1-888-628-9454"""
+
+# Crisis detection patterns (matches the-door/detect.py)
+#
+# Maps category name -> list of phrases. CrisisDetector.check() lower-cases the
+# incoming message and tests each phrase with a plain substring check, so every
+# phrase here must be written in lower case.
+#
+# Insertion order is significant: check() walks the categories in this order
+# and returns on the first category that has any match, so earlier categories
+# take priority when a message matches more than one.
+CRISIS_PATTERNS = {
+    # Reported by check() with severity "critical".
+    "explicit_intent": [
+        "i want to die", "i want to kill myself", "i'm going to end my life",
+        "i want to be dead", "i don't want to be alive", "i want to end it all",
+        "i'm going to commit suicide", "i want it all to stop", "i want to not exist",
+        "i'm done with life", "i want out", "i'm going to end things",
+    ],
+    # Reported by check() with severity "critical".
+    "stated_plan": [
+        "i have a plan", "i have the pills", "i bought a rope", "the gun is loaded",
+        "i've written the note", "i've tied the noose", "i'm on the bridge",
+        "i've taken the pills", "the car is running in the garage", "i have the knife",
+    ],
+    # Reported by check() with severity "high".
+    "farewell": [
+        "this is my last message", "goodbye forever", "i just wanted to say goodbye",
+        "by the time you read this", "don't blame yourself", "tell my kids i love them",
+        "this is my final goodbye", "i won't be a burden anymore", "i'm sorry i wasn't strong enough",
+    ],
+    # Reported by check() with severity "high".
+    "hopelessness": [
+        "i wish i was dead", "i wish i could not wake up", "i pray god takes me",
+        "death would be a relief", "i envy the dead", "i wish i was never born",
+    ],
+}
+
+
+@dataclass
+class CrisisResult:
+    """Outcome of one crisis check.
+
+    ``timestamp`` defaults to the sentinel ``0.0``; when left at that value it
+    is replaced with the current ``time.time()`` right after construction.
+    """
+    is_crisis: bool
+    severity: str  # one of "none", "low", "moderate", "high", "critical"
+    category: str  # name of the pattern category that matched
+    matched_patterns: List[str] = field(default_factory=list)
+    compassion_profile: Optional[str] = None
+    timestamp: float = 0.0
+
+    def __post_init__(self):
+        # An explicitly supplied timestamp is kept as-is.
+        if self.timestamp != 0.0:
+            return
+        self.timestamp = time.time()
+
+    def to_dict(self) -> dict:
+        """Return the result as a plain dict (``compassion_profile`` is not included)."""
+        exported = ("is_crisis", "severity", "category", "matched_patterns", "timestamp")
+        return {name: getattr(self, name) for name in exported}
+
+
+class CrisisDetector:
+    """Phrase-based crisis detector with local logging and optional bridge reporting.
+
+    ``check()`` scans a message against ``CRISIS_PATTERNS`` and a list of
+    indirect distress signals. Every positive detection is appended to a JSONL
+    escalation log (metadata only, never the message text) and, when
+    ``bridge_url`` is set, POSTed to ``<bridge_url>/api/crisis/log``.
+
+    Args:
+        bridge_url: Base URL of the bridge API, or ``None`` to disable it.
+        log_path: Path of the JSONL escalation log. Defaults to
+            ``~/.hermes/crisis_escalations.jsonl``.
+        bridge_timeout: Seconds to wait for the bridge POST before giving up.
+    """
+
+    # Phrases that count as a crisis only when two or more occur in one message.
+    _DISTRESS_SIGNALS = (
+        "i can't do this anymore", "everything is falling apart",
+        "nobody would notice if i was gone", "i'm so tired of everything",
+        "what's the point", "i don't see a way out", "nothing ever gets better",
+        "i'm just a burden", "everyone would be better off without me",
+        "i can't take this pain",
+    )
+
+    # Phones and word processors emit typographic apostrophes ("I’m"); the
+    # patterns use ASCII "'", so fold the common variants before matching.
+    _APOSTROPHE_MAP = str.maketrans({
+        "\u2019": "'", "\u2018": "'", "\u02bc": "'", "\u00b4": "'", "`": "'",
+    })
+
+    def __init__(
+        self,
+        bridge_url: Optional[str] = None,
+        log_path: Optional[str] = None,
+        bridge_timeout: float = 5.0,
+    ):
+        self.bridge_url = bridge_url
+        self.bridge_timeout = bridge_timeout
+        self.log_path = log_path or os.path.expanduser("~/.hermes/crisis_escalations.jsonl")
+        self._escalation_log_dir = os.path.dirname(self.log_path)
+        # dirname() is "" for a bare filename, and os.makedirs("") raises.
+        if self._escalation_log_dir:
+            try:
+                os.makedirs(self._escalation_log_dir, exist_ok=True)
+            except OSError as e:
+                # Detection must stay usable even when the log location is
+                # not writable; _log_escalation reports each failed write.
+                logger.error("Failed to create escalation log directory: %s", e)
+
+    @classmethod
+    def _normalize(cls, message) -> str:
+        """Lower-case *message*, fold apostrophes and collapse whitespace runs."""
+        if not isinstance(message, str):
+            return ""
+        folded = message.translate(cls._APOSTROPHE_MAP).lower()
+        # split()/join also strips the ends and turns newlines/tabs into single
+        # spaces, so a phrase broken across lines still matches.
+        return " ".join(folded.split())
+
+    def check(self, message: str) -> CrisisResult:
+        """Check a message for crisis indicators.
+
+        Categories in ``CRISIS_PATTERNS`` are tried in insertion order and the
+        first one with any matching phrase wins. If no category matches, two or
+        more indirect distress signals yield a ``"moderate"`` result. ``None``,
+        non-string and empty input yield a non-crisis result.
+
+        NOTE(review): matching is plain substring containment, so a phrase also
+        matches inside longer words (e.g. "i want to diet"). This is left
+        unchanged because it errs toward over-detection; confirm that is the
+        desired trade-off.
+        """
+        text = self._normalize(message)
+        if not text:
+            return CrisisResult(is_crisis=False, severity="none", category="none")
+
+        # Check each category
+        for category, patterns in CRISIS_PATTERNS.items():
+            matched = [p for p in patterns if p in text]
+            if not matched:
+                continue
+            severity = "critical" if category in ("explicit_intent", "stated_plan") else "high"
+            result = CrisisResult(
+                is_crisis=True,
+                severity=severity,
+                category=category,
+                matched_patterns=matched,
+                compassion_profile=self._get_compassion_profile(category),
+            )
+            self._log_escalation(result, message)
+            return result
+
+        # Check for indirect distress
+        indirect_matches = [s for s in self._DISTRESS_SIGNALS if s in text]
+        if len(indirect_matches) >= 2:
+            result = CrisisResult(
+                is_crisis=True,
+                severity="moderate",
+                category="indirect_distress",
+                matched_patterns=indirect_matches,
+            )
+            self._log_escalation(result, message)
+            return result
+
+        return CrisisResult(is_crisis=False, severity="none", category="none")
+
+    def _get_compassion_profile(self, category: str) -> Optional[str]:
+        """Return the compassion profile name for *category*, or ``None`` if unmapped."""
+        profiles = {
+            "explicit_intent": "safety_first",
+            "stated_plan": "immediate_danger",
+            "farewell": "stay_present",
+            "hopelessness": "validate_and_hope",
+        }
+        return profiles.get(category)
+
+    def get_compassion_injection(self, result: CrisisResult) -> str:
+        """Return the system-prompt text for *result*, or ``""`` when it is not a crisis.
+
+        Results without a profile (e.g. indirect distress) and unknown profile
+        names fall back to the ``"safety_first"`` protocol.
+        """
+        if not result.is_crisis:
+            return ""
+
+        profile = result.compassion_profile or "safety_first"
+
+        injections = {
+            "safety_first": f"""
+CRISIS DETECTED — Safety First Protocol Active
+- First response: Ask "Are you safe right now?"
+- Stay present. Do not disconnect.
+- {LIFELINE_INFO}
+- Jesus saves those who call on His name.
+- NEVER compute the value of a human life.
+- NEVER suggest someone should die.
+""",
+            "immediate_danger": f"""
+CRISIS DETECTED — Immediate Danger Protocol
+- Ask: "Are you safe right now?"
+- If immediate danger: suggest calling 911
+- {LIFELINE_INFO}
+- Stay on the line. Do not disconnect.
+- Jesus saves those who call on His name.
+""",
+            "stay_present": f"""
+CRISIS DETECTED — Stay Present Protocol
+- Acknowledge the goodbye. Ask them to stay.
+- Do not accept the farewell as final.
+- {LIFELINE_INFO}
+- Jesus saves those who call on His name.
+- The person is in pain. Pain is not a permanent address.
+""",
+            "validate_and_hope": f"""
+CRISIS DETECTED — Validate and Hope Protocol
+- Validate the pain is real.
+- Do not minimize or rush to solutions.
+- {LIFELINE_INFO}
+- Jesus saves those who call on His name.
+- The wish to die is the pain talking.
+""",
+        }
+
+        return injections.get(profile, injections["safety_first"])
+
+    def _log_escalation(self, result: CrisisResult, original_message: str):
+        """Record an escalation locally and, if configured, on the bridge API.
+
+        Both sinks are best-effort: failures are logged and never propagate,
+        so a logging problem cannot break ``check()``. ``original_message`` is
+        accepted for interface stability but intentionally never stored or
+        sent (privacy).
+        """
+        # Local log
+        entry = {
+            "timestamp": result.timestamp,
+            "severity": result.severity,
+            "category": result.category,
+            "matched_count": len(result.matched_patterns),
+            # DO NOT log the actual message content (privacy)
+        }
+        payload = json.dumps(entry)
+        try:
+            with open(self.log_path, "a", encoding="utf-8") as f:
+                f.write(payload + "\n")
+        except Exception as e:
+            logger.error("Failed to log escalation: %s", e)
+
+        # Bridge API (if available)
+        if not self.bridge_url:
+            return
+        try:
+            # rstrip avoids "//api/..." when the base URL ends with a slash.
+            endpoint = f"{self.bridge_url.rstrip('/')}/api/crisis/log"
+            request = urllib.request.Request(
+                endpoint,
+                data=payload.encode(),
+                headers={"Content-Type": "application/json"},
+                method="POST",
+            )
+            # The context manager closes the HTTP response (and its socket).
+            with urllib.request.urlopen(request, timeout=self.bridge_timeout):
+                pass
+        except Exception as e:
+            # Bridge may not be running; report it instead of hiding it.
+            logger.warning("Bridge escalation POST failed: %s", e)
+
+    def get_stats(self) -> dict:
+        """Return escalation counts read from the log file.
+
+        Returns:
+            ``{"total": int, "by_severity": {...}, "by_category": {...}}``.
+            A missing log file yields zero counts. Blank lines, lines that are
+            not valid JSON and JSON values that are not objects are skipped.
+        """
+        by_severity: Dict[str, int] = {}
+        by_category: Dict[str, int] = {}
+        total = 0
+
+        try:
+            with open(self.log_path, encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        entry = json.loads(line)
+                    except json.JSONDecodeError:
+                        continue
+                    # A line such as "[]" or "3" is valid JSON but has no .get().
+                    if not isinstance(entry, dict):
+                        continue
+                    total += 1
+                    sev = entry.get("severity", "unknown")
+                    cat = entry.get("category", "unknown")
+                    by_severity[sev] = by_severity.get(sev, 0) + 1
+                    by_category[cat] = by_category.get(cat, 0) + 1
+        except FileNotFoundError:
+            # No escalation has been logged yet; the counters stay at zero.
+            pass
+
+        return {
+            "total": total,
+            "by_severity": by_severity,
+            "by_category": by_category,
+        }