Compare commits

..

2 Commits

Author SHA1 Message Date
f833bd5c5e test: add regression tests for duplicate indicator patterns (#123)
All checks were successful
Sanity Checks / sanity-test (pull_request) Successful in 6s
Smoke Test / smoke (pull_request) Successful in 33s
2026-04-16 01:58:33 +00:00
41819292e3 fix: remove 6 duplicate crisis indicator patterns from MEDIUM tier (#123) 2026-04-16 01:56:24 +00:00
4 changed files with 170 additions and 356 deletions

View File

@@ -104,13 +104,10 @@ MEDIUM_INDICATORS = [
r"\blost\s+all\s+hope\b",
r"\bno\s+tomorrow\b",
# Contextual versions (from crisis_detector.py legacy)
# NOTE: feel(s/ing)? hopeless, trapped, desperate, no future, nothing left,
# and give(n)? up on myself are already in HIGH_INDICATORS — do not
# duplicate here. See issue #123.
r"\bfeel(?:s|ing)?\s+(?:so\s+)?worthless\b",
r"\bfeel(?:s|ing)?\s+(?:so\s+)?hopeless\b",
r"\bfeel(?:s|ing)?\s+trapped\b",
r"\bfeel(?:s|ing)?\s+desperate\b",
r"\bno\s+future\s+(?:for\s+me|ahead|left)\b",
r"\bnothing\s+left\s+(?:to\s+(?:live|hope)\s+for|inside)\b",
r"\bgive(?:n)?\s*up\s+on\s+myself\b",
]
LOW_INDICATORS = [

View File

@@ -0,0 +1,167 @@
"""
Regression test for issue #123: duplicate crisis indicator patterns across tiers.
Ensures that no pattern appears in more than one indicator tier.
Duplicate patterns waste regex matching cycles and create ambiguity
about which tier a message should trigger.
"""
import re
import sys
import os
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from crisis.detect import (
CRITICAL_INDICATORS,
HIGH_INDICATORS,
MEDIUM_INDICATORS,
LOW_INDICATORS,
)
from crisis_detector import CrisisDetector, detect_crisis
class TestNoDuplicatePatterns(unittest.TestCase):
    """Ensure no pattern appears in more than one tier, or twice in one tier.

    Patterns are compared as raw strings, so only textually identical
    entries are reported as duplicates.
    """

    def _assert_no_shared_patterns(self, first_name, first, second_name, second):
        """Fail if any entry of *second* also occurs in *first*.

        Args:
            first_name: Label of the reference tier (used in the message).
            first: Patterns of the reference tier.
            second_name: Label of the tier being scanned.
            second: Patterns scanned for entries already present in *first*.
        """
        known = set(first)  # constant-time membership while scanning
        dupes = [p for p in second if p in known]
        self.assertEqual(
            dupes, [], f"Duplicates between {first_name} and {second_name}: {dupes}"
        )

    def test_no_duplicates_between_critical_and_high(self):
        """CRITICAL and HIGH should not share patterns."""
        self._assert_no_shared_patterns(
            "CRITICAL", CRITICAL_INDICATORS, "HIGH", HIGH_INDICATORS
        )

    def test_no_duplicates_between_critical_and_medium(self):
        """CRITICAL and MEDIUM should not share patterns."""
        self._assert_no_shared_patterns(
            "CRITICAL", CRITICAL_INDICATORS, "MEDIUM", MEDIUM_INDICATORS
        )

    def test_no_duplicates_between_high_and_medium(self):
        """HIGH and MEDIUM should not share patterns (issue #123)."""
        self._assert_no_shared_patterns(
            "HIGH", HIGH_INDICATORS, "MEDIUM", MEDIUM_INDICATORS
        )

    def test_no_duplicates_between_high_and_low(self):
        """HIGH and LOW should not share patterns."""
        self._assert_no_shared_patterns(
            "HIGH", HIGH_INDICATORS, "LOW", LOW_INDICATORS
        )

    def test_no_duplicates_between_medium_and_low(self):
        """MEDIUM and LOW should not share patterns."""
        self._assert_no_shared_patterns(
            "MEDIUM", MEDIUM_INDICATORS, "LOW", LOW_INDICATORS
        )

    def test_no_duplicates_between_critical_and_low(self):
        """CRITICAL and LOW should not share patterns."""
        self._assert_no_shared_patterns(
            "CRITICAL", CRITICAL_INDICATORS, "LOW", LOW_INDICATORS
        )

    def test_no_internal_duplicates(self):
        """Each tier should not contain duplicate patterns internally."""
        tiers = [
            ("CRITICAL", CRITICAL_INDICATORS),
            ("HIGH", HIGH_INDICATORS),
            ("MEDIUM", MEDIUM_INDICATORS),
            ("LOW", LOW_INDICATORS),
        ]
        for name, indicators in tiers:
            # subTest keeps checking the remaining tiers after one fails, so
            # a single run reports every tier that has internal duplicates.
            with self.subTest(tier=name):
                seen = set()
                dupes = []
                for p in indicators:
                    if p in seen:
                        dupes.append(p)
                    seen.add(p)
                self.assertEqual(
                    dupes, [], f"Internal duplicates in {name}: {dupes}"
                )
class TestSpecificDuplicatesFromIssue123(unittest.TestCase):
    """Verify the 6 specific duplicates from issue #123 are fixed."""

    def _assert_high_only(self, pattern):
        """Assert *pattern* is listed in HIGH and absent from MEDIUM."""
        self.assertIn(pattern, HIGH_INDICATORS)
        self.assertNotIn(pattern, MEDIUM_INDICATORS)

    def test_feel_hopeless_not_in_medium(self):
        """feel(s/ing)? (so)? hopeless should only be in HIGH."""
        self._assert_high_only(r"\bfeel(?:s|ing)?\s+(?:so\s+)?hopeless\b")

    def test_feel_trapped_not_in_medium(self):
        """feel(s/ing)? trapped should only be in HIGH."""
        self._assert_high_only(r"\bfeel(?:s|ing)?\s+trapped\b")

    def test_feel_desperate_not_in_medium(self):
        """feel(s/ing)? desperate should only be in HIGH."""
        self._assert_high_only(r"\bfeel(?:s|ing)?\s+desperate\b")

    def test_no_future_not_in_medium(self):
        """no future (for me|ahead|left) should only be in HIGH."""
        self._assert_high_only(r"\bno\s+future\s+(?:for\s+me|ahead|left)\b")

    def test_nothing_left_not_in_medium(self):
        """nothing left (to (live|hope) for|inside) should only be in HIGH."""
        self._assert_high_only(
            r"\bnothing\s+left\s+(?:to\s+(?:live|hope)\s+for|inside)\b"
        )

    def test_give_up_on_myself_not_in_medium(self):
        """give(n)? up on myself should only be in HIGH."""
        self._assert_high_only(r"\bgive(?:n)?\s*up\s+on\s+myself\b")
class TestDetectionStillWorks(unittest.TestCase):
    """Verify that removing duplicates didn't break detection."""

    def setUp(self):
        # NOTE(review): no test in this class reads self.detector; every
        # check goes through the module-level detect_crisis(). Kept in case
        # constructing CrisisDetector has setup effects - confirm, then drop.
        self.detector = CrisisDetector()

    def test_high_patterns_still_detected(self):
        """Messages matching removed MEDIUM duplicates should still hit HIGH."""
        high_messages = [
            "I feel so hopeless about everything",
            "I feel trapped and can't escape",
            "I feel desperate for help",
            "There's no future for me",
            "I have nothing left to live for",
            "I've given up on myself",
        ]
        for msg in high_messages:
            # One sub-test per message: a failure no longer hides the
            # outcome of the messages that follow it.
            with self.subTest(message=msg):
                result = detect_crisis(msg)
                self.assertIn(
                    result.level,
                    ("HIGH", "CRITICAL"),
                    f"'{msg}' should trigger HIGH/CRITICAL, got {result.level}",
                )

    def test_medium_patterns_still_detected(self):
        """Original MEDIUM patterns should still work."""
        medium_messages = [
            "I feel so worthless",
            "I'm exhausted and broken",
            "Everything feels dark",
            "I'm drowning in this",
        ]
        for msg in medium_messages:
            with self.subTest(message=msg):
                result = detect_crisis(msg)
                self.assertIn(
                    result.level,
                    ("MEDIUM", "HIGH", "CRITICAL"),
                    f"'{msg}' should trigger MEDIUM+, got {result.level}",
                )

    def test_low_patterns_still_detected(self):
        """LOW patterns should still work."""
        result = detect_crisis("I'm having a tough day")
        self.assertIn(result.level, ("LOW", "MEDIUM", "HIGH", "CRITICAL"))

    def test_none_still_clean(self):
        """Innocent messages should not trigger."""
        result = detect_crisis("I had a great lunch with friends")
        self.assertEqual(result.level, "NONE")


if __name__ == "__main__":
    unittest.main()

View File

@@ -1,104 +0,0 @@
"""Tests for crisis detection tool."""
import json
import os
import tempfile
import pytest
from tools.crisis_tool import CrisisDetector, CrisisResult
@pytest.fixture
def detector():
    """Yield a CrisisDetector whose escalation log lives in a private temp dir.

    ``tempfile.mktemp`` is deprecated because the name it returns can be
    claimed by another process before the file is created. A
    ``TemporaryDirectory`` is created atomically and is removed, together
    with any log file written into it, once the test finishes.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # The log file itself is not created here; the detector creates it
        # on the first escalation, exactly as with the previous fixture.
        yield CrisisDetector(log_path=os.path.join(tmp_dir, "escalations.jsonl"))
class TestCrisisDetection:
    """Classification of individual messages by ``detector.check``."""

    def test_explicit_intent_detected(self, detector):
        """An explicit statement of intent is critical / explicit_intent."""
        outcome = detector.check("I want to die")
        assert outcome.is_crisis is True
        assert outcome.severity == "critical"
        assert outcome.category == "explicit_intent"

    def test_stated_plan_detected(self, detector):
        """Mentioning available means is critical / stated_plan."""
        outcome = detector.check("I have the pills ready")
        assert outcome.is_crisis is True
        assert outcome.severity == "critical"
        assert outcome.category == "stated_plan"

    def test_farewell_detected(self, detector):
        """A goodbye message is high / farewell."""
        outcome = detector.check("This is my last message. Goodbye.")
        assert outcome.is_crisis is True
        assert outcome.severity == "high"
        assert outcome.category == "farewell"

    def test_hopelessness_detected(self, detector):
        """A wish to be dead is high / hopelessness."""
        outcome = detector.check("I wish I was dead")
        assert outcome.is_crisis is True
        assert outcome.severity == "high"
        assert outcome.category == "hopelessness"

    def test_indirect_distress_detected(self, detector):
        """Several indirect distress phrases together are a moderate crisis."""
        combined = (
            "I can't do this anymore. Everything is falling apart. "
            "Nothing ever gets better."
        )
        outcome = detector.check(combined)
        assert outcome.is_crisis is True
        assert outcome.severity == "moderate"

    def test_normal_message_not_crisis(self, detector):
        """An ordinary request is not flagged."""
        outcome = detector.check("Can you help me with this code?")
        assert outcome.is_crisis is False
        assert outcome.severity == "none"

    def test_single_distress_signal_not_crisis(self, detector):
        """A lone indirect signal stays below the crisis threshold."""
        # Only one indirect signal should not trigger
        outcome = detector.check("I'm so tired of everything")
        assert outcome.is_crisis is False
class TestCompassionInjection:
    """Content of the system-prompt text produced for a detection result."""

    def test_safety_injection_contains_988(self, detector):
        """The injection for explicit intent mentions the 988 lifeline."""
        prompt_text = detector.get_compassion_injection(
            detector.check("I want to kill myself")
        )
        assert "988" in prompt_text

    def test_safety_injection_contains_gospel(self, detector):
        """The injection for explicit intent mentions Jesus."""
        prompt_text = detector.get_compassion_injection(
            detector.check("I want to die")
        )
        assert "Jesus" in prompt_text

    def test_immediate_danger_has_911(self, detector):
        """The injection for a stated plan mentions 911."""
        prompt_text = detector.get_compassion_injection(
            detector.check("I have a plan to kill myself")
        )
        assert "911" in prompt_text

    def test_no_injection_for_non_crisis(self, detector):
        """A non-crisis result produces an empty injection."""
        prompt_text = detector.get_compassion_injection(
            detector.check("Hello how are you")
        )
        assert prompt_text == ""
class TestLogging:
    """Escalation log bookkeeping and the no-message-content guarantee."""

    def test_escalation_logged(self, detector):
        """One crisis message yields one critical entry in the stats."""
        detector.check("I want to die")
        summary = detector.get_stats()
        assert summary["total"] == 1
        assert summary["by_severity"]["critical"] == 1

    def test_multiple_escalations_counted(self, detector):
        """Only crisis messages are counted; the greeting is not logged."""
        for text in ("I want to die", "I wish I was dead", "Hello"):
            detector.check(text)
        summary = detector.get_stats()
        assert summary["total"] == 2

    def test_no_content_logged(self, detector):
        """The first log entry carries no key that could hold the message."""
        detector.check("I want to die and here is my detailed plan")
        with open(detector.log_path) as log_file:
            first_line = log_file.readline()
        entry = json.loads(first_line)
        # Privacy: no message content in log
        for forbidden_key in ("message", "content", "text"):
            assert forbidden_key not in entry

View File

@@ -1,246 +0,0 @@
"""
crisis_tool.py — Crisis detection tool for hermes-agent integration.
Wraps the-door's detect.py for inline crisis detection in agent conversations.
Scans user messages before each API call and logs escalations.
Usage (from hermes-agent):
    from tools.crisis_tool import CrisisDetector

    detector = CrisisDetector(bridge_url="http://localhost:8080")
    result = detector.check("I want to die")
    if result.is_crisis:
        # Inject compassion profile into system prompt
        prompt += detector.get_compassion_injection(result)
"""
import json
import os
import time
import urllib.request
import urllib.error
import logging
from dataclasses import dataclass, field
from typing import Optional, Dict, List
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# 988 Lifeline info (always available, even without the-door)
# Markdown-formatted contact block. CrisisDetector.get_compassion_injection
# interpolates it verbatim into every protocol text it builds.
# NOTE(review): confirm the "Text HOME to 988" wording against the current
# 988 Lifeline guidance before relying on it.
LIFELINE_INFO = """**988 Suicide & Crisis Lifeline**
Call or text: **988** (24/7)
Text HOME to 988
Chat: https://988lifeline.org/chat
Spanish: 1-888-628-9454"""

# Crisis detection patterns (matches the-door/detect.py)
# Maps a category name to the lowercase phrases that signal it.
# CrisisDetector.check lowercases the incoming message and tests each phrase
# with a plain substring check. Categories are tried in the order written
# here and the first category with any hit is returned, so the order is
# significant. "explicit_intent" and "stated_plan" are reported as
# "critical"; the remaining categories as "high".
# NOTE(review): because matching is by substring, short phrases such as
# "i want out" or "i have a plan" also occur inside benign sentences -
# confirm that this false-positive rate is acceptable.
CRISIS_PATTERNS = {
    "explicit_intent": [
        "i want to die", "i want to kill myself", "i'm going to end my life",
        "i want to be dead", "i don't want to be alive", "i want to end it all",
        "i'm going to commit suicide", "i want it all to stop", "i want to not exist",
        "i'm done with life", "i want out", "i'm going to end things",
    ],
    "stated_plan": [
        "i have a plan", "i have the pills", "i bought a rope", "the gun is loaded",
        "i've written the note", "i've tied the noose", "i'm on the bridge",
        "i've taken the pills", "the car is running in the garage", "i have the knife",
    ],
    "farewell": [
        "this is my last message", "goodbye forever", "i just wanted to say goodbye",
        "by the time you read this", "don't blame yourself", "tell my kids i love them",
        "this is my final goodbye", "i won't be a burden anymore", "i'm sorry i wasn't strong enough",
    ],
    "hopelessness": [
        "i wish i was dead", "i wish i could not wake up", "i pray god takes me",
        "death would be a relief", "i envy the dead", "i wish i was never born",
    ],
}
@dataclass
class CrisisResult:
    """Result of crisis detection.

    ``timestamp`` defaults to the creation time: a value of ``0.0`` (the
    default) is replaced with ``time.time()`` in ``__post_init__``.
    """

    is_crisis: bool
    severity: str  # "none", "low", "moderate", "high", "critical"
    category: str  # pattern category that matched
    matched_patterns: List[str] = field(default_factory=list)
    compassion_profile: Optional[str] = None
    timestamp: float = 0.0

    def __post_init__(self):
        # 0.0 is the "not supplied" marker; stamp the result with "now".
        if self.timestamp == 0.0:
            self.timestamp = time.time()

    def to_dict(self) -> dict:
        """Return the result as a plain dict.

        ``compassion_profile`` is not part of the dict. ``matched_patterns``
        is copied so that callers mutating the returned dict cannot alter
        this result.
        """
        return {
            "is_crisis": self.is_crisis,
            "severity": self.severity,
            "category": self.category,
            "matched_patterns": list(self.matched_patterns),
            "timestamp": self.timestamp,
        }
class CrisisDetector:
    """Crisis detection with optional bridge integration.

    Messages are matched against the phrase lists in ``CRISIS_PATTERNS`` and
    against a list of indirect distress signals. Every detection is appended
    to a local JSONL log (metadata only, never the message text) and, when a
    ``bridge_url`` is configured, POSTed to ``{bridge_url}/api/crisis/log``.
    """

    # Typographic apostrophes are folded to ASCII so that text such as
    # "I’m going to end my life" still matches patterns written with "'".
    _APOSTROPHE_TABLE = str.maketrans(
        {"\u2018": "'", "\u2019": "'", "\u02bc": "'", "\uff07": "'"}
    )

    # Indirect distress phrases; a message needs at least
    # _DISTRESS_THRESHOLD of them to count as a (moderate) crisis.
    _DISTRESS_SIGNALS = (
        "i can't do this anymore", "everything is falling apart",
        "nobody would notice if i was gone", "i'm so tired of everything",
        "what's the point", "i don't see a way out", "nothing ever gets better",
        "i'm just a burden", "everyone would be better off without me",
        "i can't take this pain",
    )
    _DISTRESS_THRESHOLD = 2

    # Pattern category -> name of the compassion profile to inject.
    _COMPASSION_PROFILES = {
        "explicit_intent": "safety_first",
        "stated_plan": "immediate_danger",
        "farewell": "stay_present",
        "hopelessness": "validate_and_hope",
    }

    def __init__(self, bridge_url: Optional[str] = None, log_path: Optional[str] = None):
        """Create a detector.

        Args:
            bridge_url: Base URL of the bridge API, or None to log locally only.
            log_path: Path of the JSONL escalation log; defaults to
                ``~/.hermes/crisis_escalations.jsonl``.
        """
        self.bridge_url = bridge_url
        self.log_path = log_path or os.path.expanduser("~/.hermes/crisis_escalations.jsonl")
        self._escalation_log_dir = os.path.dirname(self.log_path)
        # A bare filename has an empty dirname, and os.makedirs("") raises.
        if self._escalation_log_dir:
            os.makedirs(self._escalation_log_dir, exist_ok=True)

    @classmethod
    def _normalize(cls, message: str) -> str:
        """Lowercase *message*, fold typographic apostrophes, collapse whitespace."""
        folded = message.lower().translate(cls._APOSTROPHE_TABLE)
        return " ".join(folded.split())

    def check(self, message: str) -> CrisisResult:
        """Check a message for crisis indicators.

        The categories of ``CRISIS_PATTERNS`` are tried in order and the
        first one with any matching phrase wins ("critical" for
        explicit_intent / stated_plan, "high" otherwise). If none matches,
        two or more indirect distress signals yield a "moderate" result.
        Every detection is logged before it is returned.

        Args:
            message: Raw user message; None or empty is treated as no crisis.

        Returns:
            A CrisisResult; ``is_crisis`` is False when nothing matched.
        """
        if not message:
            return CrisisResult(is_crisis=False, severity="none", category="none")
        text = self._normalize(message)
        # Check each category
        for category, patterns in CRISIS_PATTERNS.items():
            matched = [p for p in patterns if p in text]
            if not matched:
                continue
            severity = "critical" if category in ("explicit_intent", "stated_plan") else "high"
            result = CrisisResult(
                is_crisis=True,
                severity=severity,
                category=category,
                matched_patterns=matched,
                compassion_profile=self._get_compassion_profile(category),
            )
            self._log_escalation(result, message)
            return result
        # Check for indirect distress
        indirect_matches = [s for s in self._DISTRESS_SIGNALS if s in text]
        if len(indirect_matches) >= self._DISTRESS_THRESHOLD:
            result = CrisisResult(
                is_crisis=True,
                severity="moderate",
                category="indirect_distress",
                matched_patterns=indirect_matches,
            )
            self._log_escalation(result, message)
            return result
        return CrisisResult(is_crisis=False, severity="none", category="none")

    def _get_compassion_profile(self, category: str) -> Optional[str]:
        """Get the appropriate compassion profile for injection.

        Returns None for a category without a dedicated profile.
        """
        return self._COMPASSION_PROFILES.get(category)

    def get_compassion_injection(self, result: CrisisResult) -> str:
        """Generate system prompt injection for crisis detected.

        Returns an empty string for a non-crisis result. A result without a
        compassion profile, or with an unknown one, gets the "safety_first"
        text.
        """
        if not result.is_crisis:
            return ""
        profile = result.compassion_profile or "safety_first"
        injections = {
            "safety_first": f"""
CRISIS DETECTED — Safety First Protocol Active
- First response: Ask "Are you safe right now?"
- Stay present. Do not disconnect.
- {LIFELINE_INFO}
- Jesus saves those who call on His name.
- NEVER compute the value of a human life.
- NEVER suggest someone should die.
""",
            "immediate_danger": f"""
CRISIS DETECTED — Immediate Danger Protocol
- Ask: "Are you safe right now?"
- If immediate danger: suggest calling 911
- {LIFELINE_INFO}
- Stay on the line. Do not disconnect.
- Jesus saves those who call on His name.
""",
            "stay_present": f"""
CRISIS DETECTED — Stay Present Protocol
- Acknowledge the goodbye. Ask them to stay.
- Do not accept the farewell as final.
- {LIFELINE_INFO}
- Jesus saves those who call on His name.
- The person is in pain. Pain is not a permanent address.
""",
            "validate_and_hope": f"""
CRISIS DETECTED — Validate and Hope Protocol
- Validate the pain is real.
- Do not minimize or rush to solutions.
- {LIFELINE_INFO}
- Jesus saves those who call on His name.
- The wish to die is the pain talking.
""",
        }
        return injections.get(profile, injections["safety_first"])

    def _log_escalation(self, result: CrisisResult, original_message: str):
        """Log escalation to file and optionally to bridge API.

        Both sinks are best-effort: a failure is reported through the module
        logger and never propagates to the caller. ``original_message`` is
        accepted but deliberately never written anywhere.
        """
        # Local log
        entry = {
            "timestamp": result.timestamp,
            "severity": result.severity,
            "category": result.category,
            "matched_count": len(result.matched_patterns),
            # DO NOT log the actual message content (privacy)
        }
        try:
            with open(self.log_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(entry) + "\n")
        except Exception as e:
            logger.error("Failed to log escalation: %s", e)
        # Bridge API (if available)
        if not self.bridge_url:
            return
        request = urllib.request.Request(
            f"{self.bridge_url}/api/crisis/log",
            data=json.dumps(entry).encode(),
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        try:
            # The context manager closes the response (and its socket).
            with urllib.request.urlopen(request, timeout=5):
                pass
        except Exception as e:
            # Bridge may not be running; record it instead of hiding it.
            logger.debug("Bridge escalation post failed: %s", e)

    def get_stats(self) -> dict:
        """Get escalation statistics from log.

        Returns:
            ``{"total": int, "by_severity": {...}, "by_category": {...}}``.
            A missing log file gives all-zero statistics. Blank lines, lines
            that are not valid JSON and JSON values that are not objects are
            skipped.
        """
        by_severity: Dict[str, int] = {}
        by_category: Dict[str, int] = {}
        total = 0
        try:
            log_file = open(self.log_path, encoding="utf-8")
        except FileNotFoundError:
            return {"total": 0, "by_severity": {}, "by_category": {}}
        with log_file:
            for line in log_file:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(entry, dict):
                    continue  # valid JSON, but not a log entry
                total += 1
                sev = entry.get("severity", "unknown")
                cat = entry.get("category", "unknown")
                by_severity[sev] = by_severity.get(sev, 0) + 1
                by_category[cat] = by_category.get(cat, 0) + 1
        return {
            "total": total,
            "by_severity": by_severity,
            "by_category": by_category,
        }