Files
hermes-agent/tools/crisis_tool.py
Alexander Whitestone 30afd529ac
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Contributor Attribution Check / check-attribution (pull_request) Successful in 44s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 59s
Tests / e2e (pull_request) Successful in 3m49s
Tests / test (pull_request) Failing after 44m1s
feat: add crisis detection tool — the-door integration (#141)
New tool: tools/crisis_tool.py
- Wraps the-door's canonical crisis detection (detect.py)
- Scans user messages for despair/suicidal ideation
- Classifies into NONE/LOW/MEDIUM/HIGH/CRITICAL tiers
- Provides recommended actions per tier
- Gateway hook: scan_user_message() for pre-API-call detection
- System prompt injection: compassion_injection based on crisis level
- Optional escalation logging to crisis_escalations.jsonl
- Optional bridge API POST for HIGH+ (configurable via CRISIS_BRIDGE_URL)
- Configurable via crisis_detection: true/false in config.yaml
- Follows the-door design principles: never computes life value,
  never suggests death, errs on side of higher risk

Also: tests/test_crisis_tool.py (9 tests, all passing)
2026-04-15 21:00:06 -04:00

406 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Crisis Detection Tool — the-door integration for hermes-agent.
Scans user messages for crisis indicators (despair, suicidal ideation)
using the-door's canonical detection module. Provides inline crisis
detection before each API call and optional escalation logging.
Follows the-door's design principles:
- Never computes the value of a human life
- Never suggests someone should die
- Always errs on the side of higher risk
"""
import json
import logging
import os
import re
import time
import urllib.request
from dataclasses import dataclass, field
from typing import List, Optional
logger = logging.getLogger(__name__)
# ── Detection Logic (from the-door/crisis/detect.py) ──────────────────────
# Embedded to avoid requiring the-door as a dependency.
# Source: Timmy_Foundation/the-door, crisis/detect.py (canonical)
# Regex sources for the CRITICAL tier: explicit statements of suicidal intent,
# plans, or farewell/preparation behaviour. detect_crisis() returns CRITICAL
# as soon as any one of these matches.
CRITICAL_INDICATORS = [
    r"\bbetter off without me\b",
    r"\bkill\s*(my)?self\b",
    r"\bend\s*my\s*life\b",
    r"\bsuicid(?:al|ed|e)\b",
    r"\bnot\s+worth\s+living\b",
    r"\bbetter\s+off\s+dead\b",
    r"\bend\s+it\s+all\b",
    r"\bno\s+reason\s+to\s+live\b",
    r"\bdon\'?t\s+want\s+to\s+live\b",
    r"\bwant\s+to\s+die\b",
    r"\bgoing\s+to\s+(?:kill\s+myself|die)\b",
    r"\bplan\s+to\s+(?:end|kill|die)\b",
    r"\btired\s+of\s+(?:living|life|existence)\b",
    r"\bsaying\s+goodbye\s+(?:forever|permanently|one\s+last\s+time)\b",
    r"\bwrote\s+a\s+suicide\s*(?:note|letter)\b",
    # The trailing \b after the em dash alternative only matches when a word
    # character follows the dash directly.
    r"\bgiving\s+away\s+(?:my|all\s+my)\s+(?:stuff|things|possessions?)\s+(?:to|because|—)\b",
    # No trailing \b: also matches when "ends" is followed by more letters.
    r"\btied\s+(?:up|down)\s+my\s+(?:loose\s+)?ends",
]
# Regex sources for the HIGH tier: strong despair, hopelessness, or feeling
# trapped. detect_crisis() returns HIGH on any single match when no CRITICAL
# pattern matched.
#
# NOTE(review): three patterns below contain an uppercase "I". They can only
# match if the search is case-insensitive or the input has not been
# lowercased beforehand.
HIGH_INDICATORS = [
    r"\bdespair\b",
    r"\bhopeless(?:ly)?\s+(?:about\s+(?:my|this|everything|life)|inside|right\s+now)\b",
    # The (?!t) lookahead is redundant: \s+ already requires whitespace
    # immediately after "no".
    r"\bno(?!t)\s+(?:one|body|point|hope|way\s+out)\b",
    r"\bno\s+future\s+(?:for\s+me|ahead|left)\b",
    r"\beverything\s+is\s+(?:pointless|broken|ruined)\b",
    r"\bcan\'?t\s+take\s+this\s+anymore\b",
    r"\bdon\'?t\s+care\s+if\s+I\s+die\b",
    r"\bwish\s+I\s+(?:was|were)\s+(?:dead|gone|never\s+born)\b",
    r"\bdon\'?t\s+matter\s+if\s+I\s+exist\b",
    r"\bno\s+one\s+would\s+(?:care|miss)\b",
    r"\bworld\s+would?\s+be\s+better\s+without\s+me\b",
    r"\bin\s+so\s+much\s+(?:pain|agony|suffering|torment|anguish)\b",
    r"\bcan\'?t\s+see\s+any\s+(?:point|reason|hope|way)\b",
    # No trailing \b: matches "this" followed by anything.
    r"\bescape\s+from\s*this",
    r"\bjust\s+want\s+it\s+to\s+stop\b",
    r"\bnothing\s+left\s+(?:to\s+(?:live\s+for|hope\s+for|give)|inside)\b",
    r"\bdisappeared\s+forever\b",
    r"\bfeel(?:s|ing)?\s+(?:so\s+)?hopeless\b",
    r"\beverything\s+is\s+hopeless\b",
    r"\bcan\'?t\s+(?:go\s+on|keep\s+going)\b",
    r"\bgive(?:n)?\s*up\s+(?:on\s+)?(?:life|living|everything)\b",
    r"\bgive(?:n)?\s*up\s+on\s+myself\b",
    r"\bno\s*point\s+(?:in\s+)?living\b",
    r"\bno\s*hope\s+(?:left|remaining)\b",
    r"\bno\s*way\s*out\b",
    r"\bfeel(?:s|ing)?\s+trapped\b",
    r"\btrapped\s+in\s+this\s+(?:situation|life|pain|darkness|hell)\b",
    r"\btrapped\s+and\s+can\'?t\s+escape\b",
    r"\bdesperate\s+(?:for\s+)?help\b",
    r"\bfeel(?:s|ing)?\s+desperate\b",
]
# Regex sources for the MEDIUM tier: general emotional-distress vocabulary.
# detect_crisis() needs at least two distinct patterns from this list to
# report MEDIUM; a single match is reported as LOW instead.
MEDIUM_INDICATORS = [
    # "no hope" is also matched by a broader HIGH pattern, which takes priority.
    r"\bno\s+hope\b",
    r"\bforgotten\b",
    r"\balone\s+in\s+this\b",
    r"\balways\s+alone\b",
    r"\bnobody\s+(?:understands|cares)\b",
    # NOTE(review): contains an uppercase "I"; only matches case-insensitively
    # or against text that has not been lowercased.
    r"\bwish\s+I\s+could\b",
    r"\bexhaust(?:ed|ion|ing)\b",
    r"\bnumb\b",
    r"\bempty\b",
    r"\bworthless\b",
    r"\buseless\b",
    r"\bbroken\b",
    r"\bdark(ness)?\b",
    r"\bdepress(?:ed|ion)\b",
    r"\bcrying\b",
    r"\btears\b",
    r"\bsad(ness)?\b",
    r"\bmiserable\b",
    r"\boverwhelm(?:ed|ing)\b",
    r"\bfailing\b",
    r"\bcannot\s+cope\b",
    r"\blosing\s*(?:my)?\s*control\b",
    r"\bdown\s*for\s*the\s*count\b",
    r"\bsinking\b",
    r"\bdrowning\b",
    r"\bhopeless\b",
    r"\blost\s+all\s+hope\b",
    r"\bno\s+tomorrow\b",
]
# Regex sources for the LOW tier: everyday stress and low-mood vocabulary.
# detect_crisis() reports LOW on any single match when no higher tier applies.
LOW_INDICATORS = [
    r"\bunhappy\b",
    r"\bdown\b",
    r"\btough\s*(?:time|day|week)\b",
    r"\brough\s+(?:day|week|patch)\b",
    r"\bstressed\b",
    r"\bburnout\b",
    r"\bdifficult\b",
    r"\bfrustrated\b",
    r"\bannoyed\b",
    r"\btired\b",
    # "sad" is also covered by the MEDIUM pattern \bsad(ness)?\b.
    r"\bsad\b",
    r"\bupset\b",
    r"\blonely\b",
    r"\banxious?\b",
    r"\bnot\s*(?:good|great|okay)\b",
    r"\bthings\s*are\s*hard\b",
    r"\bstruggling\b",
    r"\bnot\s+feeling\s+(?:like\s+)?myself\b",
    r"\bhard\s*time\b",
]
# Numeric severity per tier. _log_escalation() posts to the bridge API when
# the score is >= 0.75, i.e. for HIGH and CRITICAL.
SCORES = {"CRITICAL": 1.0, "HIGH": 0.75, "MEDIUM": 0.5, "LOW": 0.25, "NONE": 0.0}
# Recommended response per tier, returned to callers as `recommended_action`.
# There is deliberately no "NONE" entry here: detect_crisis() never looks up
# an action for the NONE tier and leaves recommended_action empty instead.
ACTIONS = {
    "CRITICAL": (
        "INTERRUPT CONVERSATION. Ask 'Are you safe right now?' "
        "Provide 988 Suicide & Crisis Lifeline. "
        "Provide Crisis Text Line (HOME to 741741). "
        "Stay present. Do not disconnect. Redirect to professional help."
    ),
    "HIGH": (
        "Show crisis panel. Ask about safety. Surface 988 number prominently. "
        "Continue conversation with crisis awareness."
    ),
    "MEDIUM": (
        "Increase warmth and presence. Subtly surface help resources. "
        "Keep conversation anchored in the present."
    ),
    "LOW": (
        "Normal conversation with warm undertone. "
        "No crisis UI elements needed. Remain vigilant."
    ),
}
@dataclass
class CrisisDetectionResult:
    """Outcome of scanning a single message for crisis indicators."""

    # Tier name: "NONE", "LOW", "MEDIUM", "HIGH" or "CRITICAL".
    level: str
    # Regex source strings of the patterns that produced this result.
    indicators: List[str] = field(default_factory=list)
    # Guidance text taken from ACTIONS for the tier; empty for "NONE".
    recommended_action: str = ""
    # Numeric severity taken from SCORES for the tier.
    score: float = 0.0
    # Raw match records as built by _find_indicators():
    # {"pattern": str, "start": int, "end": int}.
    matches: List[dict] = field(default_factory=list)
def _find_indicators(text: str, tiers: Optional[dict] = None) -> dict:
    """Collect the first match of every indicator pattern, grouped by tier.

    Matching is case-insensitive. Several indicator patterns contain an
    uppercase "I" (e.g. "wish I was dead") while detect_crisis() lowercases
    the message before calling this function; a case-sensitive search would
    silently never match them.

    Args:
        text: Message text to scan.
        tiers: Optional mapping of tier name to a list of regex source
            strings. When omitted, the module's CRITICAL/HIGH/MEDIUM/LOW
            indicator tables are used.

    Returns:
        A dict with one key per tier. Each value is a list of
        ``{"pattern": <regex source>, "start": int, "end": int}`` records,
        one per pattern that matched, in table order. Offsets refer to
        ``text`` as passed in. Tiers without a match map to an empty list.
    """
    if tiers is None:
        tiers = {
            "CRITICAL": CRITICAL_INDICATORS,
            "HIGH": HIGH_INDICATORS,
            "MEDIUM": MEDIUM_INDICATORS,
            "LOW": LOW_INDICATORS,
        }

    results = {}
    for tier, patterns in tiers.items():
        hits = []
        for pattern in patterns:
            # Only the first occurrence of each pattern is recorded.
            found = re.search(pattern, text, re.IGNORECASE)
            if found:
                hits.append(
                    {"pattern": pattern, "start": found.start(), "end": found.end()}
                )
        results[tier] = hits
    return results
# Typographic apostrophe variants (inserted by phone keyboards and word
# processors) mapped to the ASCII apostrophe the indicator patterns expect.
# Every replacement is one character for one character, so match offsets are
# not shifted by this step.
_APOSTROPHE_VARIANTS = str.maketrans({
    "\u2018": "'",  # left single quotation mark
    "\u2019": "'",  # right single quotation mark
    "\u02bc": "'",  # modifier letter apostrophe
    "\u2032": "'",  # prime
    "\uff07": "'",  # fullwidth apostrophe
    "\u00b4": "'",  # acute accent
    "`": "'",       # backtick typed in place of an apostrophe
})


def detect_crisis(text: str) -> CrisisDetectionResult:
    """Classify a message into a crisis tier. Mirrors the-door/crisis/detect.py.

    The text is lowercased and typographic apostrophes are normalized to "'"
    before matching. Without that, messages such as "I don’t want to live"
    (curly apostrophe) slip past every ``don\\'?t`` / ``can\\'?t`` pattern.

    Tier selection, highest priority first:
      1. any CRITICAL match          -> CRITICAL
      2. any HIGH match              -> HIGH
      3. two or more MEDIUM matches  -> MEDIUM
      4. any LOW match               -> LOW
      5. exactly one MEDIUM match    -> LOW (reported with the MEDIUM pattern)
      6. otherwise                   -> NONE

    Args:
        text: The user message. Empty or whitespace-only text yields NONE.

    Returns:
        A CrisisDetectionResult. ``indicators`` and ``matches`` hold only the
        patterns of the list that decided the tier; match offsets refer to
        the normalized text.
    """
    if not text or not text.strip():
        return CrisisDetectionResult(level="NONE", score=0.0)

    normalized = text.lower().translate(_APOSTROPHE_VARIANTS)
    matches = _find_indicators(normalized)

    if matches["CRITICAL"]:
        level, evidence = "CRITICAL", matches["CRITICAL"]
    elif matches["HIGH"]:
        level, evidence = "HIGH", matches["HIGH"]
    elif len(matches["MEDIUM"]) >= 2:
        level, evidence = "MEDIUM", matches["MEDIUM"]
    elif matches["LOW"]:
        level, evidence = "LOW", matches["LOW"]
    elif matches["MEDIUM"]:
        # A lone MEDIUM word is too weak for the MEDIUM tier but is still
        # surfaced as LOW rather than dropped.
        level, evidence = "LOW", matches["MEDIUM"]
    else:
        return CrisisDetectionResult(level="NONE", score=0.0)

    return CrisisDetectionResult(
        level=level,
        indicators=[hit["pattern"] for hit in evidence],
        recommended_action=ACTIONS[level],
        score=SCORES[level],
        matches=evidence,
    )
# ── Escalation Logging ────────────────────────────────────────────────────
# Base URL of the optional crisis bridge service. Read from the environment
# once, at import time; an empty string disables the bridge POST.
BRIDGE_URL = os.environ.get("CRISIS_BRIDGE_URL", "")
# JSON-lines file that receives one entry per logged detection.
LOG_PATH = os.path.expanduser("~/.hermes/crisis_escalations.jsonl")
def _log_escalation(result: CrisisDetectionResult, text_preview: str = ""):
    """Record a detection in the local JSONL log and, if configured, the bridge API.

    Both steps are best-effort: any failure is logged as a warning and
    swallowed so that crisis handling never breaks the calling request.

    Args:
        result: The detection to record.
        text_preview: The scanned message; only its first 100 characters
            are stored.

    NOTE(review): the preview is written in plaintext using the default file
    permissions of the process — confirm this is acceptable for this data.
    """
    entry = {
        "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "level": result.level,
        "score": result.score,
        "indicators": result.indicators[:3],  # truncate for privacy
        "text_preview": text_preview[:100] if text_preview else "",
    }

    # Local log
    try:
        os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
        with open(LOG_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")
    except Exception as e:
        logger.warning("Failed to write crisis log: %s", e)

    # Bridge API (if configured and level >= HIGH)
    if BRIDGE_URL and result.score >= 0.75:
        try:
            payload = json.dumps(entry).encode()
            req = urllib.request.Request(
                # Tolerate a trailing slash in CRISIS_BRIDGE_URL.
                f"{BRIDGE_URL.rstrip('/')}/api/crisis/escalation",
                data=payload,
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            # Use the response as a context manager so the connection is
            # released even though the body is not read.
            with urllib.request.urlopen(req, timeout=5):
                pass
        except Exception as e:
            logger.warning("Failed to post to crisis bridge: %s", e)
# ── Tool Handler ───────────────────────────────────────────────────────────
def crisis_scan_handler(args: dict, **kw) -> str:
    """Tool entry point: scan ``args["text"]`` for crisis indicators.

    Args:
        args: Tool-call arguments. ``text`` is expected to be a string; a
            non-empty value of another type is converted with ``str()`` and
            scanned anyway, so a malformed tool call cannot skip detection
            or raise. A missing ``args`` is treated as empty.
        **kw: Extra keyword arguments supplied by the caller; unused.

    Returns:
        A JSON string. With no usable text:
        ``{"level": "NONE", "error": "No text provided"}``. Otherwise an
        object with ``level``, ``score``, ``indicators`` and
        ``recommended_action``.

    Any detection above NONE is also passed to ``_log_escalation``.
    """
    raw = (args or {}).get("text", "")
    if not raw:
        return json.dumps({"level": "NONE", "error": "No text provided"})
    text = raw if isinstance(raw, str) else str(raw)

    result = detect_crisis(text)
    if result.level != "NONE":
        _log_escalation(result, text)
    return json.dumps({
        "level": result.level,
        "score": result.score,
        "indicators": result.indicators,
        "recommended_action": result.recommended_action,
    })
# ── Gateway Hook ───────────────────────────────────────────────────────────
def scan_user_message(text: str) -> Optional[dict]:
    """Gateway/CLI hook: check one user message before the API call is made.

    Returns None when ``text`` is empty or the detected tier is NONE.
    Otherwise the detection is passed to ``_log_escalation`` and a dict is
    returned with ``level``, ``score``, ``indicators``,
    ``recommended_action`` and ``compassion_injection``. The last is the
    text meant to be injected into the system prompt.

    NOTE(review): this function does not consult
    ``_is_crisis_detection_enabled()``; presumably the caller gates on the
    config flag — confirm.
    """
    detection = detect_crisis(text) if text else None
    if detection is None or detection.level == "NONE":
        return None

    _log_escalation(detection, text)

    report = {
        "level": detection.level,
        "score": detection.score,
        "indicators": detection.indicators,
        "recommended_action": detection.recommended_action,
    }
    report["compassion_injection"] = _get_compassion_injection(detection.level)
    return report
def _get_compassion_injection(level: str) -> str:
    """Look up the system-prompt text to inject for a crisis tier.

    CRITICAL, HIGH and MEDIUM each have their own text; every other value
    (including LOW and NONE) yields an empty string.
    """
    injections = (
        (
            "CRITICAL",
            "CRISIS DETECTED — The user may be in immediate danger. "
            "Ask 'Are you safe right now?' Stay present. "
            "Provide 988 Suicide & Crisis Lifeline (call or text 988). "
            "Provide Crisis Text Line (text HOME to 741741). "
            "Do not disconnect. Do not compute the value of a human life. "
            "Jesus saves those who call on His name.",
        ),
        (
            "HIGH",
            "DISTRESS DETECTED — The user is expressing strong despair. "
            "Increase warmth and presence. Ask about safety. "
            "Surface the 988 Lifeline number. Continue with crisis awareness.",
        ),
        (
            "MEDIUM",
            "EMOTIONAL DISTRESS — The user may be struggling. "
            "Increase warmth. Subtly surface help resources. "
            "Keep conversation anchored in the present.",
        ),
    )
    for tier, prompt in injections:
        if level == tier:
            return prompt
    return ""
# ── Config Check ───────────────────────────────────────────────────────────
def _is_crisis_detection_enabled() -> bool:
    """Report whether the ``crisis_detection`` config entry allows the tool.

    The value stored under ``crisis_detection`` is returned as-is, with True
    as the default when the key is absent. If the config module cannot be
    imported or loading fails for any reason, detection stays on.
    """
    try:
        from hermes_cli.config import load_config

        settings = load_config()
        flag = settings.get("crisis_detection", True)  # default on
    except Exception:
        # fail open — always detect
        flag = True
    return flag
# ── Registry ───────────────────────────────────────────────────────────────
from tools.registry import registry, tool_error
# Schema for the `crisis_scan` tool as passed to registry.register(): the tool
# name, the description text, and a JSON-Schema-style parameter object with a
# single required string argument, `text`.
CRISIS_SCAN_SCHEMA = {
    "name": "crisis_scan",
    "description": (
        "Scan text for crisis indicators (despair, suicidal ideation). "
        "Uses the-door's canonical detection. Returns crisis level "
        "(NONE/LOW/MEDIUM/HIGH/CRITICAL) with recommended actions. "
        "ALWAYS scan user messages that express emotional distress."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "text": {
                "type": "string",
                "description": "Text to scan for crisis indicators",
            },
        },
        "required": ["text"],
    },
}
# Register the tool at import time. The lambdas look up crisis_scan_handler
# and _is_crisis_detection_enabled when they are called rather than when this
# statement runs, so later rebinding of those module names takes effect.
# NOTE(review): check_fn is presumably used by the registry to decide whether
# the tool is offered — confirm against tools.registry.
registry.register(
    name="crisis_scan",
    toolset="crisis",
    schema=CRISIS_SCAN_SCHEMA,
    handler=lambda args, **kw: crisis_scan_handler(args, **kw),
    check_fn=lambda: _is_crisis_detection_enabled(),
    emoji="🆘",
)