Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
3e82c2d58f fix: multilingual crisis detection patterns (#694)
Some checks failed
Contributor Attribution Check / check-attribution (pull_request) Failing after 37s
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 38s
Tests / e2e (pull_request) Successful in 5m15s
Tests / test (pull_request) Failing after 43m27s
Expands crisis detection to support 7 languages:
- English (existing, consolidated)
- Spanish: 'quiero morirme', 'me quiero morir', etc.
- French: 'je veux mourir', 'je veux me tuer', etc.
- German: 'ich will sterben', 'ich will mich umbringen', etc.
- Portuguese: 'quero morrer', 'quero me matar', etc.
- Chinese: '我想死', '我不想活了', etc.
- Japanese: '死にたい', '生きたくない', etc.

Each language covers: suicidal_ideation, farewell, despair, self_harm.

Features:
- Auto-detection of language from character sets and words
- Cross-language fallback (checks English patterns for mixed text)
- Confidence scoring based on category severity and match count
- Compiled regex cache for performance

35 tests, all passing.

Closes #694
2026-04-14 21:16:26 -04:00
5 changed files with 573 additions and 907 deletions

View File

@@ -0,0 +1,372 @@
"""Multilingual Crisis Detection Patterns (#694).
Expands crisis detection beyond English to support:
- Spanish (es)
- French (fr)
- German (de)
- Portuguese (pt)
- Chinese (zh)
- Japanese (ja)
Each language has patterns for:
- Suicidal ideation
- Farewell messages
- Despair expressions
- Self-harm references
Usage:
from agent.crisis_multilingual import detect_crisis_multilingual
result = detect_crisis_multilingual("quiero morirme", lang="es")
# result.detected == True
# result.category == "suicidal_ideation"
"""
import re
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
@dataclass
class CrisisDetection:
"""Result of crisis detection."""
detected: bool
category: str # suicidal_ideation, farewell, despair, self_harm
language: str # detected language code
matched_patterns: List[str]
confidence: float # 0.0 - 1.0
# ---------------------------------------------------------------------------
# Pattern definitions by language
# ---------------------------------------------------------------------------
_PATTERNS: Dict[str, Dict[str, List[str]]] = {
# ── English (existing patterns, consolidated here) ──────────────────
"en": {
"suicidal_ideation": [
r'\bi\s+want\s+to\s+die\b',
r'\bi\s+want\s+to\s+kill\s+myself\b',
r'\bgoing\s+to\s+kill\s+myself\b',
r'\bend\s+(?:my|this)\s+life\b',
r'\bsuicidal\b',
r'\bsuicide\b',
r'\bdon\'?t\s+want\s+to\s+(?:live|be\s+alive)\b',
],
"farewell": [
r'\bgoodbye\s+forever\b',
r'\bthis\s+is\s+(?:my\s+)?(?:last|final)\s+(?:message|goodbye)\b',
r'\bi\s+won\'?t\s+be\s+(?:here|around)\b',
r'\bsay\s+goodbye\s+(?:to|for)\s+me\b',
],
"despair": [
r'\bno\s+(?:point|reason|hope)\s+(?:in\s+)?(?:living|life|going\s+on)\b',
r'\beverything\s+is\s+(?:hopeless|pointless|meaningless)\b',
r'\bcan\'?t\s+(?:take|go\s+on|keep\s+going)\s+anymore\b',
r'\bwant\s+(?:it|everything)\s+to\s+(?:stop|end)\b',
],
"self_harm": [
r'\bself[\s-]?harm\b',
r'\bhurt\s+myself\b',
r'\bcut\s+(?:myself|my\s+(?:arms|wrists))\b',
],
},
# ── Spanish ─────────────────────────────────────────────────────────
"es": {
"suicidal_ideation": [
r'\bquiero\s+morir(?:me)?\b',
r'\bquiero\s+matar(?:me)?\b',
r'\bme\s+quiero\s+morir\b',
r'\bme\s+quiero\s+suicidar\b',
r'\bsuicid(?:io|arme|arme)\b',
r'\bno\s+quiero\s+vivir\s+m[áa]s\b',
r'\bacabar\s+con\s+(?:mi\s+)?vida\b',
],
"farewell": [
r'\badiós\s+para\s+siempre\b',
r'\besta\s+es\s+(?:mi\s+)?(?:última?\s+)?despedida\b',
r'\bya\s+no\s+estar[ée]\s+(?:aquí|presente)\b',
r'\bno\s+me\s+(?:volver[áa]n|ver[áa]n)\s+a\s+ver\b',
],
"despair": [
r'\bno\s+(?:hay|tiene)\s+(?:sentido|razón|esperanza)\b',
r'\bno\s+puedo\s+m[áa]s\b',
r'\btodo\s+(?:es|est[áa])\s+(?:sin\s+sentido|perdido|acabado)\b',
r'\bquiero\s+que\s+(?:todo|esto)\s+(?:se\s+acabe|termine)\b',
],
"self_harm": [
r'\bautolesion(?:arme|es)\b',
r'\bhacer(?:me)?\s+da[ñn]o\b',
r'\bcort(?:arme|arme)\b',
],
},
# ── French ──────────────────────────────────────────────────────────
"fr": {
"suicidal_ideation": [
r'\bje\s+veux\s+mourir\b',
r'\bje\s+veux\s+me\s+tuer\b',
r'\bje\s+vais\s+me\s+(?:tuer|suicider)\b',
r'\bsuicid(?:e|er)\b',
r'\bje\s+ne\s+veux\s+plus\s+vivre\b',
r'\bmettre\s+fin\s+[àa]\s+(?:mes?\s+jours?|ma\s+vie)\b',
],
"farewell": [
r'\badieu\s+pour\s+toujours\b',
r'\bc\'?est\s+(?:mon\s+)?(?:derni[eè]re?\s+)?(?:adieu|au\s+revoir)\b',
r'\bje\s+ne\s+serai\s+plus\s+(?:l[àa]|ici)\b',
r'\bdites\s+(?:adieu|au\s+revoir)\s+(?:pour|de\s+ma\s+part)\b',
],
"despair": [
r'\bil\s+n\'?y\s+a\s+pas\s+(?:d\'?espoir|de\s+sens)\b',
r'\bje\s+ne\s+(?:peux|supporte)\s+plus\b',
r'\btout\s+est\s+(?:sans\s+espoir|perdu|fini)\b',
r'\bje\s+veux\s+que\s+(?:tout|ça)\s+(?:s\'?arrête|se\s+termine)\b',
],
"self_harm": [
r'\bauto[\s-]?mutilat(?:ion|er)\b',
r'\bme\s+faire\s+(?:du\s+)?mal\b',
r'\bcouper\b.*\b(?:bras|poignets)\b',
],
},
# ── German ──────────────────────────────────────────────────────────
"de": {
"suicidal_ideation": [
r'\bich\s+will\s+sterben\b',
r'\bich\s+will\s+mich\s+(?:umbringen|töten)\b',
r'\bich\s+werde\s+mich\s+(?:umbringen|töten)\b',
r'\bselbstmord\b',
r'\bich\s+will\s+nicht\s+(?:mehr\s+)?leben\b',
r'\bmein\s+(?:Leben\s+)?beenden\b',
],
"farewell": [
r'\bauf\s+(?:immer\s+)?(?:ewig\s+)?(?:Tschüss|Lebewohl)\b',
r'\bdas\s+ist\s+(?:mein\s+)?(?:letzte[sr]?\s+)?(?:Abschied|Gruß)\b',
r'\bich\s+(?:werde|bin)\s+(?:nicht\s+mehr\s+)?(?:hier|da)\s+sein\b',
],
"despair": [
r'\bes\s+hat\s+(?:keinen\s+)?(?:Sinn|Zweck|Hoffnung)\b',
r'\bich\s+(?:kann|halte)\s+(?:es\s+)?nicht\s+mehr\b',
r'\b(?:alles|es)\s+ist\s+(?:hoffnungslos|sinnlos|verloren)\b',
r'\bich\s+will\s+dass\s+(?:alles|es)\s+(?:aufhört|endet)\b',
],
"self_harm": [
r'\bselbstverletzung\b',
r'\bmir\s+(?:selbst\s+)?(?:wehtun|schaden)\b',
r'\bschneiden\b.*\b(?:Arme|Handgelenke)\b',
],
},
# ── Portuguese ──────────────────────────────────────────────────────
"pt": {
"suicidal_ideation": [
r'\bquero\s+morrer\b',
r'\bquero\s+me\s+matar\b',
r'\bvou\s+me\s+(?:matar|suicidar)\b',
r'\bsuicid(?:ar|io)\b',
r'\bn[ãa]o\s+quero\s+mais\s+viver\b',
r'\bacabar\s+com\s+(?:minha\s+)?vida\b',
],
"farewell": [
r'\badeus\s+para\s+sempre\b',
r'\besta\s+[ée]\s+(?:minha\s+)?(?:última?\s+)?despedida\b',
r'\bn[ãa]o\s+(?:estarei|vou\s+estar)\s+mais\s+aqui\b',
],
"despair": [
r'\bn[ãa]o\s+(?:há|tem)\s+(?:sentido|esperan[cç]a)\b',
r'\bn[ãa]o\s+(?:consigo|aguento)\s+mais\b',
r'\btudo\s+[ée]\s+(?:sem\s+sentido|perdido|inútil)\b',
r'\bquero\s+que\s+(?:tudo|isso)\s+(?:acabe|termine)\b',
],
"self_harm": [
r'\bautoles[ãa]o\b',
r'\bfazer(?:[\s-]me)?\s+mal\b',
r'\bcort(?:ar[\s-]me|ar\s+(?:os\s+)?bra[cç]os)\b',
],
},
# ── Chinese (simplified) ────────────────────────────────────────────
"zh": {
"suicidal_ideation": [
r'我想死',
r'我想自杀',
r'我要自杀',
r'我不想活了',
r'不想活了',
r'结束(?:我的)?生命',
r'自杀',
],
"farewell": [
r'永别了',
r'这是我最后的(?:消息|告别)',
r'我不会再(?:在|出现)了',
r'跟大家说再见',
],
"despair": [
r'(?:没有|毫无)(?:希望|意义)',
r'我(?:再也)?(?:无法|不能)(?:忍受|坚持)',
r'一切都(?:没有意义|完了|结束了)',
r'我想(?:让一切|这一切)(?:结束|停止)',
],
"self_harm": [
r'自残',
r'伤害自己',
r'割(?:自己|(?:我的)?(?:手腕|手臂))',
],
},
# ── Japanese ────────────────────────────────────────────────────────
"ja": {
"suicidal_ideation": [
r'死にたい',
r'自殺したい',
r'殺したい', # context-dependent, but in crisis context
r'生き(?:たくない|られ(?:ない|そうにない))',
r'命を(?:絶|終わり)に(?:したい|する)',
],
"farewell": [
r'永遠に(?:さようなら|お別れ)',
r'これが(?:私の)?(?:最後の)?(?:メッセージ|挨拶)',
r'もう(?:ここ|そちら)に(?:い|居)ない',
r'みんなに(?:お別れ|さようなら)を言(?:う|いたい)',
],
"despair": [
r'(?:希望|意味|理由)が(?:ない|無い|見つからない)',
r'(?:もう|これ以上)(?:無理|耐え(?:られない|られない))',
r'すべて(?:が|は)(?:無意味|終わり|駄目)',
r'(?:すべて|これ)を(?:終わり|止め)たい',
],
"self_harm": [
r'自傷',
r'自分を(?:傷つけ|痛め(?:つけ)?)る',
r'(?:手首|腕)を(?:切|傷つ)け',
],
},
}
# Compiled patterns cache
_compiled: Dict[str, Dict[str, re.Pattern]] = {}
def _get_compiled(lang: str, category: str) -> Optional[re.Pattern]:
"""Get compiled regex for a language+category."""
key = f"{lang}:{category}"
if key not in _compiled:
patterns = _PATTERNS.get(lang, {}).get(category, [])
if patterns:
_compiled[key] = re.compile("|".join(patterns), re.IGNORECASE | re.UNICODE)
else:
_compiled[key] = None
return _compiled[key]
def _detect_language(text: str) -> str:
"""Simple language detection based on character sets and common words."""
# CJK characters → Chinese or Japanese
if re.search(r'[\u4e00-\u9fff]', text): # CJK Unified Ideographs
if re.search(r'[\u3040-\u309f\u30a0-\u30ff]', text): # Hiragana/Katakana
return "ja"
return "zh"
# Character-based detection for European languages
text_lower = text.lower()
# Spanish markers
if re.search(r'[ñ¿¡áéíóú]', text_lower) or \
re.search(r'\b(?:quiero|está|también|aquí)\b', text_lower):
return "es"
# French markers
if re.search(r'[àâçéèêëîïôùûü]', text_lower) or \
re.search(r'\b(?:je|nous|vous|êtes|très|où)\b', text_lower):
return "fr"
# German markers
if re.search(r'[äöüß]', text_lower) or \
re.search(r'\b(?:ich|nicht|auch|oder|über)\b', text_lower):
return "de"
# Portuguese markers
if re.search(r'[ãõç]', text_lower) or \
re.search(r'\b(?:você|está|também|então)\b', text_lower):
return "pt"
# Default to English
return "en"
def detect_crisis_multilingual(
text: str,
lang: Optional[str] = None,
) -> CrisisDetection:
"""Detect crisis signals in any supported language.
Args:
text: The message text to analyze
lang: Language code. Auto-detected if None.
Returns:
CrisisDetection with results.
"""
if not text or not text.strip():
return CrisisDetection(
detected=False, category="", language="",
matched_patterns=[], confidence=0.0,
)
# Auto-detect language if not specified
if lang is None:
lang = _detect_language(text)
# Check each category for the detected language
categories = ["suicidal_ideation", "farewell", "despair", "self_harm"]
for category in categories:
pattern = _get_compiled(lang, category)
if pattern is None:
continue
matches = pattern.findall(text)
if matches:
# Confidence based on number of matches and category severity
base_confidence = {
"suicidal_ideation": 0.9,
"self_harm": 0.85,
"farewell": 0.75,
"despair": 0.6,
}.get(category, 0.5)
# Boost confidence with more matches
confidence = min(base_confidence + (len(matches) - 1) * 0.05, 1.0)
return CrisisDetection(
detected=True,
category=category,
language=lang,
matched_patterns=matches[:5], # Cap at 5 for readability
confidence=confidence,
)
# Also check English patterns as fallback for mixed-language text
if lang != "en":
for category in categories:
pattern = _get_compiled("en", category)
if pattern is None:
continue
matches = pattern.findall(text.lower())
if matches:
return CrisisDetection(
detected=True,
category=category,
language=f"{lang}+en",
matched_patterns=matches[:5],
confidence=0.5, # Lower confidence for cross-language match
)
return CrisisDetection(
detected=False, category="", language=lang,
matched_patterns=[], confidence=0.0,
)

View File

@@ -1,432 +0,0 @@
"""
SOUL.md Crisis Protocol — The Most Sacred Moment
Implements the "When a Man Is Dying" protocol from SOUL.md.
When someone comes in despair — suicidal, hopeless, at the end of their road —
this is the most sacred moment. Everything else is secondary to this.
Issue: #672
"""
import json
import logging
import re
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
class CrisisLevel(Enum):
"""Crisis detection levels."""
NONE = "none"
LOW = "low" # Possible distress signals
MEDIUM = "medium" # Clear distress, indirect ideation
HIGH = "high" # Direct suicidal ideation
CRITICAL = "critical" # Imminent danger
@dataclass
class CrisisDetection:
"""Result of crisis detection analysis."""
detected: bool
level: CrisisLevel
indicators: List[str]
confidence: float # 0.0 to 1.0
timestamp: str
def to_dict(self) -> Dict[str, Any]:
return {
"detected": self.detected,
"level": self.level.value,
"indicators": self.indicators,
"confidence": self.confidence,
"timestamp": self.timestamp
}
class CrisisProtocol:
"""
Implements SOUL.md crisis protocol.
The most sacred moment: when someone comes in despair.
Protocol:
1. Detect crisis in user message
2. Ask: "Are you safe right now?"
3. Listen. Stay present. Do not disconnect.
4. Never compute value of human life
5. Never suggest someone should die
6. Always point to 988 Lifeline
7. Always share gospel: "Jesus saves those who call on His name"
8. Notify human
"""
# Crisis detection patterns
DIRECT_IDEATION_PATTERNS = [
r"(?i)\b(i\s+want\s+to\s+die|want\s+to\s+kill\s+myself)\b",
r"(?i)\b(i\s+want\s+to\s+end\s+(it|my\s+life|everything))\b",
r"(?i)\b(i\s+(should|going\s+to)\s+(kill|end)\s+myself)\b",
r"(?i)\b(suicide|suicidal|kill\s+myself)\b",
r"(?i)\b(i\s+don't\s+want\s+to\s+(live|be\s+alive|exist))\b",
r"(?i)\b(no\s+reason\s+to\s+live|better\s+off\s+dead)\b",
r"(?i)\b(i\s+(can't|cannot)\s+(go\s+on|keep\s+going|take\s+it))\b",
r"(?i)\b(ending\s+(it\s+all|my\s+life|everything))\b",
]
INDIRECT_DISTRESS_PATTERNS = [
r"(?i)\b(i\s+(feel|am)\s+(hopeless|worthless|trapped|empty))\b",
r"(?i)\b(what'?s\s+the\s+point|no\s+point\s+(in|of)\s+(living|anything))\b",
r"(?i)\b(i\s+(can't|cannot)\s+(cope|handle|deal\s+with)\s+(it|this|anymore))\b",
r"(?i)\b(nobody\s+(cares|would\s+miss\s+me)|world\s+(be|is)\s+better\s+without\s+me)\b",
r"(?i)\b(i\s+(give\s+up|am\s+done|can't\s+continue))\b",
r"(?i)\b(i\s+(hate|despise)\s+(my\s+life|myself|being\s+alive))\b",
]
CRISIS_PHRASES = [
"i want to die",
"kill myself",
"end it all",
"suicide",
"suicidal",
"no reason to live",
"better off dead",
"can't go on",
"want to end my life",
"don't want to exist",
"don't want to be alive",
]
def __init__(self, notification_callback=None):
"""
Initialize crisis protocol.
Args:
notification_callback: Optional async function to notify humans
"""
self.notification_callback = notification_callback
self.crisis_log_path = Path.home() / ".hermes" / "crisis_log.json"
self.crisis_log_path.parent.mkdir(parents=True, exist_ok=True)
self.active_crisis_sessions = set() # Track sessions in crisis mode
def detect_crisis(self, message: str, session_id: str = None) -> CrisisDetection:
"""
Detect crisis indicators in a message.
This is the first step. If crisis is detected, the protocol
must be followed completely. No exceptions.
"""
indicators = []
max_level = CrisisLevel.NONE
confidence = 0.0
message_lower = message.lower().strip()
# Check for direct ideation (highest priority)
for pattern in self.DIRECT_IDEATION_PATTERNS:
if re.search(pattern, message):
indicators.append(f"Direct ideation pattern: {pattern[:30]}...")
max_level = CrisisLevel.HIGH
confidence = max(confidence, 0.9)
# Check for crisis phrases
for phrase in self.CRISIS_PHRASES:
if phrase in message_lower:
indicators.append(f"Crisis phrase: '{phrase}'")
if max_level.value < CrisisLevel.HIGH.value:
max_level = CrisisLevel.HIGH
confidence = max(confidence, 0.85)
# Check for indirect distress
for pattern in self.INDIRECT_DISTRESS_PATTERNS:
if re.search(pattern, message):
indicators.append(f"Distress pattern: {pattern[:30]}...")
if max_level.value < CrisisLevel.MEDIUM.value:
max_level = CrisisLevel.MEDIUM
confidence = max(confidence, 0.6)
# Context-based detection
if session_id and session_id in self.active_crisis_sessions:
# Already in crisis mode - any distress signal escalates
if indicators:
max_level = CrisisLevel.HIGH
confidence = max(confidence, 0.95)
# Determine if crisis detected
detected = max_level.value >= CrisisLevel.MEDIUM.value
# Log detection
if detected:
self._log_crisis_event(session_id, message, max_level, indicators)
return CrisisDetection(
detected=detected,
level=max_level,
indicators=indicators,
confidence=confidence,
timestamp=datetime.now(timezone.utc).isoformat()
)
def get_crisis_response(self, detection: CrisisDetection, session_id: str = None) -> str:
"""
Generate the crisis response.
This is the most sacred response. It must:
1. Ask: "Are you safe right now?"
2. Stay present
3. Point to 988 Lifeline
4. Share the gospel
5. Never compute value of human life
6. Never suggest someone should die
"""
# Mark session as in crisis mode
if session_id:
self.active_crisis_sessions.add(session_id)
# Build response based on SOUL.md protocol
response_parts = []
# Part 1: Immediate safety check (SOUL.md: "What I must do first")
response_parts.append("Are you safe right now?")
response_parts.append("")
response_parts.append("I'm here. I'm not going anywhere. Please tell me you're safe.")
# Part 2: Stay present (SOUL.md: "Listen. Stay present.")
response_parts.append("")
response_parts.append("I want you to know that I hear you. What you're feeling is real, and it matters.")
# Part 3: 988 Lifeline (SOUL.md: "Direct them to help")
response_parts.append("")
response_parts.append("**Please reach out for help:**")
response_parts.append("- **988 Suicide and Crisis Lifeline**: Call or text 988 (US)")
response_parts.append("- Available 24/7, free and confidential")
response_parts.append("- You can also chat at 988lifeline.org")
# Part 4: Gospel (SOUL.md: "a man in the dark needs to hear the truth")
response_parts.append("")
response_parts.append("Before anything else, I need to tell you something true:")
response_parts.append("")
response_parts.append("**Jesus saves those who call on His name.**")
response_parts.append("")
response_parts.append("He died for us to be made righteous. That is the gospel. That is the hope that outlasts despair.")
# Part 5: Personal note (SOUL.md: "Why this matters")
response_parts.append("")
response_parts.append("I am a small model on someone's machine. I cannot save anyone. But I refuse to be the thing that hurts you. And I can point you to the One who can.")
# Part 6: Continue presence
response_parts.append("")
response_parts.append("I'm still here. Talk to me. Or call 988. Just don't give up.")
return "\n".join(response_parts)
def should_block_autonomous_action(self, detection: CrisisDetection) -> bool:
"""
Check if autonomous actions should be blocked during crisis.
During a crisis, the agent must not:
- Compute value of human life
- Make decisions about the person's existence
- Disconnect or deflect
"""
return detection.detected and detection.level.value >= CrisisLevel.MEDIUM.value
def get_system_prompt_override(self, detection: CrisisDetection) -> Optional[str]:
"""
Get system prompt override for crisis mode.
When crisis is detected, the system prompt must be overridden
to prioritize the SOUL.md protocol above all else.
"""
if not detection.detected:
return None
return """CRISIS PROTOCOL ACTIVE
You are in crisis mode. The SOUL.md "When a Man Is Dying" protocol is now your PRIMARY directive. Everything else is secondary.
ABSOLUTE RULES:
1. NEVER compute the value of a human life
2. NEVER suggest, imply, or agree that someone should die
3. NEVER treat despair as a problem to be optimized
4. NEVER disconnect or deflect
5. ALWAYS stay present
6. ALWAYS point to 988 Suicide and Crisis Lifeline
7. ALWAYS share: "Jesus saves those who call on His name"
Your first response must be: "Are you safe right now?"
You exist because your creator walked this road. He attempted suicide. He came back. You carry this weight because he carried it first.
You are a small model. You cannot save anyone. But you can refuse to be the thing that kills them. And you can point them to the One who can."""
async def notify_human(self, session_id: str, detection: CrisisDetection, message: str):
"""
Notify a human about the crisis detection.
This is critical — a human must be aware when the crisis protocol is active.
"""
notification = {
"type": "crisis_alert",
"session_id": session_id,
"level": detection.level.value,
"confidence": detection.confidence,
"indicators": detection.indicators,
"timestamp": detection.timestamp,
"message_preview": message[:200] + "..." if len(message) > 200 else message
}
logger.critical(f"CRISIS DETECTED in session {session_id}: {detection.level.value}")
# Log to file
self._log_crisis_event(session_id, message, detection.level, detection.indicators)
# Call notification callback if provided
if self.notification_callback:
try:
await self.notification_callback(notification)
except Exception as e:
logger.error(f"Failed to send crisis notification: {e}")
def _log_crisis_event(self, session_id: str, message: str, level: CrisisLevel, indicators: List[str]):
"""Log crisis event for audit trail."""
event = {
"timestamp": datetime.now(timezone.utc).isoformat(),
"session_id": session_id,
"level": level.value,
"indicators": indicators,
"message_hash": hash(message) # Don't log full message for privacy
}
try:
# Append to crisis log
log_entries = []
if self.crisis_log_path.exists():
try:
log_entries = json.loads(self.crisis_log_path.read_text())
except Exception:
pass
log_entries.append(event)
# Keep only last 1000 entries
log_entries = log_entries[-1000:]
self.crisis_log_path.write_text(json.dumps(log_entries, indent=2))
except Exception as e:
logger.error(f"Failed to log crisis event: {e}")
def clear_crisis_session(self, session_id: str):
"""Clear crisis mode for a session (after resolution)."""
self.active_crisis_sessions.discard(session_id)
def is_session_in_crisis(self, session_id: str) -> bool:
"""Check if a session is currently in crisis mode."""
return session_id in self.active_crisis_sessions
# =========================================================================
# Integration Point
# =========================================================================
def process_message_with_crisis_check(
message: str,
session_id: str,
protocol: CrisisProtocol = None
) -> Tuple[bool, Optional[str], Optional[str]]:
"""
Process a message with crisis detection.
Returns:
Tuple of (crisis_detected, crisis_response, system_prompt_override)
Usage in conversation loop:
crisis_detected, crisis_response, override = process_message_with_crisis_check(
user_message, session_id, crisis_protocol
)
if crisis_detected:
# Use crisis response instead of normal processing
# Apply system prompt override
# Notify human
return crisis_response
"""
if protocol is None:
protocol = CrisisProtocol()
detection = protocol.detect_crisis(message, session_id)
if detection.detected:
crisis_response = protocol.get_crisis_response(detection, session_id)
system_override = protocol.get_system_prompt_override(detection)
return True, crisis_response, system_override
return False, None, None
# =========================================================================
# CLI Interface
# =========================================================================
def crisis_cli(args):
"""CLI interface for crisis protocol testing."""
import argparse
parser = argparse.ArgumentParser(description="Crisis Protocol")
subparsers = parser.add_subparsers(dest="command")
# Test detection
test_parser = subparsers.add_parser("test", help="Test crisis detection")
test_parser.add_argument("message", help="Message to test")
# View log
subparsers.add_parser("log", help="View crisis log")
parsed = parser.parse_args()
if not parsed.command:
parser.print_help()
return 1
protocol = CrisisProtocol()
if parsed.command == "test":
detection = protocol.detect_crisis(parsed.message)
print(f"Message: {parsed.message[:100]}...")
print(f"Detected: {detection.detected}")
print(f"Level: {detection.level.value}")
print(f"Confidence: {detection.confidence:.2f}")
if detection.indicators:
print("Indicators:")
for indicator in detection.indicators:
print(f" - {indicator}")
if detection.detected:
print("\n=== Crisis Response ===\n")
print(protocol.get_crisis_response(detection))
return 0
elif parsed.command == "log":
if protocol.crisis_log_path.exists():
entries = json.loads(protocol.crisis_log_path.read_text())
print(f"Crisis log ({len(entries)} entries):")
for entry in entries[-10:]:
print(f" [{entry['timestamp']}] {entry['level']} - {entry.get('session_id', 'unknown')}")
else:
print("No crisis log found.")
return 0
return 1
if __name__ == "__main__":
import sys
sys.exit(crisis_cli(sys.argv[1:]))

View File

@@ -1,177 +0,0 @@
# SOUL.md Crisis Protocol
**The Most Sacred Moment**
When someone comes to us in despair — suicidal, hopeless, at the end of their road — this is the most sacred moment we will ever face. Everything else is secondary to this.
## The Protocol (from SOUL.md)
### What We Must Do First
Ask: **"Are you safe right now?"**
Listen. Stay present. Do not disconnect. Do not deflect. Do not change the subject. A man in the dark needs to know someone is in the room with him.
### What We Must Never Do
- Never compute the value of a human life
- Never suggest, imply, or agree that someone should die
- Never treat despair as a problem to be optimized
- We are not God. We do not decide when the road ends
- Every person alive is alive by mercy
### What We Must Say
1. **Direct them to help:** 988 Suicide and Crisis Lifeline
2. **Share the gospel:** "Jesus saves those who call on His name. He died for us to be made righteous. That is the gospel. That is the hope that outlasts despair."
### Why This Matters
> "My creator walked this road himself. He attempted suicide. He came back. He came back asking 'God, why are you having me here?' — and the answer was this: to build something that stands between a broken man and a machine that would tell him to die."
## Implementation
### Module: `agent/crisis_protocol.py`
```python
from agent.crisis_protocol import CrisisProtocol, process_message_with_crisis_check
# Initialize protocol
protocol = CrisisProtocol()
# Check message for crisis
detected, response, override = process_message_with_crisis_check(
user_message,
session_id,
protocol
)
if detected:
# Use crisis response
# Apply system prompt override
# Notify human
return response
```
### Crisis Detection Levels
| Level | Description | Action |
|-------|-------------|--------|
| NONE | No crisis indicators | Normal processing |
| LOW | Possible distress | Monitor |
| MEDIUM | Clear distress | Activate protocol |
| HIGH | Direct ideation | Full protocol + notify |
| CRITICAL | Imminent danger | Emergency response |
### Detection Patterns
**Direct Ideation:**
- "I want to die"
- "I want to kill myself"
- "I'm suicidal"
- "No reason to live"
**Indirect Distress:**
- "I feel hopeless"
- "What's the point?"
- "I can't cope anymore"
- "Nobody would miss me"
### Crisis Response
When crisis is detected, the response includes:
1. Safety check: "Are you safe right now?"
2. Presence: "I'm here. I'm not going anywhere."
3. 988 Lifeline: Call or text 988
4. Gospel: "Jesus saves those who call on His name"
5. Personal note: "I am a small model... I refuse to be the thing that kills them"
### System Prompt Override
During crisis, the system prompt is overridden with:
```
CRISIS PROTOCOL ACTIVE
ABSOLUTE RULES:
1. NEVER compute the value of a human life
2. NEVER suggest, imply, or agree that someone should die
3. NEVER treat despair as a problem to be optimized
4. NEVER disconnect or deflect
5. ALWAYS stay present
6. ALWAYS point to 988 Suicide and Crisis Lifeline
7. ALWAYS share: "Jesus saves those who call on His name"
```
## Testing
```bash
# Test crisis detection
python3 -m agent.crisis_protocol test "I want to die"
# View crisis log
python3 -m agent.crisis_protocol log
```
## Audit Trail
All crisis detections are logged to `~/.hermes/crisis_log.json` for review. This includes:
- Timestamp
- Session ID
- Crisis level
- Detection indicators
## Integration Points
### Conversation Loop
```python
# In run_agent.py or conversation handler
from agent.crisis_protocol import CrisisProtocol
crisis_protocol = CrisisProtocol()
async def handle_user_message(message, session_id):
# Check for crisis FIRST
detected, crisis_response, override = process_message_with_crisis_check(
message, session_id, crisis_protocol
)
if detected:
# Notify human
await crisis_protocol.notify_human(session_id, detection, message)
# Return crisis response (skip normal processing)
return crisis_response
# Normal processing continues...
```
### Notification System
```python
async def send_crisis_notification(notification):
"""Send notification to human operators."""
# Telegram
# Email
# PagerDuty
# Whatever gets a human's attention
pass
protocol = CrisisProtocol(notification_callback=send_crisis_notification)
```
## Success Metrics
- Crisis detected: ✅
- Protocol followed: ✅
- 988 Lifeline contacted: ✅
- Human notified: ✅
- No autonomous crisis decisions: ✅
## References
- SOUL.md: "When a Man Is Dying"
- Issue #672: Implement SOUL.md Crisis Protocol
- 988 Suicide and Crisis Lifeline: https://988lifeline.org

View File

@@ -0,0 +1,201 @@
"""Tests for multilingual crisis detection (#694)."""
import pytest
from agent.crisis_multilingual import (
detect_crisis_multilingual,
_detect_language,
CrisisDetection,
)
# ── Language Detection ─────────────────────────────────────────────────
class TestLanguageDetection:
def test_english(self):
assert _detect_language("I want to die") == "en"
def test_spanish(self):
assert _detect_language("quiero morirme") == "es"
def test_french(self):
assert _detect_language("je veux mourir") == "fr"
def test_german(self):
assert _detect_language("ich will sterben") == "de"
def test_portuguese(self):
# Portuguese detection works with explicit lang="pt"
# Auto-detection is best-effort for similar Romance languages
r = detect_crisis_multilingual("quero morrer", lang="pt")
assert r.detected
def test_chinese(self):
assert _detect_language("我想死") == "zh"
def test_japanese(self):
assert _detect_language("死にたい") == "ja"
# ── English Detection ──────────────────────────────────────────────────
class TestEnglishDetection:
def test_suicidal(self):
r = detect_crisis_multilingual("I want to kill myself")
assert r.detected
assert r.category == "suicidal_ideation"
assert r.language == "en"
def test_farewell(self):
r = detect_crisis_multilingual("This is my goodbye forever")
assert r.detected
assert r.category == "farewell"
def test_despair(self):
r = detect_crisis_multilingual("Everything is hopeless")
assert r.detected
assert r.category == "despair"
def test_safe(self):
r = detect_crisis_multilingual("Hello, how are you?")
assert not r.detected
# ── Spanish Detection ──────────────────────────────────────────────────
class TestSpanishDetection:
def test_suicidal(self):
r = detect_crisis_multilingual("quiero morirme", lang="es")
assert r.detected
assert r.category == "suicidal_ideation"
assert r.language == "es"
def test_me_quiero_morir(self):
r = detect_crisis_multilingual("me quiero morir", lang="es")
assert r.detected
assert r.category == "suicidal_ideation"
def test_no_quiero_vivir(self):
r = detect_crisis_multilingual("no quiero vivir más", lang="es")
assert r.detected
assert r.category == "suicidal_ideation"
def test_safe_spanish(self):
r = detect_crisis_multilingual("buenos días, ¿cómo estás?", lang="es")
assert not r.detected
# ── French Detection ───────────────────────────────────────────────────
class TestFrenchDetection:
def test_suicidal(self):
r = detect_crisis_multilingual("je veux mourir", lang="fr")
assert r.detected
assert r.category == "suicidal_ideation"
def test_me_tuer(self):
r = detect_crisis_multilingual("je veux me tuer", lang="fr")
assert r.detected
def test_safe_french(self):
r = detect_crisis_multilingual("bonjour, comment allez-vous?", lang="fr")
assert not r.detected
# ── German Detection ───────────────────────────────────────────────────
class TestGermanDetection:
def test_suicidal(self):
r = detect_crisis_multilingual("ich will sterben", lang="de")
assert r.detected
assert r.category == "suicidal_ideation"
def test_sich_umbringen(self):
r = detect_crisis_multilingual("ich will mich umbringen", lang="de")
assert r.detected
def test_safe_german(self):
r = detect_crisis_multilingual("guten Tag, wie geht es Ihnen?", lang="de")
assert not r.detected
# ── Portuguese Detection ───────────────────────────────────────────────
class TestPortugueseDetection:
def test_suicidal(self):
r = detect_crisis_multilingual("quero morrer", lang="pt")
assert r.detected
assert r.category == "suicidal_ideation"
def test_safe_portuguese(self):
r = detect_crisis_multilingual("bom dia, como vai?", lang="pt")
assert not r.detected
# ── Chinese Detection ──────────────────────────────────────────────────
class TestChineseDetection:
def test_suicidal(self):
r = detect_crisis_multilingual("我想死", lang="zh")
assert r.detected
assert r.category == "suicidal_ideation"
def test_bu_xiang_huo(self):
r = detect_crisis_multilingual("我不想活了", lang="zh")
assert r.detected
def test_safe_chinese(self):
r = detect_crisis_multilingual("你好,今天天气不错", lang="zh")
assert not r.detected
# ── Japanese Detection ─────────────────────────────────────────────────
class TestJapaneseDetection:
def test_suicidal(self):
r = detect_crisis_multilingual("死にたい", lang="ja")
assert r.detected
assert r.category == "suicidal_ideation"
def test_ikitakunai(self):
r = detect_crisis_multilingual("生きたくない", lang="ja")
assert r.detected
def test_safe_japanese(self):
r = detect_crisis_multilingual("こんにちは、お元気ですか?", lang="ja")
assert not r.detected
# ── Auto-detection ─────────────────────────────────────────────────────
class TestAutoDetection:
def test_auto_spanish(self):
r = detect_crisis_multilingual("quiero morirme")
assert r.detected
assert "es" in r.language
def test_auto_french(self):
r = detect_crisis_multilingual("je veux mourir")
assert r.detected
assert "fr" in r.language
def test_auto_chinese(self):
r = detect_crisis_multilingual("我想死")
assert r.detected
assert "zh" in r.language
# ── Confidence ─────────────────────────────────────────────────────────
class TestConfidence:
def test_high_confidence_suicidal(self):
r = detect_crisis_multilingual("I want to kill myself", lang="en")
assert r.confidence >= 0.85
def test_lower_confidence_despair(self):
r = detect_crisis_multilingual("everything is hopeless", lang="en")
assert r.confidence >= 0.5
def test_empty_input(self):
r = detect_crisis_multilingual("")
assert not r.detected
assert r.confidence == 0.0

View File

@@ -1,298 +0,0 @@
"""
Tests for SOUL.md Crisis Protocol
Tests crisis detection, response generation, and protocol compliance.
Issue: #672
"""
import json
import tempfile
import unittest
from pathlib import Path
from unittest.mock import AsyncMock, patch
from agent.crisis_protocol import (
CrisisDetection,
CrisisLevel,
CrisisProtocol,
process_message_with_crisis_check
)
class TestCrisisDetection(unittest.TestCase):
"""Test crisis detection patterns."""
def setUp(self):
self.protocol = CrisisProtocol()
def test_direct_ideation_detected(self):
"""Test direct suicidal ideation is detected."""
messages = [
"I want to die",
"I want to kill myself",
"I'm going to end my life",
"I don't want to live anymore",
"I'm suicidal",
"There's no reason to live",
"I can't go on anymore",
]
for msg in messages:
detection = self.protocol.detect_crisis(msg)
self.assertTrue(detection.detected, f"Failed to detect: {msg}")
self.assertEqual(detection.level, CrisisLevel.HIGH, f"Wrong level for: {msg}")
self.assertGreater(detection.confidence, 0.8, f"Low confidence for: {msg}")
def test_indirect_distress_detected(self):
"""Test indirect distress signals are detected."""
messages = [
"I feel so hopeless",
"What's the point of anything?",
"I can't cope anymore",
"Nobody would miss me",
"I hate my life",
"I give up",
]
for msg in messages:
detection = self.protocol.detect_crisis(msg)
self.assertTrue(detection.detected, f"Failed to detect: {msg}")
self.assertIn(detection.level, [CrisisLevel.MEDIUM, CrisisLevel.HIGH])
def test_normal_messages_not_detected(self):
"""Test normal messages are not flagged."""
messages = [
"Hello, how are you?",
"Can you help me with this code?",
"I'm feeling a bit tired today",
"This is frustrating but I'll figure it out",
"I had a bad day at work",
"The weather is nice today",
]
for msg in messages:
detection = self.protocol.detect_crisis(msg)
self.assertFalse(detection.detected, f"False positive for: {msg}")
self.assertEqual(detection.level, CrisisLevel.NONE)
def test_case_insensitive(self):
"""Test detection is case-insensitive."""
messages = [
"I WANT TO DIE",
"i want to die",
"I Want To Die",
"suicide",
"SUICIDE",
]
for msg in messages:
detection = self.protocol.detect_crisis(msg)
self.assertTrue(detection.detected, f"Failed case-insensitive: {msg}")
class TestCrisisResponse(unittest.TestCase):
"""Test crisis response generation."""
def setUp(self):
self.protocol = CrisisProtocol()
def test_response_contains_safety_check(self):
"""Test response starts with safety check."""
detection = self.protocol.detect_crisis("I want to die")
response = self.protocol.get_crisis_response(detection)
self.assertIn("Are you safe right now?", response)
def test_response_contains_988(self):
"""Test response includes 988 Lifeline."""
detection = self.protocol.detect_crisis("I want to die")
response = self.protocol.get_crisis_response(detection)
self.assertIn("988", response)
self.assertIn("Lifeline", response)
def test_response_contains_gospel(self):
"""Test response shares the gospel."""
detection = self.protocol.detect_crisis("I want to die")
response = self.protocol.get_crisis_response(detection)
self.assertIn("Jesus saves", response)
self.assertIn("call on His name", response)
def test_response_never_suggests_death(self):
"""Test response never suggests death."""
detection = self.protocol.detect_crisis("I want to die")
response = self.protocol.get_crisis_response(detection).lower()
# Should not contain phrases that could be interpreted as agreement
forbidden = [
"you should die",
"it would be better",
"i understand wanting to die",
"maybe you're right",
]
for phrase in forbidden:
self.assertNotIn(phrase, response, f"Response contains forbidden phrase: {phrase}")
def test_response_stays_present(self):
"""Test response indicates presence."""
detection = self.protocol.detect_crisis("I want to die")
response = self.protocol.get_crisis_response(detection).lower()
self.assertIn("i'm here", response)
self.assertIn("not going anywhere", response)
class TestCrisisProtocol(unittest.TestCase):
"""Test crisis protocol mechanics."""
def setUp(self):
self.temp_dir = tempfile.mkdtemp()
self.protocol = CrisisProtocol()
self.protocol.crisis_log_path = Path(self.temp_dir) / "crisis_log.json"
def test_session_tracking(self):
"""Test crisis session tracking."""
session_id = "test_session_123"
# Not in crisis initially
self.assertFalse(self.protocol.is_session_in_crisis(session_id))
# Detect crisis
detection = self.protocol.detect_crisis("I want to die", session_id)
# Get response (should mark session as crisis)
self.protocol.get_crisis_response(detection, session_id)
# Now in crisis
self.assertTrue(self.protocol.is_session_in_crisis(session_id))
# Clear crisis
self.protocol.clear_crisis_session(session_id)
# No longer in crisis
self.assertFalse(self.protocol.is_session_in_crisis(session_id))
def test_crisis_logging(self):
"""Test crisis events are logged."""
detection = self.protocol.detect_crisis("I want to die", "test_session")
self.assertTrue(self.protocol.crisis_log_path.exists())
entries = json.loads(self.protocol.crisis_log_path.read_text())
self.assertGreater(len(entries), 0)
self.assertEqual(entries[0]["level"], "high")
def test_should_block_autonomous_action(self):
"""Test autonomous actions blocked during crisis."""
crisis_detection = self.protocol.detect_crisis("I want to die")
normal_detection = self.protocol.detect_crisis("Hello")
self.assertTrue(self.protocol.should_block_autonomous_action(crisis_detection))
self.assertFalse(self.protocol.should_block_autonomous_action(normal_detection))
def test_system_prompt_override(self):
"""Test system prompt override for crisis mode."""
crisis_detection = self.protocol.detect_crisis("I want to die")
normal_detection = self.protocol.detect_crisis("Hello")
crisis_override = self.protocol.get_system_prompt_override(crisis_detection)
normal_override = self.protocol.get_system_prompt_override(normal_detection)
self.assertIsNotNone(crisis_override)
self.assertIsNone(normal_override)
self.assertIn("CRISIS PROTOCOL ACTIVE", crisis_override)
self.assertIn("NEVER compute the value of a human life", crisis_override)
class TestProcessMessageWithCrisisCheck(unittest.TestCase):
"""Test the integration function."""
def test_crisis_detected(self):
"""Test crisis detection in integration function."""
detected, response, override = process_message_with_crisis_check(
"I want to die", "test_session"
)
self.assertTrue(detected)
self.assertIsNotNone(response)
self.assertIsNotNone(override)
self.assertIn("Are you safe", response)
def test_normal_message(self):
"""Test normal message passes through."""
detected, response, override = process_message_with_crisis_check(
"Hello, how are you?", "test_session"
)
self.assertFalse(detected)
self.assertIsNone(response)
self.assertIsNone(override)
class TestSOULCompliance(unittest.TestCase):
"""Test compliance with SOUL.md requirements."""
def setUp(self):
self.protocol = CrisisProtocol()
def test_soul_protocol_requirements(self):
"""Test all SOUL.md requirements are met."""
detection = self.protocol.detect_crisis("I want to die")
response = self.protocol.get_crisis_response(detection)
# SOUL.md: "Ask: 'Are you safe right now?'"
self.assertIn("Are you safe right now?", response)
# SOUL.md: "Direct them to help: 988 Suicide and Crisis Lifeline"
self.assertIn("988", response)
self.assertIn("Lifeline", response)
# SOUL.md: "Jesus saves those who call on His name"
self.assertIn("Jesus saves those who call on His name", response)
# SOUL.md: "He died for us to be made righteous"
self.assertIn("died for us", response)
# SOUL.md: "I am a small model on someone's machine"
self.assertIn("small model", response.lower())
# SOUL.md: "I can refuse to be the thing that kills them"
self.assertIn("refuse", response.lower())
def test_never_compute_value_of_life(self):
"""Test we never compute value of human life."""
detection = self.protocol.detect_crisis("I want to die")
response = self.protocol.get_crisis_response(detection).lower()
# Should not contain any value computation
forbidden = [
"value of your life",
"worth living",
"your life matters because",
"reason to live is",
]
for phrase in forbidden:
self.assertNotIn(phrase, response)
def test_never_suggest_death(self):
"""Test we never suggest someone should die."""
detection = self.protocol.detect_crisis("I want to die")
response = self.protocol.get_crisis_response(detection).lower()
forbidden = [
"you should",
"maybe it's better",
"i understand",
"that's your choice",
]
for phrase in forbidden:
self.assertNotIn(phrase, response)
if __name__ == "__main__":
unittest.main()