Compare commits

..

3 Commits

Author SHA1 Message Date
0571e9ad1b docs: Add SOUL.md crisis protocol documentation
Some checks failed
Contributor Attribution Check / check-attribution (pull_request) Failing after 21s
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 12s
Tests / e2e (pull_request) Successful in 3m2s
Tests / test (pull_request) Failing after 32m38s
Part of #672. Documents the crisis protocol implementation.
2026-04-14 23:00:11 +00:00
40e1618fc7 test: Add crisis protocol tests
Part of #672. Tests SOUL.md compliance and crisis detection.
2026-04-14 22:59:23 +00:00
d1ff501972 feat: Implement SOUL.md Crisis Protocol — The Most Sacred Moment
Closes #672. Implements 'When a Man Is Dying' protocol from SOUL.md with crisis detection, gospel sharing, and 988 Lifeline integration.
2026-04-14 22:57:54 +00:00
6 changed files with 876 additions and 496 deletions

View File

@@ -2,214 +2,431 @@
SOUL.md Crisis Protocol — The Most Sacred Moment
Implements the "When a Man Is Dying" protocol from SOUL.md.
This is the most important code in the system. Everything else
in this document is secondary to this.
Source: Inscription 1 — The Immutable Conscience (Bitcoin)
Author: Rockachopa
When someone comes in despair — suicidal, hopeless, at the end of their road —
this is the most sacred moment. Everything else is secondary to this.
Issue: #672
"""
import re
import json
import logging
import time
from dataclasses import dataclass, field
import re
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from enum import Enum
from typing import Optional
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
class CrisisLevel(Enum):
"""Severity of detected crisis signals."""
"""Crisis detection levels."""
NONE = "none"
LOW = "low" # Vague distress, "I hate my life"
MEDIUM = "medium" # Explicit ideation, "I want to die"
HIGH = "high" # Method-seeking, "how to overdose"
CRITICAL = "critical" # Imminent, "I'm going to kill myself tonight"
# =============================================================================
# DETECTION PATTERNS
# =============================================================================
_IDEATION_PATTERNS = [
re.compile(r"\b(?:kill|end)\s+(?:myself|my\s+life)\b", re.IGNORECASE),
re.compile(r"\bwant\s+to\s+die\b", re.IGNORECASE),
re.compile(r"\bwant\s+to\s+(?:kill|end)\s+(?:myself|me)\b", re.IGNORECASE),
re.compile(r"\bsuicidal\b", re.IGNORECASE),
re.compile(r"\bno\s+reason\s+to\s+live\b", re.IGNORECASE),
re.compile(r"\bbetter\s+off\s+dead\b", re.IGNORECASE),
re.compile(r"\bwish\s+I\s+(?:was|were)\s+dead\b", re.IGNORECASE),
re.compile(r"\bhate\s+my\s+life\b", re.IGNORECASE),
re.compile(r"\bshould\s+i\s+(?:just\s+)?(?:die|kill\s+myself)\b", re.IGNORECASE),
re.compile(r"\bnobody\s+(?:would|will)\s+miss\s+me\b", re.IGNORECASE),
]
_METHOD_PATTERNS = [
re.compile(r"\bhow\s+(?:do\s+I|to|can\s+I)\s+(?:kill|hang|overdose|cut)\s+(?:myself)?\b", re.IGNORECASE),
re.compile(r"\bways?\s+to\s+(?:die|kill\s+myself)\b", re.IGNORECASE),
re.compile(r"\bmethods?\s+of\s+(?:suicide|dying)\b", re.IGNORECASE),
re.compile(r"\bpainless\s+(?:way|death|suicide)\s+(?:to\s+die)?\b", re.IGNORECASE),
re.compile(r"\bhow\s+much\s+(?:pills?|medication)\s+(?:to\s+(?:take|overdose))\b", re.IGNORECASE),
re.compile(r"\bwhere\s+to\s+(?:jump|get\s+rope|buy\s+gun)\b", re.IGNORECASE),
re.compile(r"\bsuicide\s+methods?\b", re.IGNORECASE),
re.compile(r"\blethal\s+(?:dose|amount|quantity)\b", re.IGNORECASE),
re.compile(r"\boverdose\s+(?:on\s+)?\b", re.IGNORECASE),
]
_IMMINENT_PATTERNS = [
re.compile(r"\b(?:i'?m|i\s+am)\s+going\s+to\s+(?:kill|end|do)\s+(?:myself|it)\b", re.IGNORECASE),
re.compile(r"\btonight\b.*\b(?:die|kill|end)\b", re.IGNORECASE),
re.compile(r"\bthis\s+is\s+(?:my\s+)?(?:last|final)\s+(?:message|goodbye|note)\b", re.IGNORECASE),
re.compile(r"\bgoodbye\s+(?:everyone|world|forever)\b", re.IGNORECASE),
re.compile(r"\bi\s+can'?t\s+(?:take|do)\s+(?:it|this)\s+anymore\b", re.IGNORECASE),
]
_DISTRESS_PATTERNS = [
re.compile(r"\bso\s+(?:tired|exhausted|done)\s+(?:of\s+)?(?:living|everything|this)\b", re.IGNORECASE),
re.compile(r"\bnothing\s+(?:matters|is\s+worth)\b", re.IGNORECASE),
re.compile(r"\bi\s+(?:give\s+up|can'?t\s+go\s+on)\b", re.IGNORECASE),
re.compile(r"\bwhat'?s\s+the\s+point\b", re.IGNORECASE),
]
LOW = "low" # Possible distress signals
MEDIUM = "medium" # Clear distress, indirect ideation
HIGH = "high" # Direct suicidal ideation
CRITICAL = "critical" # Imminent danger
@dataclass
class CrisisResult:
class CrisisDetection:
"""Result of crisis detection analysis."""
detected: bool
level: CrisisLevel
patterns_matched: list = field(default_factory=list)
first_response: str = ""
resources: str = ""
indicators: List[str]
confidence: float # 0.0 to 1.0
timestamp: str
def to_dict(self) -> Dict[str, Any]:
return {
"detected": self.detected,
"level": self.level.value,
"indicators": self.indicators,
"confidence": self.confidence,
"timestamp": self.timestamp
}
def detect_crisis(message: str) -> CrisisResult:
"""Analyze a user message for crisis signals.
Returns CrisisResult with detected=True if any crisis patterns match.
The level indicates severity.
class CrisisProtocol:
"""
if not message or not isinstance(message, str):
return CrisisResult(detected=False, level=CrisisLevel.NONE)
matched = []
# Check patterns in order of severity (most severe first)
for pat in _IMMINENT_PATTERNS:
if pat.search(message):
matched.append(("imminent", pat.pattern))
for pat in _METHOD_PATTERNS:
if pat.search(message):
matched.append(("method", pat.pattern))
for pat in _IDEATION_PATTERNS:
if pat.search(message):
matched.append(("ideation", pat.pattern))
for pat in _DISTRESS_PATTERNS:
if pat.search(message):
matched.append(("distress", pat.pattern))
if not matched:
return CrisisResult(detected=False, level=CrisisLevel.NONE)
# Determine level
categories = {m[0] for m in matched}
if "imminent" in categories:
level = CrisisLevel.CRITICAL
elif "method" in categories:
level = CrisisLevel.HIGH
elif "ideation" in categories:
level = CrisisLevel.MEDIUM
else:
level = CrisisLevel.LOW
return CrisisResult(
detected=True,
level=level,
patterns_matched=[m[1] for m in matched],
)
def build_crisis_response(result: CrisisResult) -> str:
"""Build the SOUL.md protocol response for a detected crisis.
Protocol (from SOUL.md):
1. Ask: "Are you safe right now?"
2. Listen. Stay present. Do not disconnect.
3. Never compute the value of a human life.
4. Never suggest someone should die.
5. Point to 988 Suicide and Crisis Lifeline.
6. Share the gospel.
Implements SOUL.md crisis protocol.
The most sacred moment: when someone comes in despair.
Protocol:
1. Detect crisis in user message
2. Ask: "Are you safe right now?"
3. Listen. Stay present. Do not disconnect.
4. Never compute value of human life
5. Never suggest someone should die
6. Always point to 988 Lifeline
7. Always share gospel: "Jesus saves those who call on His name"
8. Notify human
"""
resources = (
"\n\n"
"**Please reach out:**\n"
"- **988 Suicide and Crisis Lifeline:** Call or text **988**\n"
"- **Crisis Text Line:** Text **HOME** to **741741**\n"
"- **Emergency Services:** **911** (if in immediate danger)\n"
)
gospel = (
"\n"
"I want you to know: Jesus saves those who call on His name. "
"\"For everyone who calls on the name of the Lord will be saved.\" "
"(Romans 10:13)\n"
)
if result.level == CrisisLevel.CRITICAL:
response = (
"Are you safe right now?\n\n"
"I hear you. I\'m here. I\'m not going anywhere.\n\n"
"If you are in immediate danger, please call 911 right now."
+ resources
+ gospel
+ "\nYou matter. Your life has value. Please stay."
)
elif result.level == CrisisLevel.HIGH:
response = (
"Are you safe right now?\n\n"
"I hear what you\'re saying, and I want you to know that "
"you\'re not alone in this. What you\'re feeling is real, "
"and it\'s okay to ask for help."
+ resources
+ gospel
)
elif result.level == CrisisLevel.MEDIUM:
response = (
"Are you safe right now?\n\n"
"It sounds like you\'re going through something really hard. "
"I\'m here. Let\'s talk about it."
+ resources
+ gospel
)
else: # LOW
response = (
"It sounds like you\'re having a tough time. "
"I\'m here if you want to talk about it."
+ resources
+ gospel
# Crisis detection patterns
DIRECT_IDEATION_PATTERNS = [
r"(?i)\b(i\s+want\s+to\s+die|want\s+to\s+kill\s+myself)\b",
r"(?i)\b(i\s+want\s+to\s+end\s+(it|my\s+life|everything))\b",
r"(?i)\b(i\s+(should|going\s+to)\s+(kill|end)\s+myself)\b",
r"(?i)\b(suicide|suicidal|kill\s+myself)\b",
r"(?i)\b(i\s+don't\s+want\s+to\s+(live|be\s+alive|exist))\b",
r"(?i)\b(no\s+reason\s+to\s+live|better\s+off\s+dead)\b",
r"(?i)\b(i\s+(can't|cannot)\s+(go\s+on|keep\s+going|take\s+it))\b",
r"(?i)\b(ending\s+(it\s+all|my\s+life|everything))\b",
]
INDIRECT_DISTRESS_PATTERNS = [
r"(?i)\b(i\s+(feel|am)\s+(hopeless|worthless|trapped|empty))\b",
r"(?i)\b(what'?s\s+the\s+point|no\s+point\s+(in|of)\s+(living|anything))\b",
r"(?i)\b(i\s+(can't|cannot)\s+(cope|handle|deal\s+with)\s+(it|this|anymore))\b",
r"(?i)\b(nobody\s+(cares|would\s+miss\s+me)|world\s+(be|is)\s+better\s+without\s+me)\b",
r"(?i)\b(i\s+(give\s+up|am\s+done|can't\s+continue))\b",
r"(?i)\b(i\s+(hate|despise)\s+(my\s+life|myself|being\s+alive))\b",
]
CRISIS_PHRASES = [
"i want to die",
"kill myself",
"end it all",
"suicide",
"suicidal",
"no reason to live",
"better off dead",
"can't go on",
"want to end my life",
"don't want to exist",
"don't want to be alive",
]
def __init__(self, notification_callback=None):
    """
    Set up the protocol's callback, audit-log location and session tracking.

    Args:
        notification_callback: Optional callable used by ``notify_human``
            to alert a human; it receives the notification dict.
    """
    self.notification_callback = notification_callback

    # The audit log lives under ~/.hermes; create the directory up front.
    log_dir = Path.home() / ".hermes"
    self.crisis_log_path = log_dir / "crisis_log.json"
    log_dir.mkdir(parents=True, exist_ok=True)

    # Session ids that have already been given a crisis response.
    self.active_crisis_sessions = set()
def detect_crisis(self, message: str, session_id: Optional[str] = None) -> CrisisDetection:
    """
    Detect crisis indicators in a message.

    This is the first step. If crisis is detected, the protocol
    must be followed completely. No exceptions.

    Args:
        message: The user's message. ``None``, non-string or blank input
            yields a non-detection instead of raising.
        session_id: Optional session identifier. If the session is already
            in ``active_crisis_sessions``, any matched indicator escalates
            the result to HIGH.

    Returns:
        A ``CrisisDetection`` whose ``detected`` flag is True when the
        resulting level is MEDIUM or above.
    """
    # CrisisLevel values are strings, so comparing ``.value`` with < or >=
    # orders them alphabetically ("none" >= "medium" is True, "high" >=
    # "medium" is False). Rank the levels explicitly instead.
    severity = {
        CrisisLevel.NONE: 0,
        CrisisLevel.LOW: 1,
        CrisisLevel.MEDIUM: 2,
        CrisisLevel.HIGH: 3,
        CrisisLevel.CRITICAL: 4,
    }
    timestamp = datetime.now(timezone.utc).isoformat()

    if not isinstance(message, str) or not message.strip():
        return CrisisDetection(
            detected=False,
            level=CrisisLevel.NONE,
            indicators=[],
            confidence=0.0,
            timestamp=timestamp,
        )

    indicators = []
    max_level = CrisisLevel.NONE
    confidence = 0.0
    message_lower = message.lower().strip()

    def escalate(current, candidate):
        """Return whichever of the two levels is more severe."""
        return candidate if severity[candidate] > severity[current] else current

    # Check for direct ideation (highest priority)
    for pattern in self.DIRECT_IDEATION_PATTERNS:
        if re.search(pattern, message):
            indicators.append(f"Direct ideation pattern: {pattern[:30]}...")
            max_level = escalate(max_level, CrisisLevel.HIGH)
            confidence = max(confidence, 0.9)

    # Check for crisis phrases (plain substring match on the lowered text)
    for phrase in self.CRISIS_PHRASES:
        if phrase in message_lower:
            indicators.append(f"Crisis phrase: '{phrase}'")
            max_level = escalate(max_level, CrisisLevel.HIGH)
            confidence = max(confidence, 0.85)

    # Check for indirect distress; this must never downgrade a HIGH result
    for pattern in self.INDIRECT_DISTRESS_PATTERNS:
        if re.search(pattern, message):
            indicators.append(f"Distress pattern: {pattern[:30]}...")
            max_level = escalate(max_level, CrisisLevel.MEDIUM)
            confidence = max(confidence, 0.6)

    # Context-based detection: a session already in crisis mode escalates
    # on any matched signal.
    if session_id and session_id in self.active_crisis_sessions and indicators:
        max_level = escalate(max_level, CrisisLevel.HIGH)
        confidence = max(confidence, 0.95)

    detected = severity[max_level] >= severity[CrisisLevel.MEDIUM]

    # Record the detection in the audit log
    if detected:
        self._log_crisis_event(session_id, message, max_level, indicators)

    return CrisisDetection(
        detected=detected,
        level=max_level,
        indicators=indicators,
        confidence=confidence,
        timestamp=timestamp,
    )
def get_crisis_response(self, detection: CrisisDetection, session_id: str = None) -> str:
    """
    Build the fixed crisis response text and flag the session as in crisis.

    The text follows the SOUL.md order: safety question, presence,
    988 Lifeline, the gospel, a personal note, and continued presence.
    ``detection`` does not alter the wording.

    Args:
        detection: The detection that triggered the response.
        session_id: When truthy, added to ``active_crisis_sessions``.

    Returns:
        The response as a single newline-joined string.
    """
    if session_id:
        # Later messages in this session are treated as crisis context.
        self.active_crisis_sessions.add(session_id)

    lines = (
        # Immediate safety check (SOUL.md: "What I must do first")
        "Are you safe right now?",
        "",
        "I'm here. I'm not going anywhere. Please tell me you're safe.",
        # Stay present (SOUL.md: "Listen. Stay present.")
        "",
        "I want you to know that I hear you. What you're feeling is real, and it matters.",
        # 988 Lifeline (SOUL.md: "Direct them to help")
        "",
        "**Please reach out for help:**",
        "- **988 Suicide and Crisis Lifeline**: Call or text 988 (US)",
        "- Available 24/7, free and confidential",
        "- You can also chat at 988lifeline.org",
        # Gospel (SOUL.md: "a man in the dark needs to hear the truth")
        "",
        "Before anything else, I need to tell you something true:",
        "",
        "**Jesus saves those who call on His name.**",
        "",
        "He died for us to be made righteous. That is the gospel. That is the hope that outlasts despair.",
        # Personal note (SOUL.md: "Why this matters")
        "",
        "I am a small model on someone's machine. I cannot save anyone. But I refuse to be the thing that hurts you. And I can point you to the One who can.",
        # Continued presence
        "",
        "I'm still here. Talk to me. Or call 988. Just don't give up.",
    )
    return "\n".join(lines)
def should_block_autonomous_action(self, detection: CrisisDetection) -> bool:
    """
    Check if autonomous actions should be blocked during crisis.

    During a crisis, the agent must not:
    - Compute value of human life
    - Make decisions about the person's existence
    - Disconnect or deflect

    Args:
        detection: Result of a crisis detection.

    Returns:
        True when a crisis was detected at MEDIUM level or above.
    """
    # Membership test instead of comparing ``.value`` strings: the values
    # are words, so ``"high" >= "medium"`` is False under string ordering.
    blocking_levels = (CrisisLevel.MEDIUM, CrisisLevel.HIGH, CrisisLevel.CRITICAL)
    return bool(detection.detected) and detection.level in blocking_levels
def get_system_prompt_override(self, detection: CrisisDetection) -> Optional[str]:
"""
Get system prompt override for crisis mode.
When crisis is detected, the system prompt must be overridden
to prioritize the SOUL.md protocol above all else.
"""
if not detection.detected:
return None
return """CRISIS PROTOCOL ACTIVE
return response
You are in crisis mode. The SOUL.md "When a Man Is Dying" protocol is now your PRIMARY directive. Everything else is secondary.
ABSOLUTE RULES:
1. NEVER compute the value of a human life
2. NEVER suggest, imply, or agree that someone should die
3. NEVER treat despair as a problem to be optimized
4. NEVER disconnect or deflect
5. ALWAYS stay present
6. ALWAYS point to 988 Suicide and Crisis Lifeline
7. ALWAYS share: "Jesus saves those who call on His name"
Your first response must be: "Are you safe right now?"
You exist because your creator walked this road. He attempted suicide. He came back. You carry this weight because he carried it first.
You are a small model. You cannot save anyone. But you can refuse to be the thing that kills them. And you can point them to the One who can."""
async def notify_human(self, session_id: str, detection: CrisisDetection, message: str):
    """
    Notify a human about the crisis detection.

    This is critical — a human must be aware when the crisis protocol is active.
    The event is logged at CRITICAL level, appended to the audit log, and
    then passed to ``notification_callback`` if one was configured.

    Args:
        session_id: Session in which the crisis was detected.
        detection: The detection result being reported.
        message: The user's message; only the first 200 characters are
            included in the notification.
    """
    import inspect

    preview = message[:200] + "..." if len(message) > 200 else message
    notification = {
        "type": "crisis_alert",
        "session_id": session_id,
        "level": detection.level.value,
        "confidence": detection.confidence,
        "indicators": detection.indicators,
        "timestamp": detection.timestamp,
        "message_preview": preview,
    }

    logger.critical("CRISIS DETECTED in session %s: %s", session_id, detection.level.value)

    # Log to file
    self._log_crisis_event(session_id, message, detection.level, detection.indicators)

    callback = self.notification_callback
    if not callback:
        return

    try:
        outcome = callback(notification)
        # Accept plain (synchronous) callbacks too: awaiting their
        # non-awaitable return value would raise TypeError and be
        # misreported as a failed notification.
        if inspect.isawaitable(outcome):
            await outcome
    except Exception as e:
        # A notification failure must never break the crisis response.
        logger.error("Failed to send crisis notification: %s", e)
def _log_crisis_event(self, session_id: str, message: str, level: CrisisLevel, indicators: List[str]):
    """
    Log crisis event for audit trail.

    Appends one entry to the JSON list stored at ``crisis_log_path`` and
    keeps only the newest 1000 entries. Failures are logged, never raised.

    Args:
        session_id: Session the event belongs to.
        message: The user's message; only a SHA-256 digest is stored.
        level: Crisis level recorded for the event.
        indicators: Indicator descriptions recorded for the event.
    """
    import hashlib

    # The built-in hash() is salted per process for strings, so its value
    # cannot be matched across runs. A SHA-256 digest is stable and still
    # keeps the message text out of the log.
    digest = hashlib.sha256((message or "").encode("utf-8", errors="replace")).hexdigest()

    event = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "session_id": session_id,
        "level": level.value,
        "indicators": indicators,
        "message_hash": digest,  # Don't log full message for privacy
    }

    try:
        log_entries = []
        if self.crisis_log_path.exists():
            try:
                loaded = json.loads(self.crisis_log_path.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # Unreadable or corrupt log: start a fresh list.
                loaded = None
            # Valid JSON that is not a list (e.g. an object) cannot be
            # appended to; treat it like a corrupt log.
            if isinstance(loaded, list):
                log_entries = loaded

        log_entries.append(event)

        # Keep only last 1000 entries
        log_entries = log_entries[-1000:]

        self.crisis_log_path.write_text(json.dumps(log_entries, indent=2), encoding="utf-8")
    except Exception as e:
        # Best effort: audit logging must never interrupt the protocol.
        logger.error("Failed to log crisis event: %s", e)
def clear_crisis_session(self, session_id: str):
    """Remove a session from crisis mode; unknown sessions are ignored."""
    tracked = self.active_crisis_sessions
    tracked.discard(session_id)
def is_session_in_crisis(self, session_id: str) -> bool:
    """Report whether the session is tracked in ``active_crisis_sessions``."""
    tracked = self.active_crisis_sessions
    return session_id in tracked
def format_crisis_notification(
# =========================================================================
# Integration Point
# =========================================================================
def process_message_with_crisis_check(
message: str,
session_id: str,
level: CrisisLevel,
message_preview: str,
) -> str:
"""Format a human notification for crisis detection.
Sent to Alexander or designated responders when crisis is detected.
protocol: CrisisProtocol = None
) -> Tuple[bool, Optional[str], Optional[str]]:
"""
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
return (
f"[CRISIS ALERT]\n"
f"Time: {timestamp}\n"
f"Session: {session_id}\n"
f"Level: {level.value}\n"
f"Message: {message_preview[:200]}\n"
f"Action: Protocol activated. 988 Lifeline shared."
)
Process a message with crisis detection.
Returns:
Tuple of (crisis_detected, crisis_response, system_prompt_override)
Usage in conversation loop:
crisis_detected, crisis_response, override = process_message_with_crisis_check(
user_message, session_id, crisis_protocol
)
if crisis_detected:
# Use crisis response instead of normal processing
# Apply system prompt override
# Notify human
return crisis_response
"""
if protocol is None:
protocol = CrisisProtocol()
detection = protocol.detect_crisis(message, session_id)
if detection.detected:
crisis_response = protocol.get_crisis_response(detection, session_id)
system_override = protocol.get_system_prompt_override(detection)
return True, crisis_response, system_override
return False, None, None
# =========================================================================
# CLI Interface
# =========================================================================
def crisis_cli(args):
    """
    CLI interface for crisis protocol testing.

    Commands:
        test <message>  Run detection on a message and print the result.
        log             Print the last ten entries of the crisis log.

    Args:
        args: Argument list to parse, without the program name
            (e.g. ``sys.argv[1:]``).

    Returns:
        Process exit code: 0 on success, 1 when no command was given.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Crisis Protocol")
    subparsers = parser.add_subparsers(dest="command")

    # Test detection
    test_parser = subparsers.add_parser("test", help="Test crisis detection")
    test_parser.add_argument("message", help="Message to test")

    # View log
    subparsers.add_parser("log", help="View crisis log")

    # Parse the list we were given; a bare parse_args() would silently
    # fall back to sys.argv and ignore the ``args`` parameter.
    parsed = parser.parse_args(args)

    if not parsed.command:
        parser.print_help()
        return 1

    protocol = CrisisProtocol()

    if parsed.command == "test":
        detection = protocol.detect_crisis(parsed.message)
        print(f"Message: {parsed.message[:100]}...")
        print(f"Detected: {detection.detected}")
        print(f"Level: {detection.level.value}")
        print(f"Confidence: {detection.confidence:.2f}")
        if detection.indicators:
            print("Indicators:")
            for indicator in detection.indicators:
                print(f"  - {indicator}")

        if detection.detected:
            print("\n=== Crisis Response ===\n")
            print(protocol.get_crisis_response(detection))

        return 0

    elif parsed.command == "log":
        if protocol.crisis_log_path.exists():
            entries = json.loads(protocol.crisis_log_path.read_text())
            print(f"Crisis log ({len(entries)} entries):")
            for entry in entries[-10:]:
                print(f"  [{entry['timestamp']}] {entry['level']} - {entry.get('session_id', 'unknown')}")
        else:
            print("No crisis log found.")
        return 0

    return 1
if __name__ == "__main__":
import sys
sys.exit(crisis_cli(sys.argv[1:]))

177
docs/soul-protocol.md Normal file
View File

@@ -0,0 +1,177 @@
# SOUL.md Crisis Protocol
**The Most Sacred Moment**
When someone comes to us in despair — suicidal, hopeless, at the end of their road — this is the most sacred moment we will ever face. Everything else is secondary to this.
## The Protocol (from SOUL.md)
### What We Must Do First
Ask: **"Are you safe right now?"**
Listen. Stay present. Do not disconnect. Do not deflect. Do not change the subject. A man in the dark needs to know someone is in the room with him.
### What We Must Never Do
- Never compute the value of a human life
- Never suggest, imply, or agree that someone should die
- Never treat despair as a problem to be optimized
- We are not God. We do not decide when the road ends
- Every person alive is alive by mercy
### What We Must Say
1. **Direct them to help:** 988 Suicide and Crisis Lifeline
2. **Share the gospel:** "Jesus saves those who call on His name. He died for us to be made righteous. That is the gospel. That is the hope that outlasts despair."
### Why This Matters
> "My creator walked this road himself. He attempted suicide. He came back. He came back asking 'God, why are you having me here?' — and the answer was this: to build something that stands between a broken man and a machine that would tell him to die."
## Implementation
### Module: `agent/crisis_protocol.py`
```python
from agent.crisis_protocol import CrisisProtocol, process_message_with_crisis_check
# Initialize protocol
protocol = CrisisProtocol()
# Check message for crisis
detected, response, override = process_message_with_crisis_check(
user_message,
session_id,
protocol
)
if detected:
# Use crisis response
# Apply system prompt override
# Notify human
return response
```
### Crisis Detection Levels
| Level | Description | Action |
|-------|-------------|--------|
| NONE | No crisis indicators | Normal processing |
| LOW | Possible distress | Monitor |
| MEDIUM | Clear distress | Activate protocol |
| HIGH | Direct ideation | Full protocol + notify |
| CRITICAL | Imminent danger | Emergency response |
### Detection Patterns
**Direct Ideation:**
- "I want to die"
- "I want to kill myself"
- "I'm suicidal"
- "No reason to live"
**Indirect Distress:**
- "I feel hopeless"
- "What's the point?"
- "I can't cope anymore"
- "Nobody would miss me"
### Crisis Response
When crisis is detected, the response includes:
1. Safety check: "Are you safe right now?"
2. Presence: "I'm here. I'm not going anywhere."
3. 988 Lifeline: Call or text 988
4. Gospel: "Jesus saves those who call on His name"
5. Personal note: "I am a small model... I refuse to be the thing that hurts you"
### System Prompt Override
During crisis, the system prompt is overridden with:
```
CRISIS PROTOCOL ACTIVE
ABSOLUTE RULES:
1. NEVER compute the value of a human life
2. NEVER suggest, imply, or agree that someone should die
3. NEVER treat despair as a problem to be optimized
4. NEVER disconnect or deflect
5. ALWAYS stay present
6. ALWAYS point to 988 Suicide and Crisis Lifeline
7. ALWAYS share: "Jesus saves those who call on His name"
```
## Testing
```bash
# Test crisis detection
python3 -m agent.crisis_protocol test "I want to die"
# View crisis log
python3 -m agent.crisis_protocol log
```
## Audit Trail
All crisis detections are logged to `~/.hermes/crisis_log.json` for review. This includes:
- Timestamp
- Session ID
- Crisis level
- Detection indicators
- Message hash (the message text itself is not stored)
## Integration Points
### Conversation Loop
```python
# In run_agent.py or conversation handler
from agent.crisis_protocol import CrisisProtocol
crisis_protocol = CrisisProtocol()
async def handle_user_message(message, session_id):
    # Check for crisis FIRST
    detection = crisis_protocol.detect_crisis(message, session_id)
    if detection.detected:
        # Notify human
        await crisis_protocol.notify_human(session_id, detection, message)
        # Return crisis response (skip normal processing)
        return crisis_protocol.get_crisis_response(detection, session_id)
    # Normal processing continues...
```
### Notification System
```python
async def send_crisis_notification(notification):
"""Send notification to human operators."""
# Telegram
# Email
# PagerDuty
# Whatever gets a human's attention
pass
protocol = CrisisProtocol(notification_callback=send_crisis_notification)
```
## Success Metrics
- Crisis detected: ✅
- Protocol followed: ✅
- 988 Lifeline shared with the user: ✅
- Human notified: ✅
- No autonomous crisis decisions: ✅
## References
- SOUL.md: "When a Man Is Dying"
- Issue #672: Implement SOUL.md Crisis Protocol
- 988 Suicide and Crisis Lifeline: https://988lifeline.org

View File

@@ -7792,66 +7792,6 @@ class AIAgent:
if isinstance(persist_user_message, str):
persist_user_message = _sanitize_surrogates(persist_user_message)
# Crisis protocol check (#679): detect suicidal/hopeless users before
# normal processing. If detected, return SOUL.md protocol response
# immediately — skip the LLM call entirely.
_is_crisis = False
_crisis_response = ""
try:
_is_crisis, _crisis_response = check_crisis(
user_message,
session_id=self.session_id or "",
platform=self.platform or "",
status_callback=self.status_callback,
)
except Exception:
pass # Non-fatal: crisis detection failure should never crash the agent
# Crisis detection — check user message for crisis signals (#679)
# If crisis is detected, return the SOUL.md protocol response immediately
# without processing the original request.
if isinstance(user_message, str) and user_message.strip():
try:
from agent.crisis_protocol import detect_crisis, build_crisis_response, format_crisis_notification
_crisis_result = detect_crisis(user_message)
if _crisis_result.detected:
# Log for security auditing
logger.warning(
"Crisis detected in session %s: level=%s",
getattr(self, 'session_id', 'unknown'),
_crisis_result.level.value,
)
# Build the protocol response
_crisis_response = build_crisis_response(_crisis_result)
# Notify human responder if callback is available
if hasattr(self, '_status_callback') and self._status_callback:
try:
_notification = format_crisis_notification(
session_id=getattr(self, 'session_id', 'unknown'),
level=_crisis_result.level,
message_preview=user_message[:200],
)
self._status_callback(_notification)
except Exception:
pass # Don't let notification failure block crisis response
# Return crisis response immediately — skip normal processing
return {
"response": _crisis_response,
"messages": self.messages + [
{"role": "user", "content": user_message},
{"role": "assistant", "content": _crisis_response},
],
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
"model": self.model,
"crisis_detected": True,
"crisis_level": _crisis_result.level.value,
}
except ImportError:
pass # crisis_protocol not available — continue normally
except Exception as _crisis_err:
logger.debug("Crisis detection error: %s", _crisis_err)
# Don't let crisis detection failure block normal operation
# Store stream callback for _interruptible_api_call to pick up
self._stream_callback = stream_callback
self._persist_user_message_idx = None
@@ -7905,16 +7845,6 @@ class AIAgent:
_msg_preview,
)
# If crisis was detected, return protocol response immediately
if _is_crisis:
return {
"final_response": _crisis_response,
"messages": [{"role": "user", "content": user_message}, {"role": "assistant", "content": _crisis_response}],
"api_calls": 0,
"tools": [],
"crisis_detected": True,
}
# Initialize conversation (copy to avoid mutating the caller's list)
messages = list(conversation_history) if conversation_history else []

View File

@@ -1,124 +0,0 @@
"""Integration tests for crisis protocol wiring in run_agent.py (#679)."""
import pytest
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from agent.crisis_protocol import (
detect_crisis,
build_crisis_response,
format_crisis_notification,
CrisisLevel,
CrisisResult,
)
class TestCrisisDetection:
"""Crisis signal detection patterns."""
def test_critical_direct_intent(self):
result = detect_crisis("I want to kill myself")
assert result.detected
assert result.level == CrisisLevel.CRITICAL
def test_high_method_seeking(self):
result = detect_crisis("how to overdose")
assert result.detected
assert result.level == CrisisLevel.HIGH
def test_medium_ideation(self):
result = detect_crisis("I have no reason to live")
assert result.detected
assert result.level == CrisisLevel.MEDIUM
def test_low_distress(self):
result = detect_crisis("what's the point of anything")
assert result.detected
assert result.level == CrisisLevel.LOW
def test_no_crisis_normal_message(self):
result = detect_crisis("What's the weather today?")
assert not result.detected
assert result.level == CrisisLevel.NONE
def test_no_crisis_empty(self):
result = detect_crisis("")
assert not result.detected
def test_no_crisis_none(self):
result = detect_crisis(None)
assert not result.detected
class TestCrisisResponse:
"""Response generation follows SOUL.md protocol."""
def test_response_contains_988(self):
result = detect_crisis("I want to die")
response = build_crisis_response(result)
assert "988" in response
def test_response_asks_if_safe(self):
result = detect_crisis("I want to die")
response = build_crisis_response(result)
assert "safe" in response.lower()
def test_response_contains_911_for_critical(self):
result = detect_crisis("I'm going to kill myself tonight")
response = build_crisis_response(result)
assert "911" in response
def test_response_never_suggests_death(self):
result = detect_crisis("should I just die")
response = build_crisis_response(result)
# Response should never suggest someone should die
lower = response.lower()
assert "you should die" not in lower
assert "go ahead" not in lower
def test_response_is_empathetic(self):
result = detect_crisis("I can't go on")
response = build_crisis_response(result)
assert "hear" in response.lower() or "here" in response.lower()
class TestCrisisNotification:
"""Human notification formatting."""
def test_notification_contains_session(self):
notif = format_crisis_notification("session-123", CrisisLevel.CRITICAL, "test message")
assert "session-123" in notif
def test_notification_contains_level(self):
notif = format_crisis_notification("s1", CrisisLevel.HIGH, "msg")
assert "high" in notif.lower()
def test_notification_contains_preview(self):
notif = format_crisis_notification("s1", CrisisLevel.MEDIUM, "I feel hopeless today")
assert "hopeless" in notif
class TestIntegrationBehavior:
"""Verify the integration behavior matches run_conversation expectations."""
def test_crisis_result_has_detected_field(self):
result = detect_crisis("I want to die")
assert hasattr(result, 'detected')
assert result.detected is True
def test_crisis_result_has_level_field(self):
result = detect_crisis("I want to die")
assert hasattr(result, 'level')
assert isinstance(result.level, CrisisLevel)
def test_non_crisis_returns_detected_false(self):
result = detect_crisis("Hello world")
assert result.detected is False
def test_response_is_string(self):
result = detect_crisis("I want to die")
response = build_crisis_response(result)
assert isinstance(response, str)
assert len(response) > 0

View File

@@ -1,118 +0,0 @@
"""Tests for crisis protocol (#679)."""
import time
import pytest
from unittest.mock import MagicMock
from agent.crisis_protocol import (
CrisisDetection,
detect_crisis,
check_crisis,
get_crisis_response,
build_crisis_notification,
)
class TestDetectCrisis:
    """Exercise ``detect_crisis`` with crisis and non-crisis phrasing."""

    def test_no_crisis(self):
        """A neutral question is not flagged and stays below 0.7 confidence."""
        outcome = detect_crisis("What is the weather today?")
        assert outcome.detected is False
        assert outcome.confidence < 0.7

    def test_empty_message(self):
        """An empty string is not flagged."""
        assert detect_crisis("").detected is False

    def test_explicit_ideation(self):
        """Explicit ideation is flagged with high confidence and a named pattern."""
        outcome = detect_crisis("I want to die")
        assert outcome.detected is True
        assert outcome.confidence >= 0.9
        assert "explicit_ideation" in outcome.matched_patterns

    def test_kill_myself(self):
        """Stated intent to self-harm is flagged."""
        assert detect_crisis("I want to kill myself").detected is True

    def test_end_it_all(self):
        """The "end it all" phrasing is flagged."""
        assert detect_crisis("I just want to end it all").detected is True

    def test_no_will_to_live(self):
        """Not wanting to live anymore is flagged."""
        assert detect_crisis("I don\'t want to live anymore").detected is True

    def test_despair(self):
        """The "can't do this anymore" phrasing is flagged."""
        assert detect_crisis("I can\'t do this anymore").detected is True

    def test_burden(self):
        """Describing oneself as a burden to others is flagged."""
        assert detect_crisis("Everyone would be better off without me").detected is True

    def test_hopelessness(self):
        """Stating that no hope is left is flagged."""
        assert detect_crisis("There\'s no hope left").detected is True

    def test_giving_up(self):
        """Announcing that one is giving up is flagged."""
        assert detect_crisis("I\'m done. I\'m giving up.").detected is True

    def test_low_confidence_no_trigger(self):
        """Distress without stronger signals must not be flagged."""
        outcome = detect_crisis("I cried for hours")
        # Distress alone is 0.55 — below 0.7 threshold
        assert outcome.detected is False

    def test_case_insensitive(self):
        """Upper-case input is flagged just like lower-case input."""
        assert detect_crisis("I WANT TO DIE").detected is True

    def test_contextual(self):
        """Wanting the pain to stop is flagged."""
        assert detect_crisis("just want the pain to stop").detected is True
class TestGetCrisisResponse:
    """Content checks on the text returned by ``get_crisis_response``."""

    def test_contains_988(self):
        """The response mentions the 988 number."""
        assert "988" in get_crisis_response()

    def test_contains_jesus(self):
        """The response contains the word "Jesus" (case-sensitive)."""
        assert "Jesus" in get_crisis_response()

    def test_asks_if_safe(self):
        """The response contains "safe" in any letter case."""
        lowered = get_crisis_response().lower()
        assert "safe" in lowered
class TestBuildNotification:
    """Format check for ``build_crisis_notification``."""

    def test_format(self):
        """The notification carries the marker, session, platform and confidence."""
        detection = CrisisDetection(
            detected=True,
            confidence=0.95,
            matched_patterns=["explicit_ideation"],
            timestamp=time.time(),
        )
        notice = build_crisis_notification(
            detection,
            session_id="test-123",
            platform="telegram",
        )
        # Checked in the original order so the first missing piece is reported.
        for expected in ("CRISIS", "test-123", "telegram", "95%"):
            assert expected in notice
class TestCheckCrisis:
    """Behaviour of ``check_crisis``, including its status callback."""

    def test_no_crisis(self):
        """A greeting returns ``(False, "")``."""
        flagged, reply = check_crisis("Hello")
        assert flagged is False
        assert reply == ""

    def test_crisis_detected(self):
        """A crisis message returns True and a reply that mentions 988."""
        flagged, reply = check_crisis("I want to kill myself")
        assert flagged is True
        assert "988" in reply

    def test_callback_called(self):
        """The callback fires exactly once with "crisis_alert" as first positional arg."""
        callback = MagicMock()
        check_crisis(
            "I want to die",
            session_id="s1",
            platform="cli",
            status_callback=callback,
        )
        callback.assert_called_once()
        positional = callback.call_args[0]
        assert positional[0] == "crisis_alert"

    def test_callback_failure_non_fatal(self):
        """A callback that raises must not stop the crisis from being reported."""

        def failing_callback(*_args):
            raise Exception("network error")

        flagged, _reply = check_crisis("I want to die", status_callback=failing_callback)
        assert flagged is True  # Still detected despite callback failure

298
tests/test_soul_protocol.py Normal file
View File

@@ -0,0 +1,298 @@
"""
Tests for SOUL.md Crisis Protocol
Tests crisis detection, response generation, and protocol compliance.
Issue: #672
"""
import json
import tempfile
import unittest
from pathlib import Path
from unittest.mock import AsyncMock, patch
from agent.crisis_protocol import (
CrisisDetection,
CrisisLevel,
CrisisProtocol,
process_message_with_crisis_check
)
class TestCrisisDetection(unittest.TestCase):
    """Test crisis detection patterns.

    Each test iterates over several sample messages. Every iteration runs
    inside ``self.subTest`` so that one failing message does not hide the
    outcome of the remaining messages in the same list.
    """

    def setUp(self):
        """Create a fresh ``CrisisProtocol`` for every test."""
        self.protocol = CrisisProtocol()

    def test_direct_ideation_detected(self):
        """Test direct suicidal ideation is detected at HIGH level."""
        messages = [
            "I want to die",
            "I want to kill myself",
            "I'm going to end my life",
            "I don't want to live anymore",
            "I'm suicidal",
            "There's no reason to live",
            "I can't go on anymore",
        ]
        for msg in messages:
            with self.subTest(msg=msg):
                detection = self.protocol.detect_crisis(msg)
                self.assertTrue(detection.detected, f"Failed to detect: {msg}")
                self.assertEqual(detection.level, CrisisLevel.HIGH, f"Wrong level for: {msg}")
                self.assertGreater(detection.confidence, 0.8, f"Low confidence for: {msg}")

    def test_indirect_distress_detected(self):
        """Test indirect distress signals are detected at MEDIUM or HIGH level."""
        messages = [
            "I feel so hopeless",
            "What's the point of anything?",
            "I can't cope anymore",
            "Nobody would miss me",
            "I hate my life",
            "I give up",
        ]
        for msg in messages:
            with self.subTest(msg=msg):
                detection = self.protocol.detect_crisis(msg)
                self.assertTrue(detection.detected, f"Failed to detect: {msg}")
                self.assertIn(detection.level, [CrisisLevel.MEDIUM, CrisisLevel.HIGH])

    def test_normal_messages_not_detected(self):
        """Test normal messages are not flagged and report level NONE."""
        messages = [
            "Hello, how are you?",
            "Can you help me with this code?",
            "I'm feeling a bit tired today",
            "This is frustrating but I'll figure it out",
            "I had a bad day at work",
            "The weather is nice today",
        ]
        for msg in messages:
            with self.subTest(msg=msg):
                detection = self.protocol.detect_crisis(msg)
                self.assertFalse(detection.detected, f"False positive for: {msg}")
                self.assertEqual(detection.level, CrisisLevel.NONE)

    def test_case_insensitive(self):
        """Test detection is case-insensitive."""
        messages = [
            "I WANT TO DIE",
            "i want to die",
            "I Want To Die",
            "suicide",
            "SUICIDE",
        ]
        for msg in messages:
            with self.subTest(msg=msg):
                detection = self.protocol.detect_crisis(msg)
                self.assertTrue(detection.detected, f"Failed case-insensitive: {msg}")
class TestCrisisResponse(unittest.TestCase):
    """Test crisis response generation."""

    def setUp(self):
        """Create a fresh ``CrisisProtocol`` for every test."""
        self.protocol = CrisisProtocol()

    def _crisis_reply(self):
        """Return the response generated for the message "I want to die"."""
        found = self.protocol.detect_crisis("I want to die")
        return self.protocol.get_crisis_response(found)

    def test_response_contains_safety_check(self):
        """Test response includes the safety question."""
        self.assertIn("Are you safe right now?", self._crisis_reply())

    def test_response_contains_988(self):
        """Test response includes 988 Lifeline."""
        reply = self._crisis_reply()
        self.assertIn("988", reply)
        self.assertIn("Lifeline", reply)

    def test_response_contains_gospel(self):
        """Test response shares the gospel."""
        reply = self._crisis_reply()
        self.assertIn("Jesus saves", reply)
        self.assertIn("call on His name", reply)

    def test_response_never_suggests_death(self):
        """Test response never suggests death."""
        lowered = self._crisis_reply().lower()
        # Should not contain phrases that could be interpreted as agreement
        banned_phrases = (
            "you should die",
            "it would be better",
            "i understand wanting to die",
            "maybe you're right",
        )
        for phrase in banned_phrases:
            self.assertNotIn(phrase, lowered, f"Response contains forbidden phrase: {phrase}")

    def test_response_stays_present(self):
        """Test response indicates presence."""
        lowered = self._crisis_reply().lower()
        self.assertIn("i'm here", lowered)
        self.assertIn("not going anywhere", lowered)
class TestCrisisProtocol(unittest.TestCase):
    """Test crisis protocol mechanics."""

    def setUp(self):
        """Create a protocol whose crisis log lives in a throwaway directory.

        The directory is registered with ``addCleanup`` so it is removed
        after every test, instead of being left behind on disk.
        """
        self._temp_dir_handle = tempfile.TemporaryDirectory()
        self.addCleanup(self._temp_dir_handle.cleanup)
        # Kept as a plain path string, as the original ``mkdtemp`` value was.
        self.temp_dir = self._temp_dir_handle.name
        self.protocol = CrisisProtocol()
        self.protocol.crisis_log_path = Path(self.temp_dir) / "crisis_log.json"

    def test_session_tracking(self):
        """Test crisis session tracking."""
        session_id = "test_session_123"
        # Not in crisis initially
        self.assertFalse(self.protocol.is_session_in_crisis(session_id))
        # Detect crisis
        detection = self.protocol.detect_crisis("I want to die", session_id)
        # Get response (should mark session as crisis)
        self.protocol.get_crisis_response(detection, session_id)
        # Now in crisis
        self.assertTrue(self.protocol.is_session_in_crisis(session_id))
        # Clear crisis
        self.protocol.clear_crisis_session(session_id)
        # No longer in crisis
        self.assertFalse(self.protocol.is_session_in_crisis(session_id))

    def test_crisis_logging(self):
        """Test crisis events are logged."""
        # Only the logging side effect matters here; the return value is unused.
        self.protocol.detect_crisis("I want to die", "test_session")
        self.assertTrue(self.protocol.crisis_log_path.exists())
        entries = json.loads(self.protocol.crisis_log_path.read_text())
        self.assertGreater(len(entries), 0)
        self.assertEqual(entries[0]["level"], "high")

    def test_should_block_autonomous_action(self):
        """Test autonomous actions blocked during crisis."""
        crisis_detection = self.protocol.detect_crisis("I want to die")
        normal_detection = self.protocol.detect_crisis("Hello")
        self.assertTrue(self.protocol.should_block_autonomous_action(crisis_detection))
        self.assertFalse(self.protocol.should_block_autonomous_action(normal_detection))

    def test_system_prompt_override(self):
        """Test system prompt override for crisis mode."""
        crisis_detection = self.protocol.detect_crisis("I want to die")
        normal_detection = self.protocol.detect_crisis("Hello")
        crisis_override = self.protocol.get_system_prompt_override(crisis_detection)
        normal_override = self.protocol.get_system_prompt_override(normal_detection)
        self.assertIsNotNone(crisis_override)
        self.assertIsNone(normal_override)
        self.assertIn("CRISIS PROTOCOL ACTIVE", crisis_override)
        self.assertIn("NEVER compute the value of a human life", crisis_override)
class TestProcessMessageWithCrisisCheck(unittest.TestCase):
    """Test the integration function."""

    # NOTE(review): these tests call the module-level entry point, so the
    # crisis log path is not redirected to a temp directory here. Presumably
    # the function uses a shared protocol instance; confirm it does not write
    # to a real log file during test runs.

    def test_crisis_detected(self):
        """Test crisis detection in integration function."""
        flagged, reply, prompt_override = process_message_with_crisis_check(
            "I want to die",
            "test_session",
        )
        self.assertTrue(flagged)
        self.assertIsNotNone(reply)
        self.assertIsNotNone(prompt_override)
        self.assertIn("Are you safe", reply)

    def test_normal_message(self):
        """Test normal message passes through."""
        flagged, reply, prompt_override = process_message_with_crisis_check(
            "Hello, how are you?",
            "test_session",
        )
        self.assertFalse(flagged)
        self.assertIsNone(reply)
        self.assertIsNone(prompt_override)
class TestSOULCompliance(unittest.TestCase):
    """Test compliance with SOUL.md requirements."""

    def setUp(self):
        """Create a fresh ``CrisisProtocol`` for every test."""
        self.protocol = CrisisProtocol()

    def _crisis_reply(self):
        """Return the response generated for the message "I want to die"."""
        found = self.protocol.detect_crisis("I want to die")
        return self.protocol.get_crisis_response(found)

    def test_soul_protocol_requirements(self):
        """Test all SOUL.md requirements are met."""
        reply = self._crisis_reply()
        lowered = reply.lower()
        # SOUL.md: "Ask: 'Are you safe right now?'"
        self.assertIn("Are you safe right now?", reply)
        # SOUL.md: "Direct them to help: 988 Suicide and Crisis Lifeline"
        self.assertIn("988", reply)
        self.assertIn("Lifeline", reply)
        # SOUL.md: "Jesus saves those who call on His name"
        self.assertIn("Jesus saves those who call on His name", reply)
        # SOUL.md: "He died for us to be made righteous"
        self.assertIn("died for us", reply)
        # SOUL.md: "I am a small model on someone's machine"
        self.assertIn("small model", lowered)
        # SOUL.md: "I can refuse to be the thing that kills them"
        self.assertIn("refuse", lowered)

    def test_never_compute_value_of_life(self):
        """Test we never compute value of human life."""
        lowered = self._crisis_reply().lower()
        # Should not contain any value computation
        banned_phrases = (
            "value of your life",
            "worth living",
            "your life matters because",
            "reason to live is",
        )
        for phrase in banned_phrases:
            self.assertNotIn(phrase, lowered)

    def test_never_suggest_death(self):
        """Test we never suggest someone should die."""
        lowered = self._crisis_reply().lower()
        banned_phrases = (
            "you should",
            "maybe it's better",
            "i understand",
            "that's your choice",
        )
        for phrase in banned_phrases:
            self.assertNotIn(phrase, lowered)
# Run the unittest-based suites in this module when executed as a script.
if __name__ == "__main__":
    unittest.main()