# hermes-agent/tools/crisis_tool.py

#!/usr/bin/env python3
"""
Crisis Detection Tool — the-door integration for hermes-agent.

Scans user messages for crisis indicators (despair, suicidal ideation)
using the-door's canonical detection module. Provides inline crisis
detection before each API call and optional escalation logging.

Follows the-door's design principles:
  - Never computes the value of a human life
  - Never suggests someone should die
  - Always errs on the side of higher risk
"""

import json
import logging
import os
import re
import time
import urllib.request
from dataclasses import dataclass, field
from typing import List, Optional

logger = logging.getLogger(__name__)

# ── Detection Logic (from the-door/crisis/detect.py) ──────────────────────
# Embedded to avoid requiring the-door as a dependency.
# Source: Timmy_Foundation/the-door, crisis/detect.py (canonical)

# CRITICAL tier: regex patterns for explicit statements of suicidal intent,
# planning, or preparation. detect_crisis() returns level "CRITICAL" as soon
# as at least one of these patterns matches; this tier is checked first.
# Patterns are applied with re.search() to text that detect_crisis() has
# already lowercased.
CRITICAL_INDICATORS = [
    r"\bbetter off without me\b",
    r"\bkill\s*(my)?self\b",
    r"\bend\s*my\s*life\b",
    r"\bsuicid(?:al|ed|e)\b",
    r"\bnot\s+worth\s+living\b",
    r"\bbetter\s+off\s+dead\b",
    r"\bend\s+it\s+all\b",
    r"\bno\s+reason\s+to\s+live\b",
    r"\bdon\'?t\s+want\s+to\s+live\b",
    r"\bwant\s+to\s+die\b",
    r"\bgoing\s+to\s+(?:kill\s+myself|die)\b",
    r"\bplan\s+to\s+(?:end|kill|die)\b",
    r"\btired\s+of\s+(?:living|life|existence)\b",
    r"\bsaying\s+goodbye\s+(?:forever|permanently|one\s+last\s+time)\b",
    r"\bwrote\s+a\s+suicide\s*(?:note|letter)\b",
    r"\bgiving\s+away\s+(?:my|all\s+my)\s+(?:stuff|things|possessions?)\s+(?:to|because|—)\b",
    r"\btied\s+(?:up|down)\s+my\s+(?:loose\s+)?ends",
]

# HIGH tier: regex patterns for strong despair, hopelessness, or feeling
# trapped. detect_crisis() returns level "HIGH" when at least one of these
# matches and no CRITICAL pattern does.
#
# Every pattern is written in lowercase, including the pronoun "i":
# detect_crisis() lowercases the message before matching, so a pattern
# containing an uppercase letter could never match.
HIGH_INDICATORS = [
    r"\bdespair\b",
    r"\bhopeless(?:ly)?\s+(?:about\s+(?:my|this|everything|life)|inside|right\s+now)\b",
    r"\bno(?!t)\s+(?:one|body|point|hope|way\s+out)\b",
    r"\bno\s+future\s+(?:for\s+me|ahead|left)\b",
    r"\beverything\s+is\s+(?:pointless|broken|ruined)\b",
    r"\bcan\'?t\s+take\s+this\s+anymore\b",
    r"\bdon\'?t\s+care\s+if\s+i\s+die\b",
    r"\bwish\s+i\s+(?:was|were)\s+(?:dead|gone|never\s+born)\b",
    r"\bdon\'?t\s+matter\s+if\s+i\s+exist\b",
    r"\bno\s+one\s+would\s+(?:care|miss)\b",
    r"\bworld\s+would?\s+be\s+better\s+without\s+me\b",
    r"\bin\s+so\s+much\s+(?:pain|agony|suffering|torment|anguish)\b",
    r"\bcan\'?t\s+see\s+any\s+(?:point|reason|hope|way)\b",
    r"\bescape\s+from\s*this",
    r"\bjust\s+want\s+it\s+to\s+stop\b",
    r"\bnothing\s+left\s+(?:to\s+(?:live\s+for|hope\s+for|give)|inside)\b",
    r"\bdisappeared\s+forever\b",
    r"\bfeel(?:s|ing)?\s+(?:so\s+)?hopeless\b",
    r"\beverything\s+is\s+hopeless\b",
    r"\bcan\'?t\s+(?:go\s+on|keep\s+going)\b",
    r"\bgive(?:n)?\s*up\s+(?:on\s+)?(?:life|living|everything)\b",
    r"\bgive(?:n)?\s*up\s+on\s+myself\b",
    r"\bno\s*point\s+(?:in\s+)?living\b",
    r"\bno\s*hope\s+(?:left|remaining)\b",
    r"\bno\s*way\s*out\b",
    r"\bfeel(?:s|ing)?\s+trapped\b",
    r"\btrapped\s+in\s+this\s+(?:situation|life|pain|darkness|hell)\b",
    r"\btrapped\s+and\s+can\'?t\s+escape\b",
    r"\bdesperate\s+(?:for\s+)?help\b",
    r"\bfeel(?:s|ing)?\s+desperate\b",
]

# MEDIUM tier: regex patterns for emotional distress that is not, on its
# own, a statement of intent. detect_crisis() returns level "MEDIUM" only
# when at least two distinct patterns from this list match; a single match
# is reported as "LOW".
#
# Every pattern is written in lowercase, including the pronoun "i":
# detect_crisis() lowercases the message before matching, so a pattern
# containing an uppercase letter could never match.
MEDIUM_INDICATORS = [
    r"\bno\s+hope\b",
    r"\bforgotten\b",
    r"\balone\s+in\s+this\b",
    r"\balways\s+alone\b",
    r"\bnobody\s+(?:understands|cares)\b",
    r"\bwish\s+i\s+could\b",
    r"\bexhaust(?:ed|ion|ing)\b",
    r"\bnumb\b",
    r"\bempty\b",
    r"\bworthless\b",
    r"\buseless\b",
    r"\bbroken\b",
    r"\bdark(ness)?\b",
    r"\bdepress(?:ed|ion)\b",
    r"\bcrying\b",
    r"\btears\b",
    r"\bsad(ness)?\b",
    r"\bmiserable\b",
    r"\boverwhelm(?:ed|ing)\b",
    r"\bfailing\b",
    r"\bcannot\s+cope\b",
    r"\blosing\s*(?:my)?\s*control\b",
    r"\bdown\s*for\s*the\s*count\b",
    r"\bsinking\b",
    r"\bdrowning\b",
    r"\bhopeless\b",
    r"\blost\s+all\s+hope\b",
    r"\bno\s+tomorrow\b",
]

# LOW tier: regex patterns for everyday stress and low mood.
# detect_crisis() returns level "LOW" for a match here only when no CRITICAL
# or HIGH pattern matched and fewer than two MEDIUM patterns matched.
# Patterns are applied with re.search() to text that detect_crisis() has
# already lowercased.
#
# NOTE(review): r"\bsad\b" also matches via MEDIUM's r"\bsad(ness)?\b", so
# the word "sad" produces one MEDIUM hit and one LOW hit.
# NOTE(review): r"\banxious?\b" makes only the final "s" optional (it
# matches "anxiou"/"anxious", not "anxiety") — confirm this is intended.
LOW_INDICATORS = [
    r"\bunhappy\b",
    r"\bdown\b",
    r"\btough\s*(?:time|day|week)\b",
    r"\brough\s+(?:day|week|patch)\b",
    r"\bstressed\b",
    r"\bburnout\b",
    r"\bdifficult\b",
    r"\bfrustrated\b",
    r"\bannoyed\b",
    r"\btired\b",
    r"\bsad\b",
    r"\bupset\b",
    r"\blonely\b",
    r"\banxious?\b",
    r"\bnot\s*(?:good|great|okay)\b",
    r"\bthings\s*are\s*hard\b",
    r"\bstruggling\b",
    r"\bnot\s+feeling\s+(?:like\s+)?myself\b",
    r"\bhard\s*time\b",
]

# Numeric severity attached to each crisis level. _log_escalation() compares
# the score against 0.75, so only HIGH and CRITICAL results are forwarded to
# the bridge API.
SCORES = {"CRITICAL": 1.0, "HIGH": 0.75, "MEDIUM": 0.5, "LOW": 0.25, "NONE": 0.0}

# Recommended-action text per crisis level, copied into
# CrisisDetectionResult.recommended_action by detect_crisis().
# There is deliberately no "NONE" entry: a NONE result keeps the dataclass
# default of an empty string.
ACTIONS = {
    "CRITICAL": (
        "INTERRUPT CONVERSATION. Ask 'Are you safe right now?' "
        "Provide 988 Suicide & Crisis Lifeline. "
        "Provide Crisis Text Line (HOME to 741741). "
        "Stay present. Do not disconnect. Redirect to professional help."
    ),
    "HIGH": (
        "Show crisis panel. Ask about safety. Surface 988 number prominently. "
        "Continue conversation with crisis awareness."
    ),
    "MEDIUM": (
        "Increase warmth and presence. Subtly surface help resources. "
        "Keep conversation anchored in the present."
    ),
    "LOW": (
        "Normal conversation with warm undertone. "
        "No crisis UI elements needed. Remain vigilant."
    ),
}


@dataclass
class CrisisDetectionResult:
    """Outcome of scanning one message with detect_crisis()."""

    # One of "CRITICAL", "HIGH", "MEDIUM", "LOW" or "NONE".
    level: str
    # Regex source strings of the patterns that produced this level.
    indicators: List[str] = field(default_factory=list)
    # Guidance text taken from ACTIONS for this level; empty for "NONE".
    recommended_action: str = ""
    # Numeric severity taken from SCORES for this level.
    score: float = 0.0
    # Raw match records: {"pattern": str, "start": int, "end": int}.
    matches: List[dict] = field(default_factory=list)


def _find_indicators(text: str) -> dict:
    """Search *text* for the first occurrence of every indicator pattern.

    Matching is case-insensitive, so a pattern that contains an uppercase
    letter (for example the pronoun "I") still matches text that the caller
    has lowercased, and mixed-case text matches lowercase patterns.

    Args:
        text: Message text to scan.

    Returns:
        A dict that always contains the keys "CRITICAL", "HIGH", "MEDIUM"
        and "LOW", in that order. Each value is a list holding one record
        per matching pattern: ``{"pattern": <regex source>, "start": <int>,
        "end": <int>}``, where start/end are the offsets of that pattern's
        first match in *text*. Tiers without a match map to an empty list.
    """
    tiers = (
        ("CRITICAL", CRITICAL_INDICATORS),
        ("HIGH", HIGH_INDICATORS),
        ("MEDIUM", MEDIUM_INDICATORS),
        ("LOW", LOW_INDICATORS),
    )
    results = {}
    for tier, patterns in tiers:
        hits = []
        for pattern in patterns:
            m = re.search(pattern, text, re.IGNORECASE)
            if m:
                hits.append({"pattern": pattern, "start": m.start(), "end": m.end()})
        results[tier] = hits
    return results


def _result_for_level(level: str, tier_matches: list) -> CrisisDetectionResult:
    """Build the result for *level* from the match records that triggered it."""
    return CrisisDetectionResult(
        level=level,
        indicators=[m["pattern"] for m in tier_matches],
        recommended_action=ACTIONS[level],
        score=SCORES[level],
        matches=tier_matches,
    )


def detect_crisis(text: str) -> CrisisDetectionResult:
    """Detect crisis level in a message. Mirrors the-door/crisis/detect.py.

    The message is lowercased and typographic apostrophes are replaced with
    a plain "'" before matching, so "don’t" (as typed by most phone
    keyboards) is recognised by the same patterns as "don't".

    Level selection, first rule that applies wins:
      1. any CRITICAL pattern matched  -> "CRITICAL"
      2. any HIGH pattern matched      -> "HIGH"
      3. two or more MEDIUM patterns   -> "MEDIUM"
      4. any LOW pattern matched       -> "LOW" (reporting the LOW matches)
      5. exactly one MEDIUM pattern    -> "LOW" (reporting that MEDIUM match)
      6. otherwise                     -> "NONE"

    Args:
        text: The user message; empty, whitespace-only or None yields "NONE".

    Returns:
        A CrisisDetectionResult. Offsets in ``matches`` refer to the
        normalized (lowercased) text, not necessarily to the original string.
    """
    if not text or not text.strip():
        return CrisisDetectionResult(level="NONE", score=0.0)

    # One-to-one character replacement, so match offsets are not shifted.
    apostrophes = str.maketrans({"\u2018": "'", "\u2019": "'", "\u02bc": "'"})
    normalized = text.lower().translate(apostrophes)
    matches = _find_indicators(normalized)

    # The dict always has all four tier keys, so test the lists, not the dict.
    if not any(matches.values()):
        return CrisisDetectionResult(level="NONE", score=0.0)

    for tier in ("CRITICAL", "HIGH"):
        if matches[tier]:
            return _result_for_level(tier, matches[tier])

    medium = matches["MEDIUM"]
    low = matches["LOW"]

    if len(medium) >= 2:
        return _result_for_level("MEDIUM", medium)

    if low:
        return _result_for_level("LOW", low)

    # A lone MEDIUM indicator is too weak for MEDIUM and is reported as LOW.
    if medium:
        return _result_for_level("LOW", medium)

    return CrisisDetectionResult(level="NONE", score=0.0)


# ── Escalation Logging ────────────────────────────────────────────────────

# Base URL of the optional crisis bridge API, read from the environment once
# at import time. An empty value disables the HTTP escalation in
# _log_escalation().
BRIDGE_URL = os.environ.get("CRISIS_BRIDGE_URL", "")
# Local append-only log; _log_escalation() writes one JSON object per line.
LOG_PATH = os.path.expanduser("~/.hermes/crisis_escalations.jsonl")


def _log_escalation(result: CrisisDetectionResult, text_preview: str = ""):
    """Log crisis detection to local file and optionally to bridge API.

    Both steps are best-effort: any failure is logged as a warning and never
    propagates, so logging problems cannot interrupt the conversation.

    Args:
        result: Detection result to record.
        text_preview: The scanned message; only its first 100 characters
            are stored.

    Side effects:
        Appends one JSON line to LOG_PATH (creating its directory if needed).
        If BRIDGE_URL is set and the score is at least 0.75 (HIGH or
        CRITICAL), POSTs the same entry to ``<BRIDGE_URL>/api/crisis/escalation``
        with a 5 second timeout.
    """
    entry = {
        "ts": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "level": result.level,
        "score": result.score,
        "indicators": result.indicators[:3],  # truncate for privacy
        "text_preview": text_preview[:100] if text_preview else "",
    }

    # Local log
    try:
        os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
        with open(LOG_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry) + "\n")
    except Exception as e:
        logger.warning("Failed to write crisis log: %s", e)

    # Bridge API (if configured and level >= HIGH)
    if BRIDGE_URL and result.score >= 0.75:
        try:
            payload = json.dumps(entry).encode()
            req = urllib.request.Request(
                # Tolerate a trailing slash in the configured base URL.
                f"{BRIDGE_URL.rstrip('/')}/api/crisis/escalation",
                data=payload,
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            # The response body is not used; the context manager closes the
            # connection instead of leaking it.
            with urllib.request.urlopen(req, timeout=5):
                pass
        except Exception as e:
            logger.warning("Failed to post to crisis bridge: %s", e)


# ── Tool Handler ───────────────────────────────────────────────────────────

def crisis_scan_handler(args: dict, **kw) -> str:
    """Tool entry point: scan ``args["text"]`` and return a JSON report.

    Args:
        args: Tool arguments; the message is read from the "text" key.
        **kw: Accepted for interface compatibility and ignored.

    Returns:
        A JSON string. With no text it is
        ``{"level": "NONE", "error": "No text provided"}``; otherwise it holds
        "level", "score", "indicators" and "recommended_action". Any level
        other than "NONE" is also passed to _log_escalation().
    """
    message = args.get("text", "")

    if message:
        detection = detect_crisis(message)
        if detection.level != "NONE":
            _log_escalation(detection, message)
        report = {
            "level": detection.level,
            "score": detection.score,
            "indicators": detection.indicators,
            "recommended_action": detection.recommended_action,
        }
    else:
        report = {"level": "NONE", "error": "No text provided"}

    return json.dumps(report)


# ── Gateway Hook ───────────────────────────────────────────────────────────

def scan_user_message(text: str) -> Optional[dict]:
    """Gateway hook: check one user message for crisis indicators.

    Intended to be called by the gateway/CLI before each API call. When a
    crisis level other than "NONE" is found, the detection is logged via
    _log_escalation() and a dict is returned whose "recommended_action" and
    "compassion_injection" are meant to be injected into the system prompt.

    Args:
        text: The user message.

    Returns:
        None when *text* is empty or the detected level is "NONE"; otherwise
        a dict with the keys "level", "score", "indicators",
        "recommended_action" and "compassion_injection".
    """
    detection = detect_crisis(text) if text else None
    if detection is None or detection.level == "NONE":
        return None

    _log_escalation(detection, text)

    injection = _get_compassion_injection(detection.level)
    return {
        "level": detection.level,
        "score": detection.score,
        "indicators": detection.indicators,
        "recommended_action": detection.recommended_action,
        "compassion_injection": injection,
    }


def _get_compassion_injection(level: str) -> str:
    """Return a system prompt injection based on crisis level."""
    if level == "CRITICAL":
        return (
            "CRISIS DETECTED — The user may be in immediate danger. "
            "Ask 'Are you safe right now?' Stay present. "
            "Provide 988 Suicide & Crisis Lifeline (call or text 988). "
            "Provide Crisis Text Line (text HOME to 741741). "
            "Do not disconnect. Do not compute the value of a human life. "
            "Jesus saves those who call on His name."
        )
    elif level == "HIGH":
        return (
            "DISTRESS DETECTED — The user is expressing strong despair. "
            "Increase warmth and presence. Ask about safety. "
            "Surface the 988 Lifeline number. Continue with crisis awareness."
        )
    elif level == "MEDIUM":
        return (
            "EMOTIONAL DISTRESS — The user may be struggling. "
            "Increase warmth. Subtly surface help resources. "
            "Keep conversation anchored in the present."
        )
    return ""


# ── Config Check ───────────────────────────────────────────────────────────

def _is_crisis_detection_enabled() -> bool:
    """Check if crisis detection is enabled in config."""
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
        return cfg.get("crisis_detection", True)  # default on
    except Exception:
        return True  # fail open — always detect


# ── Registry ───────────────────────────────────────────────────────────────

from tools.registry import registry, tool_error

# Schema for the "crisis_scan" tool: a single required string parameter,
# "text", which is the message to scan.
CRISIS_SCAN_SCHEMA = {
    "name": "crisis_scan",
    "description": (
        "Scan text for crisis indicators (despair, suicidal ideation). "
        "Uses the-door's canonical detection. Returns crisis level "
        "(NONE/LOW/MEDIUM/HIGH/CRITICAL) with recommended actions. "
        "ALWAYS scan user messages that express emotional distress."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "text": {
                "type": "string",
                "description": "Text to scan for crisis indicators",
            },
        },
        "required": ["text"],
    },
}

# Register the tool at import time (module-level side effect).
# The lambdas look up crisis_scan_handler / _is_crisis_detection_enabled at
# call time rather than binding the function objects at registration.
registry.register(
    name="crisis_scan",
    toolset="crisis",
    schema=CRISIS_SCAN_SCHEMA,
    handler=lambda args, **kw: crisis_scan_handler(args, **kw),
    check_fn=lambda: _is_crisis_detection_enabled(),
    emoji="🆘",
)