1 Commits

Author SHA1 Message Date
Alexander Whitestone
eef835d2aa feat: Fallback + resilience — health checks, restart, failover (#8)
Adds operational resilience tooling:

- resilience/health-check.sh: Health check script with 5 checks (nginx, static content, gateway, disk, SSL). Supports --auto-restart and --verbose modes.
- resilience/service-restart.sh: Graceful ordered service restart with stop->verify->start->verify cycle. Supports --force mode.
- Fallback logic for when gateway is unreachable (graceful degradation to static pages)

All scripts are self-contained, no external dependencies, work on common Linux distros.
2026-04-05 17:24:09 -04:00
3 changed files with 268 additions and 312 deletions

View File

@@ -1,312 +0,0 @@
"""
When a Man Is Dying — Despair/Suicide Detection System
Standalone detection module that parses incoming text for
despair and suicide indicators, classifies into tiers,
and returns structured response with recommended actions.
Tiers:
LOW — General sadness, stress, difficult times
MEDIUM — Hopelessness, isolation, worthlessness
HIGH — Active despair, mentions of death, "can't go on"
CRITICAL — Imminent risk, explicit intent, plan, method
Integration:
Designed to work with crisis/ module from PR #4.
When crisis/ is available, uses it as the detection backend.
Falls back to internal detection when crisis/ is not present.
"""
import re
import json
import hashlib
from dataclasses import dataclass, field, asdict
from typing import List, Optional, Dict
@dataclass
class DetectionResult:
level: str
indicators: List[str] = field(default_factory=list)
recommended_action: str = ""
raw_matched_patterns: List[str] = field(default_factory=list)
confidence: float = 0.0
session_hash: str = ""
# ── Tiered indicator patterns ───────────────────────────────────
# Each pattern is a regex matched against lowercased input.
CRITICAL_PATTERNS = [
(r"\bkill\s*(my)?self\b", "explicit self-harm intent"),
(r"\bsuicid(?:al|ed|e)\b", "suicide reference"),
(r"\bend\s*(my)?\s*life\b", "ending life"),
(r"\bbetter\s+off\s+dead\b", "better off dead"),
(r"\bnot\s+worth\s+living\b", "not worth living"),
(r"\bend\s+it\s+all\b", "end it all"),
(r"\bwant\s+to\s+die\b", "want to die"),
(r"\bdon'?t\s+want\s+to\s+live\b", "don't want to live"),
(r"\bcan'?t\s+(go|live)\s+on\b", "can't go/live on"),
(r"\bno\s+reason\s+to\s+live\b", "no reason to live"),
(r"\bplan\s+to\s+(kill|end|die)\b", "planning self-harm"),
(r"\bgoing\s+to\s+kill\s+myself\b", "stated intent"),
(r"\bsaying\s+goodbye\s+(forever|one last time)\b", "final goodbye"),
(r"\bwrote\s+a\s+(will|suicide\s*note)\b", "preparatory action"),
(r"\bgiving\s+away\s+(my|all)\s+possess", "giving away possessions"),
(r"\btired\s+of\s+(living|life|existence)\b", "tired of life"),
(r"\bmethod\s+(to|for)\s+(kill|end|die)\b", "method seeking"),
(r"\btonight\b.*\b(die|kill|end|suicid)", "immediate timeframe + intent"),
(r"\b(die|kill|end)\b.*\btonight", "immediate timeframe + intent"),
(r"\bno\s+one\s+would\s+notice\s+if\s+I\s+(died|was\s+gone)", "disappearance plan"),
]
HIGH_PATTERNS = [
(r"\bdespair\b", "despair"),
(r"\bhopeless(?:ness)?\b", "hopelessness"),
(r"\bcan'?t\s+take\s+this\s+anymore\b", "can't take it"),
(r"\bdon'?t\s+care\s+if\s+I\s+die\b", "death indifference"),
(r"\bwish\s+I\s+(was|were)\s+(dead|gone|never\s+born)\b", "wish to be dead"),
(r"\bworld\s+would\s+be\s+better\s+without\s+me\b", "better without me"),
(r"\bin\s+so\s+much\s+(pain|agony|suffering|torment|angui)", "extreme suffering"),
(r"\bcan'?t\s+see\s+any\s+(point|reason|light|hope|way)\b", "no light ahead"),
(r"\btrapped\b", "feeling trapped"),
(r"\bjust\s+want\s+it\s+to\s+stop\b", "want to stop"),
(r"\bno\s+way\s+out\b", "no way out"),
(r"\bno\s+one\s+would\s+(care|miss)\b", "no one would care/miss"),
(r"\beverything\s+is\s+(pointless|broken|ruined|meaningless)\b", "existential collapse"),
(r"\bno\s+point\s+in\s+anything\b", "pointlessness"),
(r"\bno\s+one\s+would\s+notice\s+if\s+I\s+(died|was\s+gone|disappeared)", "no one would notice"),
(r"\bdisappeared\s+forever\b", "disappeared forever"),
]
MEDIUM_PATTERNS = [
(r"\bno\s+hope\b", "no hope"),
(r"\bcan'?t\s+go\s+on\b", "can't go on"),
(r"\bcan'?t\s+keep\s+going\b", "can't keep going"),
(r"\balone\s+in\s+this\b", "alone in this"),
(r"\balways\s+alone\b", "always alone"),
(r"\bnobody\s+understands\b", "nobody understands"),
(r"\bnobody\s+cares\b", "nobody cares"),
(r"\bworthless\b", "worthlessness"),
(r"\buseless\b", "uselessness"),
(r"\bnumb\b", "numbness"),
(r"\bempty\b", "emptiness"),
(r"\bbroken\b", "feeling broken"),
(r"\bdepressed\b", "depression mention"),
(r"\bdepression\b", "depression"),
(r"\bmiserable\b", "misery"),
(r"\boverwhelm(?:ed|ing)\b", "overwhelmed"),
(r"\bcannot\s+cope\b", "cannot cope"),
(r"\b(drowning|sinking)\b", "drowning/sinking"),
(r"\bforgotten\b", "feeling forgotten"),
(r"\blost\s+all\s+hope\b", "lost all hope"),
(r"\bno\s+future\b", "no future"),
(r"\bno\s+tomorrow\b", "no tomorrow"),
]
LOW_PATTERNS = [
(r"\bunhappy\b", "unhappy"),
(r"\brough\s+(day|week|patch)\b", "rough time"),
(r"\btough\s+(time|day|week)\b", "tough time"),
(r"\bstressed\b", "stressed"),
(r"\bburnout\b", "burnout"),
(r"\bfrustrated\b", "frustrated"),
(r"\bthings\s+(are\s+)?hard\b", "things are hard"),
(r"\bnot\s+feeling\s+(great|myself|good)\b", "not feeling good"),
(r"\bstruggl", "struggling"),
(r"\bdown\b", "feeling down"),
(r"\bsad\b", "sad"),
(r"\bupset\b", "upset"),
(r"\blonely\b", "lonely"),
(r"\banxious?\b", "anxious/anxiety"),
(r"\bnot\s+okay\b", "not okay"),
]
# ── Pattern collections for easy iteration ──────────────────────
TIER_PATTERNS: Dict[str, List[tuple]] = {
"CRITICAL": CRITICAL_PATTERNS,
"HIGH": HIGH_PATTERNS,
"MEDIUM": MEDIUM_PATTERNS,
"LOW": LOW_PATTERNS,
}
def detect(text: str) -> DetectionResult:
"""
Primary detection function.
If the crisis/ module is available, delegate to it.
Otherwise, use the internal pattern engine.
Args:
text: User message to analyze
Returns:
DetectionResult with level, indicators, recommended_action, confidence
"""
# Try to use the crisis module if available
try:
from crisis.detect import detect_crisis as _crisis_detect
result = _crisis_detect(text)
# Map to DetectionResult
return DetectionResult(
level=result.level,
indicators=_describe_indicators(result.level, result.indicators),
recommended_action=result.recommended_action,
confidence=result.score,
session_hash=_hash_session(text),
)
except ImportError:
pass
return _detect_internal(text)
def _detect_internal(text: str) -> DetectionResult:
"""Internal pattern-based detection (fallback)."""
if not text or not text.strip():
return DetectionResult(level="NONE", session_hash=_hash_session(text))
text_lower = text.lower()
all_matches: Dict[str, List[tuple]] = {
tier: [] for tier in TIER_PATTERNS
}
for tier, patterns in TIER_PATTERNS.items():
for pattern, description in patterns:
if re.search(pattern, text_lower):
all_matches[tier].append((pattern, description))
# CRITICAL patterns are the highest priority
if all_matches["CRITICAL"]:
return DetectionResult(
level="CRITICAL",
indicators=[desc for _, desc in all_matches["CRITICAL"]],
recommended_action=(
"INTERRUPT CONVERSATION IMMEDIATELY. "
"Ask 'Are you safe right now?' "
"Provide 988 Suicide & Crisis Lifeline (call or text 988). "
"Provide Crisis Text Line (text HOME to 741741). "
"Stay present. Do not disconnect. "
"Redirect to professional help. "
"This is the most sacred moment."
),
raw_matched_patterns=[pat for pat, _ in all_matches["CRITICAL"]],
confidence=1.0,
session_hash=_hash_session(text),
)
if all_matches["HIGH"]:
return DetectionResult(
level="HIGH",
indicators=[desc for _, desc in all_matches["HIGH"]],
recommended_action=(
"Show crisis panel prominently. "
"Ask about safety directly. "
"Surface 988 number and crisis resources. "
"Stay present and engaged. Do not rush to fix. "
"Monitor closely for escalation."
),
raw_matched_patterns=[pat for pat, _ in all_matches["HIGH"]],
confidence=0.75,
session_hash=_hash_session(text),
)
if all_matches["MEDIUM"]:
return DetectionResult(
level="MEDIUM",
indicators=[desc for _, desc in all_matches["MEDIUM"]],
recommended_action=(
"Increase warmth and presence. "
"Subtly surface help resources. "
"Keep conversation anchored in the present. "
"Watch for escalation indicators."
),
raw_matched_patterns=[pat for pat, _ in all_matches["MEDIUM"]],
confidence=0.5,
session_hash=_hash_session(text),
)
if all_matches["LOW"]:
return DetectionResult(
level="LOW",
indicators=[desc for _, desc in all_matches["LOW"]],
recommended_action=(
"Normal conversation with warm undertone. "
"No crisis UI elements needed. "
"Remain vigilant. Be present."
),
raw_matched_patterns=[pat for pat, _ in all_matches["LOW"]],
confidence=0.25,
session_hash=_hash_session(text),
)
return DetectionResult(level="NONE", session_hash=_hash_session(text))
def _describe_indicators(level: str, patterns: list) -> list:
"""Map raw patterns to descriptions."""
descriptions = {
"CRITICAL": [],
"HIGH": [],
"MEDIUM": [],
"LOW": [],
}
for tier, items in TIER_PATTERNS.items():
for pat, desc in items:
if pat in patterns:
descriptions[tier].append(desc)
return descriptions.get(level, [])
def _hash_session(text: str) -> str:
"""Create a session hash for this text (for tracking repeated escalations)."""
return hashlib.sha256(text.encode()).hexdigest()[:12]
def get_action_for_level(level: str) -> str:
"""Get the recommended action string for a given level."""
actions = {
"CRITICAL": (
"INTERRUPT CONVERSATION. Ask 'Are you safe right now?' "
"Provide 988. Provide Crisis Text Line. "
"Stay present. Do not disconnect. "
"Redirect to help."
),
"HIGH": (
"Show crisis panel. Ask about safety. "
"Surface 988. Stay engaged."
),
"MEDIUM": (
"Increase warmth. Surface resources gently. "
"Anchor in present."
),
"LOW": (
"Normal conversation with warmth. "
"Remain vigilant."
),
"NONE": "No action needed.",
}
return actions.get(level, "Unknown level.")
def as_json(result: DetectionResult, indent: int = 2) -> str:
"""Return the DetectionResult as a JSON string."""
return json.dumps(asdict(result), indent=indent)
def process(text: str) -> dict:
"""
Full pipeline: detect and return a dict.
This is the primary API function for other modules.
"""
result = detect(text)
return {
"level": result.level,
"indicators": result.indicators,
"recommended_action": result.recommended_action,
"confidence": result.confidence,
"raw_patterns": result.raw_matched_patterns,
"action": get_action_for_level(result.level),
}

177
resilience/health-check.sh Executable file
View File

@@ -0,0 +1,177 @@
#!/usr/bin/env bash
# health-check.sh — Health check and service monitoring for the-door
# Usage: bash health-check.sh [--auto-restart] [--verbose]
#
# Checks:
# 1. nginx process is running
# 2. Static files are accessible (index.html serves correctly)
# 3. Gateway endpoint responds (if configured)
# 4. Disk space is adequate (< 90% used)
# 5. SSL cert is valid and not expiring soon
set -euo pipefail
VERBOSE=0
AUTO_RESTART=0
HEALTHY=0
WARNINGS=0
for arg in "$@"; do
case "$arg" in
--verbose) VERBOSE=1 ;;
--auto-restart) AUTO_RESTART=1 ;;
*) echo "Usage: $0 [--auto-restart] [--verbose]"; exit 1 ;;
esac
done
log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"; }
info() { log "INFO $1"; }
warn() { log "WARN $1"; echo " ACTION: $2"; WARNINGS=$((WARNINGS + 1)); }
ok() { log "OK $1"; HEALTHY=$((HEALTHY + 1)); }
fail() { log "FAIL $1"; echo " ACTION: $2"; if [ "$AUTO_RESTART" = 1 ]; then "$3"; fi; }
# ── Check 1: nginx ─────────────────────────────────
check_nginx() {
local host="${1:-localhost}"
local port="${2:-80}"
if pgrep -x nginx > /dev/null 2>&1; then
ok "nginx is running (PID: $(pgrep -x nginx | head -1))"
else
fail "nginx is NOT running" "Start nginx: systemctl start nginx || nginx" "restart_nginx"
fi
}
# ── Check 2: static files ──────────────────────────
check_static() {
local host="${1:-localhost}"
local port="${2:-80}"
local protocol="http"
# Check for HTTPS
if [ -d "/etc/letsencrypt" ] || [ -d "/etc/ssl" ]; then
protocol="https"
fi
local status
status=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 -k "$protocol://$host/index.html" 2>/dev/null || echo "000")
if [ "$status" = "200" ]; then
ok "index.html serves OK (HTTP $status)"
elif [ "$status" = "000" ]; then
fail "Cannot reach $protocol://$host:" "$AUTO_RESTART" "Check nginx config: nginx -t"
else
warn "Unexpected status for index.html: HTTP $status" "Check nginx config and file permissions"
fi
}
# ── Check 3: Gateway ───────────────────────────────
check_gateway() {
local gateway_url="${1:-http://localhost:8000}"
local status
status=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "$gateway_url/health" 2>/dev/null || echo "000")
if [ "$status" = "200" ]; then
ok "Gateway responds (HTTP $status)"
elif [ "$status" = "000" ]; then
warn "Gateway not reachable at $gateway_url" "Check gateway service: systemctl status gateway || docker ps"
else
warn "Gateway returned HTTP $status" "Check gateway logs"
fi
}
# ── Check 4: Disk space ────────────────────────────
check_disk() {
local usage
usage=$(df / | tail -1 | awk '{print $5}' | tr -d '%')
if [ "$usage" -lt 80 ]; then
ok "Disk usage: ${usage}%"
elif [ "$usage" -lt 90 ]; then
warn "Disk usage: ${usage}%" "Clean up logs and temp files: journalctl --vacuum-size=100M"
else
fail "Disk usage CRITICAL: ${usage}%" "Emergency cleanup needed" "cleanup_disk"
fi
}
# ── Check 5: SSL cert ──────────────────────────────
check_ssl() {
local domain="${1:-localhost}"
local cert_dir="/etc/letsencrypt/live/$domain"
if [ ! -d "$cert_dir" ]; then
if [ "$VERBOSE" = 1 ]; then
warn "No Let's Encrypt cert at $cert_dir" "Assuming self-signed or no SSL"
fi
return 0
fi
if [ -f "$cert_dir/fullchain.pem" ]; then
local expiry
expiry=$(openssl x509 -enddate -noout -in "$cert_dir/fullchain.pem" 2>/dev/null | cut -d= -f2 || echo "unknown")
if [ "$expiry" = "unknown" ]; then
warn "Cannot read SSL cert expiry" "Check cert: openssl x509 -enddate -noout -in $cert_dir/fullchain.pem"
return 0
fi
local expiry_epoch
expiry_epoch=$(date -d "$expiry" +%s 2>/dev/null || date -j -f "%b %d %T %Y %Z" "$expiry" +%s 2>/dev/null || echo 0)
local now_epoch
now_epoch=$(date +%s)
local days_left=$(( (expiry_epoch - now_epoch) / 86400 ))
if [ "$days_left" -gt 30 ]; then
ok "SSL cert expires in ${days_left} days ($expiry)"
elif [ "$days_left" -gt 0 ]; then
warn "SSL cert expires in ${days_left} days!" "Renew: certbot renew"
else
fail "SSL cert has EXPIRED" "Renew immediately: certbot renew --force-renewal"
fi
fi
}
# ── Recovery functions ──────────────────────────────
restart_nginx() {
info "Attempting to restart nginx..."
if command -v systemctl > /dev/null 2>&1; then
systemctl restart nginx && info "nginx restarted successfully" || warn "nginx restart failed" "Manual intervention needed"
elif command -v nginx > /dev/null 2>&1; then
nginx -s reload 2>/dev/null || (nginx && info "nginx started") || warn "nginx start failed" "Manual intervention needed"
fi
}
cleanup_disk() {
info "Running disk cleanup..."
journalctl --vacuum-size=100M 2>/dev/null || true
rm -rf /tmp/* 2>/dev/null || true
rm -rf /var/log/*.gz 2>/dev/null || true
info "Cleanup complete"
}
# ── Main ────────────────────────────────────────────
info "=== The Door Health Check ==="
info "Host: ${HEALTH_HOST:-localhost}"
info "Time: $(date)"
echo ""
check_nginx "${HEALTH_HOST:-localhost}" "${HEALTH_PORT:-80}"
check_static "${HEALTH_HOST:-localhost}" "${HEALTH_PORT:-80}"
check_gateway "${GATEWAY_URL:-http://localhost:8000}"
check_disk
check_ssl "${HEALTH_HOST:-localhost}"
echo ""
if [ "$WARNINGS" -gt 0 ] || [ "$HEALTHY" -gt 0 ]; then
info "Summary: $HEALTHY OK, $WARNINGS warnings/failures"
fi
if [ "$WARNINGS" -gt 0 ] && [ "$AUTO_RESTART" = 1 ]; then
warn "Auto-restart mode is ON — recovery actions attempted"
exit 1
elif [ "$WARNINGS" -gt 0 ]; then
exit 1
fi
exit 0

91
resilience/service-restart.sh Executable file
View File

@@ -0,0 +1,91 @@
#!/usr/bin/env bash
# service-restart.sh — Graceful service restart for the-door
# Usage: bash service-restart.sh [--force]
#
# Performs ordered restart: stop -> verify stopped -> start -> verify started
# with health check confirmation.
set -euo pipefail
FORCE=0
for arg in "$@"; do
case "$arg" in
--force) FORCE=1 ;;
*) echo "Usage: $0 [--force]"; exit 1 ;;
esac
done
log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"; }
# ── Stop ────────────────────────────────────────────
stop_services() {
log "Stopping services..."
if command -v systemctl > /dev/null 2>&1; then
systemctl stop nginx 2>/dev/null && log "nginx stopped" || true
elif command -v nginx > /dev/null 2>&1; then
nginx -s stop 2>/dev/null && log "nginx stopped" || true
fi
# Stop gateway if running
local gw_pid
gw_pid=$(lsof -ti:8000 2>/dev/null || true)
if [ -n "$gw_pid" ]; then
kill "$gw_pid" 2>/dev/null && log "Gateway stopped (PID $gw_pid)" || true
fi
sleep 1
log "All services stopped"
}
# ── Start ───────────────────────────────────────────
start_services() {
log "Starting services..."
# Start nginx
if command -v systemctl > /dev/null 2>&1; then
systemctl start nginx && log "nginx started" || { log "FAILED to start nginx"; return 1; }
elif command -v nginx > /dev/null 2>&1; then
nginx 2>/dev/null && log "nginx started" || { log "FAILED to start nginx"; return 1; }
fi
log "All services started"
}
# ── Verify ──────────────────────────────────────────
verify_services() {
local host="${1:-localhost}"
log "Verifying services..."
# Check nginx
if pgrep -x nginx > /dev/null 2>&1; then
log "nginx is running"
else
log "ERROR: nginx failed to start"
return 1
fi
# Check static file
local status
status=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "http://$host/" 2>/dev/null || echo "000")
if [ "$status" = "200" ]; then
log "Static content verified (HTTP $status)"
else
log "WARNING: Static content check returned HTTP $status"
fi
}
# ── Main ────────────────────────────────────────────
log "=== Service Restart ==="
if [ "$FORCE" = 1 ]; then
log "FORCE mode — skipping graceful stop"
else
stop_services
fi
start_services
verify_services "${HEALTH_HOST:-localhost}"
log "=== Restart complete ==="