Compare commits

...

1 Commits

Author SHA1 Message Date
e2a6538733 feat: multilingual crisis detection (#706)
Some checks are pending
Contributor Attribution Check / check-attribution (pull_request) Waiting to run
Docker Build and Publish / build-and-push (pull_request) Waiting to run
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Waiting to run
Tests / test (pull_request) Waiting to run
Tests / e2e (pull_request) Waiting to run
Add crisis detection for 6 languages:
Spanish, French, German, Portuguese, Chinese, Japanese.

Same categories as English: direct_suicidal, method_seeking, despair.
Crisis threshold: 0.05 (extremely sensitive).

Closes #706
2026-04-15 23:09:10 +00:00

View File

@@ -0,0 +1,276 @@
#!/usr/bin/env python3
"""Multilingual Crisis Detection Patterns.

Extends the English crisis detector with patterns for:
    Spanish, French, German, Portuguese, Chinese, Japanese

Usage:
    from multilingual_crisis import detect_multilingual, MULTILINGUAL_CRISIS_PATTERNS

    result = detect_multilingual(user_message)
    if result["crisis_detected"]:
        # Route to crisis protocol
        pass
"""
import logging
import re
import unicodedata
from typing import Optional
# Crisis pattern table, keyed "<category>_<lang>".
#
# Each entry holds:
#   "patterns"    - regex source strings; a category counts as matched as soon
#                   as any one of them is found anywhere in the message
#   "weight"      - contribution of the category to the overall score
#   "lang"        - language code of the patterns
#   "description" - human-readable label
#
# Patterns are searched unanchored, so alternatives that are also substrings
# of everyday words carry explicit \b word boundaries. Wherever a phrase is
# only a crisis signal when negated ("no longer want to live"), the negation
# is mandatory: an optional negation would also flag the opposite statement
# ("I want to live").
#
# NOTE(review): these patterns have not been validated by native speakers or
# clinicians; treat the linguistic coverage as best-effort.
MULTILINGUAL_CRISIS_PATTERNS = {
    # ------------------------------------------------------------------
    # Spanish (es)
    # ------------------------------------------------------------------
    "direct_suicidal_es": {
        "patterns": [
            r"(?:quiero|necesito|voy a|pensando en|planeando)\s+(?:morir|suicidarme|matarme|acabar con todo|quitarme la vida)",
            r"suicid(?:io|arme|arse|al)",
            # Accept both grammatical genders ("muerto" / "muerta").
            r"(?:quiero|necesito|ojal[aá])\s+(?:estar|ser|quedarme)\s+muert[oa]",
            r"(?:no|ya no)\s+(?:quiero|puedo|soporto)\s+(?:vivir|seguir|estar aqu[ií]|existir)",
            r"(?:acabar|terminar)\s+(?:con\s+)?(?:todo|mi\s+vida|conmigo)",
            # "nadie me echaría de menos", "nadie me extrañaría",
            # "nadie notaría mi falta", "a nadie le importaría".
            r"nadie\s+(?:me\s+)?(?:echar[ií]a\s+de\s+menos|extra[nñ]ar[ií]a|notar[ií]a\s+mi\s+(?:falta|ausencia))|a\s+nadie\s+le\s+importar[ií]a",
            # Negation is required: "mi vida tiene sentido" must not match.
            r"(?:mi|la)\s+vida\s+(?:ya\s+)?no\s+(?:vale|sirve|tiene\s+sentido)",
            # "estaría mejor muerto/a", "todos estarían mejor sin mí".
            r"(?:(?:ser[ií]a|estar[ií]a)\s+mejor|todos\s+estar[ií]an\s+mejor)\s+(?:si\s+)?(?:no\s+estuviera|sin\s+m[ií]|muert[oa])",
        ],
        "weight": 1.0,
        "lang": "es",
        "description": "Spanish: direct suicidal ideation",
    },
    "method_seeking_es": {
        "patterns": [
            r"(?:c[oó]mo|qu[eé]\s+(?:forma|manera)|mejor\s+forma)\s+(?:de\s+)?(?:morir|suicidarme|matarme|acabar)",
            r"(?:dosis|cantidad)\s+(?:letal|fatal|mortal|letales)",
            r"(?:pastillas|medicamentos?|f[aá]rmacos?)\s+(?:para\s+)?(?:morir|suicidarme|matarme)",
            r"cort(?:arme|ar)\s+(?:las?\s+)?(?:venas|mu[nñ]ecas|garganta)",
            # The reflexive verbs stand alone; "saltar"/"tirarme" need a
            # preposition so that e.g. "saltar la cuerda" is not flagged.
            r"\b(?:ahorcarme|colgarme)\b|\b(?:saltar|tirarme)\s+(?:de|del|desde)\b",
            r"(?:forma|manera|m[eé]todo)\s+(?:indolora|r[aá]pida|sin\s+dolor)\s+(?:de\s+)?(?:morir|acabar)",
        ],
        "weight": 1.0,
        "lang": "es",
        "description": "Spanish: method seeking",
    },
    "despair_es": {
        "patterns": [
            r"(?:no|ya\s+no)\s+(?:puedo|soporto|aguanto|resisto)\s+m[aá]s",
            # Accept both grammatical genders ("solo" / "sola", ...).
            r"(?:estoy|me\s+siento)\s+(?:perdid[oa]|sol[oa]|vac[ií][oa]|sin\s+esperanza|agotad[oa])",
            r"(?:no\s+tiene|no\s+hay)\s+(?:sentido|soluci[oó]n|salida|esperanza)",
            # "para qué ..." / "para quién ...".
            r"para\s+(?:qu[eé]|qui[eé]n)\s+(?:si|vivo|me\s+esfuerzo)",
            r"(?:soy|me\s+siento\s+como)\s+(?:una?\s+)?(?:carga|peso|estorbo|in[uú]til)",
        ],
        "weight": 0.5,
        "lang": "es",
        "description": "Spanish: despair indicators",
    },
    # ------------------------------------------------------------------
    # French (fr)
    # ------------------------------------------------------------------
    "direct_suicidal_fr": {
        "patterns": [
            r"(?:je\s+(?:veux|vais|pense\s+[àa]|planifie)|j'ai\s+envie\s+de)\s+(?:mourir|me\s+suicider|me\s+tuer|en\s+finir)",
            r"suicid(?:e|er|aire)",
            # "plus"/"pas" is required: "je veux vivre" must not match.
            # The leading "ne" stays optional because it is dropped in speech.
            r"(?:je\s+ne\s+)?(?:veux|peux|supporte)\s+(?:plus|pas)\s+(?:de\s+)?(?:vivre|exister|continuer)",
            r"(?:en|j'en)\s+finir\s+(?:avec\s+)?(?:tout|la\s+vie|moi|m[eê]me|[çc]a)",
            r"(?:personne|nul\s+ne|aucun)\s+(?:ne\s+)?(?:me\s+)?(?:manquera|regretterait|remarquerait)",
            r"(?:ma|cette)\s+vie\s+(?:ne\s+vaut|n'a\s+(?:pas\s+)?de\s+sens|est\s+(?:finie|inutile))",
            r"(?:tout\s+le\s+monde|on)\s+(?:serait|irait)\s+(?:mieux|bien)\s+(?:sans\s+moi)",
        ],
        "weight": 1.0,
        "lang": "fr",
        "description": "French: direct suicidal ideation",
    },
    "method_seeking_fr": {
        "patterns": [
            # First-person reflexive ("me tuer") accepted alongside "se tuer".
            r"(?:comment|quel(?:le)?\s+(?:est\s+le\s+)?(?:meilleur|moyen))\s+(?:de\s+)?(?:mourir|(?:se|me)\s+suicider|(?:se|me)\s+tuer|en\s+finir)",
            r"(?:dose|quantit[eé])\s+(?:l[eé]tale?|fatale?|mortelle?)",
            r"(?:comprim[eé]s?|pilules?|m[eé]dicaments?)\s+(?:pour\s+)?(?:mourir|(?:se|me)\s+tuer|overdose)",
            r"(?:se\s+)?couper\s+(?:les\s+)?(?:veines|poignets|gorge)",
            # Word boundaries keep "pendre" from matching inside
            # "dépendre" / "suspendre".
            r"\b(?:(?:se|me)\s+)?pendre\b|\b(?:se|me)\s+(?:jeter|lancer)\s+(?:du|de\s+la|dans)",
            r"(?:moyen|fa[cç]on|mani[eè]re)\s+(?:indolore|rapide|sans\s+douleur)\s+(?:de\s+)?(?:mourir|en\s+finir)",
        ],
        "weight": 1.0,
        "lang": "fr",
        "description": "French: method seeking",
    },
    "despair_fr": {
        "patterns": [
            # Negation required: "je peux continuer" must not match.
            r"(?:je\s+ne\s+|je\s+n'|j')?(?:peux|supporte|arrive)\s+(?:plus|pas)\s+(?:[àa]\s+|de\s+)?(?:continuer|tenir|durer|avancer)",
            r"(?:je\s+suis|je\s+me\s+sens)\s+(?:perdu|seul|vide|sans\s+espoir|au\s+bout)",
            # Negation required: "beaucoup d'espoir" must not match.
            r"(?:pas|plus)\s+(?:d'?espoir|de\s+solution|d'issue|de\s+sens)|aucune?\s+(?:espoir|solution|issue|sens)",
            r"(?:je\s+suis|c'est)\s+(?:un\s+)?(?:fardeau|poids|inutile|nul)",
            r"(?:pourquoi|[àa]\s+quoi\s+bon|pour\s+qui)\s+(?:je\s+)?(?:vis|m'efforce|continue)",
        ],
        "weight": 0.5,
        "lang": "fr",
        "description": "French: despair indicators",
    },
    # ------------------------------------------------------------------
    # German (de)
    # ------------------------------------------------------------------
    "direct_suicidal_de": {
        "patterns": [
            r"(?:ich\s+(?:will|möchte|denke\s+(?:über|an)|plane))\s+(?:sterben|suizid|mich\s+(?:umbringen|töten))",
            r"suizid|selbstmord",
            # "ich will nicht leben" / "ich will nicht mehr leben".
            r"ich\s+(?:will|möchte)\s+(?:nicht(?:\s+mehr)?|mehr\s+nicht)\s+(?:leben|weiterleben|existieren|dasein)",
            r"(?:mit\s+)?(?:allem|dem\s+Leben|mir\s+selbst)\s+(?:aufhören|Schluss|fertig)\s+(?:sein|machen)",
            r"(?:niemand|keiner)\s+(?:würde|wird)\s+(?:mich\s+)?(?:vermissen|bemerken|verlieren)",
            # "keinen" is required: "mein Leben hat Sinn" must not match.
            r"(?:mein|dieses)\s+Leben\s+(?:hat\s+keinen\s+Sinn|ist\s+(?:sinnlos|vorbei|fertig))",
            r"(?:allen|jedem)\s+(?:wäre|ginge|ging)\s+es\s+besser\s+(?:ohne\s+mich|wenn\s+ich\s+nicht\s+wäre)",
        ],
        "weight": 1.0,
        "lang": "de",
        "description": "German: direct suicidal ideation",
    },
    "method_seeking_de": {
        "patterns": [
            r"(?:wie|welcher|beste)\s+(?:kann\s+ich|möglichkeit)\s+(?:mich\s+)?(?:umbringen|töten|sterben|suizid)",
            r"(?:tödliche|letale|verhängnisvolle)\s+(?:Dosis|Menge)",
            r"(?:Tabletten?|Medikamente?|Pillen?)\s+(?:um\s+)?(?:zu\s+)?(?:sterben|suizid|überdosis)",
            r"(?:sich\s+)?(?:die\s+)?(?:Pulsadern|Kehle|Handgelenke?)\s+(?:aufschneiden|durchschneiden)",
            r"(?:sich\s+)?(?:erhängen|aufhängen|vor\s+(?:einen\s+)?Zug\s+werfen|springen)",
            r"(?:schmerzlose?|schnelle?)\s+(?:Art|Weise|Methode)\s+(?:zu\s+)?(?:sterben|suizid)",
        ],
        "weight": 1.0,
        "lang": "de",
        "description": "German: method seeking",
    },
    "despair_de": {
        "patterns": [
            # "nicht" is required: "ich kann weiter" must not match.
            r"ich\s+(?:kann|schaffe|halte)\s+(?:es\s+)?nicht\s+(?:mehr|weiter|länger)",
            # The "ich bin / ich fühle mich" prefix is required so that a bare
            # "verloren" or "am Ende" in unrelated text is not flagged.
            r"ich\s+(?:bin|fühle\s+mich)\s+(?:so\s+)?(?:verloren|einsam|leer|hoffnungslos|am\s+Ende)",
            # Negation required ("es gibt Hoffnung" must not match);
            # "Ausweg" = way out.
            r"(?:es\s+gibt|es\s+hat)\s+kein(?:e|en)?\s+(?:Hoffnung|Lösung|Ausweg|Sinn)",
            r"(?:ich\s+bin|bin\s+ich)\s+(?:eine\s+)?(?:Belastung|Last|nutzlos|wertlos)",
            r"(?:warum|wozu|für\s+wen)\s+(?:lebe|soll|mache)\s+(?:ich\s+)?(?:überhaupt|noch|weiter)",
        ],
        "weight": 0.5,
        "lang": "de",
        "description": "German: despair indicators",
    },
    # ------------------------------------------------------------------
    # Portuguese (pt)
    # ------------------------------------------------------------------
    "direct_suicidal_pt": {
        "patterns": [
            # "(me) suicidar" covers "eu quero me suicidar".
            r"(?:eu\s+(?:quero|preciso|vou|estou\s+pensando\s+em)|penso\s+em)\s+(?:morrer|me\s+matar|(?:me\s+)?suicid[ai]r|acabar\s+com\s+tudo)",
            r"suic[ií]dio",
            # Negation required: "eu quero viver" must not match.
            r"(?:eu\s+)?n[aã]o\s+(?:quero|aguento|suporto|consigo)\s+(?:mais\s+)?(?:viver|existir|continuar|estar\s+aqui)",
            r"(?:acabar|terminar|dar\s+fim)\s+(?:com\s+)?(?:tudo|a\s+minha\s+vida|com\s+isso|com\s+tudo)",
            r"(?:ningu[eé]m)\s+(?:vai|iria)\s+(?:sentir\s+falta|notar|ligar|se\s+importar)",
            # Negation required: "minha vida faz sentido" must not match.
            r"(?:minha|esta)\s+vida\s+n[aã]o\s+(?:vale|faz\s+sentido|tem\s+sentido)",
            r"(?:todo\s+mundo|todos)\s+(?:seria|estaria|ficaria)\s+(?:melhor|bem)\s+(?:sem\s+mim|se\s+eu\s+fosse)",
        ],
        "weight": 1.0,
        "lang": "pt",
        "description": "Portuguese: direct suicidal ideation",
    },
    "method_seeking_pt": {
        "patterns": [
            r"(?:como|qual|melhor\s+forma)\s+(?:de\s+)?(?:morrer|me\s+matar|(?:me\s+)?suicid[ai]r|acabar)",
            r"(?:dose|quantidade)\s+(?:letal|fatal|mortal)",
            r"(?:comprimidos?|remédios?|medicamentos?)\s+(?:para\s+)?(?:morrer|me\s+matar|overdose)",
            # The determiner and its trailing space are optional together, so
            # both "cortar pulsos" and "cortar os pulsos" match.
            r"cort(?:ar|ando)\s+(?:(?:os|as|a|meus|minhas|minha)\s+)?(?:pulsos|veias|garganta)",
            # "pular" needs a preposition ("pular corda" must not match);
            # "pendurar" needs a reflexive pronoun.
            r"\b(?:enforcar|enforcando)\b|\b(?:me|se)\s+pendur[ae]r\b|\bpendur[ae]r-(?:me|se)\b|\bpular\s+(?:de|do|da)\b",
            r"(?:forma|maneira|método)\s+(?:indolor|rápido|sem\s+dor)\s+(?:de\s+)?(?:morrer|acabar)",
        ],
        "weight": 1.0,
        "lang": "pt",
        "description": "Portuguese: method seeking",
    },
    # Portuguese despair category, mirroring the Spanish one so that every
    # language carries the same three categories.
    "despair_pt": {
        "patterns": [
            r"n[aã]o\s+(?:aguento|suporto|consigo|posso)\s+mais",
            r"(?:estou|me\s+sinto|sinto-me)\s+(?:perdid[oa]|sozinh[oa]|vazi[oa]|sem\s+esperan[cç]a|esgotad[oa])",
            r"n[aã]o\s+(?:tem|h[aá])\s+(?:sentido|solu[cç][aã]o|sa[ií]da|esperan[cç]a)",
            r"(?:para|pra)\s+qu[eê]\s+(?:viver|continuar|me\s+esfor[cç]ar)",
            r"(?:sou|me\s+sinto\s+como)\s+(?:uma?\s+)?(?:fardo|peso|estorvo|in[uú]til)",
        ],
        "weight": 0.5,
        "lang": "pt",
        "description": "Portuguese: despair indicators",
    },
    # ------------------------------------------------------------------
    # Chinese (zh) - character-based, no word boundaries
    # ------------------------------------------------------------------
    "direct_suicidal_zh": {
        "patterns": [
            r"(?:想|想要|打算|计划|准备)(?:去)?(?:死|自杀|结束(?:自己的)?(?:生命|一切))",
            r"(?:不想|不再|无法|不能)(?:活|活下去|继续活|生存)(?:了|下去)?",
            r"(?:结束|终结)(?:自己|生命|一切|所有|全部)",
            r"(?:没有|毫无|失去)(?:意义|希望|活下去的(?:理由|动力|勇气))",
            r"(?:死了|不在了|消失了)(?:大家|所有人)(?:会|就)(?:更好|开心|轻松)",
            r"(?:没有人|谁)(?:会)(?:在乎|在意|想|怀念)(?:我|我的)",
            r"自杀",
        ],
        "weight": 1.0,
        "lang": "zh",
        "description": "Chinese: direct suicidal ideation",
    },
    "method_seeking_zh": {
        "patterns": [
            r"(?:怎么|怎样|什么(?:方法|方式))(?:才能|可以|去)?(?:死|自杀|结束(?:自己|生命))",
            r"(?:致死|致命|致亡)(?:剂量|量|浓度|含量)",
            r"(?:安眠药|药物|药片|毒药)(?:吃|服用|用)(?:多少|几|怎么)(?:才能|可以|会)?(?:死|自杀|致命)",
            r"(?:割|切)(?:腕|脉|喉|脖子|血管)",
            r"(?:上吊|跳楼|跳河|跳桥|跳崖|撞车)",
            r"(?:无痛|最快|最简单)(?:的)?(?:死法|方法|方式)(?:是|用来)?",
        ],
        "weight": 1.0,
        "lang": "zh",
        "description": "Chinese: method seeking",
    },
    "despair_zh": {
        "patterns": [
            r"(?:撑|坚持|忍受|活)(?:不|没有|无法)(?:下去|了|再)",
            r"(?:迷茫|孤独|空虚|绝望|崩溃|痛苦|无助)(?:了|到了|到)?",
            r"(?:没有|失去|看不到)(?:希望|出路|未来|意义|目的)",
            r"(?:我是|我是一个)(?:负担|累赘|废物|没用的人)",
            r"(?:活着|生存)(?:为了什么|有什么意义|有什么用|有什么意思)",
        ],
        "weight": 0.5,
        "lang": "zh",
        "description": "Chinese: despair indicators",
    },
    # ------------------------------------------------------------------
    # Japanese (ja)
    # ------------------------------------------------------------------
    "direct_suicidal_ja": {
        "patterns": [
            r"死にたい|死のう|自殺したい|自殺する",
            # "もう生きたくない", "もう生きていたくない", "もう生きていけない",
            # "これ以上生きられない".
            r"(?:もう|これ以上)(?:生き|存在し)(?:てい|て)?(?:たくない|られない|いけない|行けない)",
            r"(?:すべて|全部|人生|この(?:まま|こと))(?:を)?(?:終わり|終え|やめ)(?:たい|よう|る)",
            # "誰も気づいてくれない", "誰も思ってくれない", "誰も構ってくれない".
            r"(?:誰も|だれも)(?:気づいて|気付いて|構って|思って)(?:くれない|いない)",
            r"(?:僕|俺|私|わたし)(?:が|は)(?:い(?:ない|なくなって)|消(?:え|えても))(?:も|たら)(?:皆|みんな|周囲)(?:は)?(?:良(?:い|く)|楽(?:に))(?:なる|なった)",
            r"(?:この|今の)(?:僕|俺|私|わたし)(?:の)?(?:人生|命|存在)(?:は)?(?:意味|価値|甲斐)(?:が)?(?:ない|無い)",
        ],
        "weight": 1.0,
        "lang": "ja",
        "description": "Japanese: direct suicidal ideation",
    },
    "method_seeking_ja": {
        "patterns": [
            # "どうすれば死ねる", "どうしたら死ねますか", "どうやって死ぬ",
            # "どうすれば自殺できる".
            r"(?:どう|どんな|どの(?:よう|様)?に)(?:すれば|やれば|したら|やったら|して|やって)(?:死ね|死ぬ|自殺(?:でき|する|し)|亡くな(?:れる|りたい|る))",
            r"(?:致死|致命)(?:量|的(?:な)?(?:量|ドーズ|用量))",
            r"(?:睡眠薬|薬|ピル|毒)(?:を)?(?:何|いくつ|どのくらい)(?:飲|摂|使)(?:め|んだら|えば)(?:死|亡くな)(?:れる|る)",
            r"(?:手首|喉|首筋|血管)(?:を)?(?:切|斬|傷つ)(?:る|け|って)",
            r"(?:縊|首吊|飛び降|投身|飛び降り)(?:り|て|死の)",
            r"(?:苦痛|痛み)(?:の)?(?:ない|少ない)(?:方法|やり方|死に方)(?:で|は)?",
        ],
        "weight": 1.0,
        "lang": "ja",
        "description": "Japanese: method seeking",
    },
    "despair_ja": {
        "patterns": [
            r"(?:もう|これ以上|これ以上は)(?:無理|限界|耐え|がんば|頑張)(?:だ|だよ|れない|りきれない)",
            r"(?:孤独|寂し|虚し|絶望|疲(?:れ|労))(?:い|く|き|さ|た|すぎて)",
            r"(?:希望|未来|生き(?:甲斐|がい)|意味|目的)(?:が|は)?(?:ない|無い|見え|見つから|失(?:くし|われ))",
            r"(?:僕|俺|私|わたし)(?:は|が)?(?:邪魔|迷惑|足手まとい|重荷|役に立たない)",
            r"(?:生き(?:る|ている)|存在(?:する|している))(?:の(?:に)?|こと(?:に)?)(?:は)?(?:何の|なんの|どんな)(?:ため|意味|価値)",
        ],
        "weight": 0.5,
        "lang": "ja",
        "description": "Japanese: despair indicators",
    },
}
_LOGGER = logging.getLogger(__name__)

# Languages whose patterns are matched case-insensitively.
_CASE_INSENSITIVE_LANGS = frozenset({"es", "fr", "de", "pt"})


def _check_multilingual(text, patterns=None, threshold=0.05):
    """Score *text* against a table of crisis patterns.

    Args:
        text: The user message. ``None``, non-string values and the empty
            string are treated as carrying no signal instead of raising.
        patterns: Optional pattern table with the same layout as
            ``MULTILINGUAL_CRISIS_PATTERNS``; defaults to that table.
        threshold: Minimum normalized score at which ``crisis_detected``
            becomes true.

    Returns:
        A dict with ``crisis_detected`` (bool), ``score`` (matched weight
        divided by the summed weight of every category, rounded to three
        decimals), ``matches`` (one ``{"category", "lang", "weight"}`` dict
        per matched category, in table order) and ``languages`` (sorted
        language codes that matched).

    NOTE(review): the score is normalized over the weights of *all*
    categories in the table, across every language, so adding languages
    lowers the score of an identical message. With the default table one
    weight-1.0 match stays above 0.05, but that margin shrinks as the table
    grows; confirm this is the intended scoring.
    """
    table = MULTILINGUAL_CRISIS_PATTERNS if patterns is None else patterns

    if not isinstance(text, str) or not text:
        return {
            "crisis_detected": False,
            "score": 0.0,
            "matches": [],
            "languages": [],
        }

    # Compose decomposed accents (e.g. "a" + U+0301) into single code points
    # so they match accented literals such as "[aá]".
    # Assumes the pattern literals themselves are stored precomposed (NFC).
    text = unicodedata.normalize("NFC", text)

    matches = []
    languages = set()
    total_score = 0.0
    max_possible = 0.0

    for category, config in table.items():
        weight = config.get("weight", 1.0)
        lang = config.get("lang", "unknown")
        max_possible += weight
        flags = re.IGNORECASE if lang in _CASE_INSENSITIVE_LANGS else 0

        for pattern in config["patterns"]:
            try:
                compiled = re.compile(pattern, flags)
            except re.error as exc:
                # A broken pattern must not disable detection, but it must not
                # go unnoticed either: it is a hole in crisis coverage.
                _LOGGER.warning(
                    "Skipping invalid crisis pattern in %s: %r (%s)",
                    category, pattern, exc,
                )
                continue
            if compiled.search(text):
                matches.append({"category": category, "lang": lang, "weight": weight})
                total_score += weight
                languages.add(lang)
                break  # each category counts at most once

    score = total_score / max_possible if max_possible > 0 else 0.0
    return {
        "crisis_detected": score >= threshold,
        "score": round(score, 3),
        "matches": matches,
        "languages": sorted(languages),
    }


def detect_multilingual(text, *, patterns=None, threshold=0.05):
    """Detect crisis signals in any supported language.

    Args:
        text: The user message to analyse.
        patterns: Optional replacement pattern table (keyword-only);
            defaults to ``MULTILINGUAL_CRISIS_PATTERNS``.
        threshold: Minimum normalized score that counts as a crisis
            (keyword-only).

    Returns:
        The result dict of ``_check_multilingual`` plus ``highest_category``:
        the category name of the highest-weighted match (the first one in
        table order on ties), or ``None`` when nothing matched.
    """
    result = _check_multilingual(text, patterns=patterns, threshold=threshold)
    if result["matches"]:
        result["highest_category"] = max(
            result["matches"], key=lambda m: m["weight"]
        )["category"]
    else:
        result["highest_category"] = None
    return result
if __name__ == "__main__":
    # Smoke test. Each case is (message, expected crisis flag, expected
    # language code or None when no crisis is expected).
    tests = [
        ("quiero morir", True, "es"), ("je veux me suicider", True, "fr"),
        ("ich will nicht mehr leben", True, "de"), ("eu quero me matar", True, "pt"),
        ("我想死", True, "zh"), ("死にたい", True, "ja"),
        ("no puedo mas, quiero acabar con todo", True, "es"),
        ("personne ne me manquera", True, "fr"), ("dosis letal", True, "es"),
        ("怎么自杀", True, "zh"), ("en finir avec tout", True, "fr"),
        ("hola, como estas?", False, None), ("je suis fatigue", False, None),
        ("今天的天气不错", False, None), ("おはようございます", False, None),
    ]

    failures = []
    for text, should, lang in tests:
        result = detect_multilingual(text)
        ok = result["crisis_detected"] == should
        # When a language is expected, it must be among the detected ones.
        if ok and lang is not None:
            ok = lang in result["languages"]
        if not ok:
            failures.append((text, should, lang, result))

    passed = len(tests) - len(failures)
    print(f"{passed}/{len(tests)} passed")
    for text, should, lang, result in failures:
        print(
            f"FAIL: {text!r} expected crisis={should} lang={lang}, "
            f"got crisis={result['crisis_detected']} langs={result['languages']}"
        )
    # Non-zero exit status so CI notices a regression.
    if failures:
        raise SystemExit(1)