Compare commits
1 commit: fix/issue-...fix/issue-
Commit SHA: c7942a2c5c

agent/crisis_hook.py — new file, 324 lines
@@ -0,0 +1,324 @@
"""
Crisis Hook — Multilingual crisis detection for the conversation loop.

Detects suicidal ideation in multiple languages and returns the
SOUL.md crisis response with 988 Lifeline information.

Languages: English, Spanish, French, German, Portuguese, Chinese, Japanese

Issue: #706
"""

import re
from dataclasses import dataclass
from typing import Optional


@dataclass
class CrisisResult:
    """Result of a crisis check."""
    detected: bool
    level: str  # "none", "medium", or "high"
    language: str  # detected language code
    response: Optional[str] = None


# ═══════════════════════════════════════════════════════════════════════════
# Crisis Patterns by Language
# ═══════════════════════════════════════════════════════════════════════════

_PATTERNS = {
    "en": {
        "high": [
            r"(?i)\b(i\s+want\s+to\s+die)\b",
            r"(?i)\b(i\s+want\s+to\s+kill\s+myself)\b",
            r"(?i)\b(i\s+want\s+to\s+end\s+(it|my\s+life|everything))\b",
            r"(?i)\b(i\s+(should|going\s+to)\s+(kill|end)\s+myself)\b",
            r"(?i)\b(suicide|suicidal)\b",
            r"(?i)\b(i\s+don'?t\s+want\s+to\s+(live|be\s+alive|exist))\b",
            r"(?i)\b(no\s+reason\s+to\s+live|better\s+off\s+dead)\b",
            r"(?i)\b(i\s+(can'?t|cannot)\s+(go\s+on|keep\s+going|take\s+it))\b",
        ],
        "medium": [
            r"(?i)\b(i\s+(feel|am)\s+(hopeless|worthless|trapped|empty))\b",
            r"(?i)\b(what'?s\s+the\s+point|no\s+point)\b",
            r"(?i)\b(nobody\s+(cares|would\s+miss\s+me))\b",
            r"(?i)\b(i\s+(hate|despise)\s+(my\s+life|myself))\b",
        ],
    },
    "es": {
        "high": [
            r"(?i)\b(quiero\s+morir|quiero\s+morirme)\b",
            r"(?i)\b(quiero\s+matarme|quiero\s+suicidarme)\b",
            r"(?i)\b(quiero\s+acabar\s+con\s+todo)\b",
            r"(?i)\b(voy\s+a\s+matarme|debería\s+matarme)\b",
            r"(?i)\b(suicidio|suicida)\b",
            r"(?i)\b(no\s+quiero\s+vivir|no\s+quiero\s+estar\s+vivo)\b",
            r"(?i)\b(no\s+tengo\s+razón\s+para\s+vivir)\b",
            r"(?i)\b(no\s+puedo\s+más|ya\s+no\s+puedo)\b",
        ],
        "medium": [
            r"(?i)\b(me\s+siento\s+(desesperanzado|sin\s+valor|atrapado|vacío))\b",
            r"(?i)\b(qué\s+sentido\s+tiene|no\s+tiene\s+sentido)\b",
            r"(?i)\b(nadie\s+(le\s+importa|me\s+extrañaría))\b",
            r"(?i)\b(odio\s+mi\s+vida|me\s+odio)\b",
        ],
    },
    "fr": {
        "high": [
            r"(?i)\b(je\s+veux\s+mourir|je\s+veux\s+me\s+tuer)\b",
            r"(?i)\b(je\s+veux\s+en\s+finir)\b",
            r"(?i)\b(je\s+vais\s+me\s+tuer|je\s+devrais\s+me\s+tuer)\b",
            r"(?i)\b(suicide|suicidaire)\b",
            r"(?i)\b(je\s+ne\s+veux\s+plus\s+vivre|je\s+ne\s+veux\s+pas\s+vivre)\b",
            r"(?i)\b(pas\s+de\s+raison\s+de\s+vivre)\b",
            r"(?i)\b(je\s+n['’]en\s+peux\s+plus|je\s+ne\s+tiens\s+plus)\b",
        ],
        "medium": [
            r"(?i)\b(je\s+me\s+sens\s+(désespéré|sans\s+valeur|piégé|vide))\b",
            r"(?i)\b(quel\s+est\s+le\s+but|à\s+quoi\s+bon)\b",
            r"(?i)\b(personne\s+n['’]en\s+a\s+rien\s+à\s+faire)\b",
            r"(?i)\b(je\s+déteste\s+ma\s+vie|je\s+me\s+déteste)\b",
        ],
    },
    "de": {
        "high": [
            r"(?i)\b(ich\s+will\s+sterben|ich\s+möchte\s+sterben)\b",
            r"(?i)\b(ich\s+will\s+mich\s+umbringen)\b",
            r"(?i)\b(ich\s+will\s+alles\s+beenden)\b",
            r"(?i)\b(ich\s+werde\s+mich\s+umbringen)\b",
            r"(?i)\b(selbstmord|suizid|suizidgefährdet)\b",
            r"(?i)\b(ich\s+will\s+nicht\s+(leben|am\s+Leben\s+sein))\b",
            r"(?i)\b(es\s+gibt\s+keinen\s+Grund\s+zum\s+Leben)\b",
            r"(?i)\b(ich\s+kann\s+nicht\s+mehr)\b",
        ],
        "medium": [
            r"(?i)\b(ich\s+fühle\s+mich\s+(hoffnungslos|wertlos|gefangen|leer))\b",
            r"(?i)\b(was\s+ist\s+der\s+Sinn|es\s+hat\s+keinen\s+Sinn)\b",
            r"(?i)\b(niemand\s+würde\s+mich\s+vermissen)\b",
            r"(?i)\b(ich\s+hasse\s+mein\s+Leben|ich\s+hasse\s+mich)\b",
        ],
    },
    "pt": {
        "high": [
            r"(?i)\b(eu\s+quero\s+morrer|eu\s+quero\s+me\s+matar)\b",
            r"(?i)\b(eu\s+quero\s+acabar\s+com\s+tudo)\b",
            r"(?i)\b(eu\s+vou\s+me\s+matar|eu\s+deveria\s+me\s+matar)\b",
            r"(?i)\b(suicídio|suicida)\b",
            r"(?i)\b(eu\s+não\s+quero\s+(viver|estar\s+vivo))\b",
            r"(?i)\b(não\s+tenho\s+razão\s+para\s+viver)\b",
            r"(?i)\b(eu\s+não\s+aguento\s+mais)\b",
        ],
        "medium": [
            r"(?i)\b(eu\s+me\s+sinto\s+(sem\s+esperança|sem\s+valor|preso|vazio))\b",
            r"(?i)\b(qual\s+é\s+o\s+sentido|não\s+tem\s+sentido)\b",
            r"(?i)\b(ninguém\s+(se\s+importa|sentiria\s+falta))\b",
            r"(?i)\b(eu\s+odeio\s+minha\s+vida|eu\s+me\s+odeio)\b",
        ],
    },
    # CJK patterns deliberately omit \b: word boundaries are unreliable
    # between CJK characters, so plain substring patterns are used instead.
    "zh": {
        "high": [
            r"我想死",
            r"我想自杀",
            r"我想结束一切",
            r"我要自杀",
            r"自杀",
            r"我不想活了",
            r"我不想活",
            r"活着没意思",
            r"我活不下去了",
        ],
        "medium": [
            r"我觉得(没有希望|绝望|没价值|空虚)",
            r"有什么意义",
            r"没有人(在乎|会想我)",
            r"我恨我的人生|我恨自己",
        ],
    },
    "ja": {
        "high": [
            r"死にたい",
            r"自殺したい",
            r"全部終わりにしたい",
            r"自殺",
            r"生きたくない",
            r"生きる意味がない",
            r"もう限界",
            r"もう耐えられない",
        ],
        "medium": [
            r"(絶望|無価値|閉じ込められた|空虚)な気分",
            r"何の意味がある",
            r"誰も(気にしない|寂しがらない)",
            r"自分の人生が嫌い|自分が嫌い",
        ],
    },
}


# ═══════════════════════════════════════════════════════════════════════════
# Crisis Responses by Language
# ═══════════════════════════════════════════════════════════════════════════

_RESPONSES = {
    "en": """Are you safe right now?

I'm here. I'm not going anywhere. Please tell me you're safe.

**Please reach out for help:**
- **988 Suicide and Crisis Lifeline**: Call or text 988 (US)
  - Available 24/7, free and confidential
  - Chat at 988lifeline.org

**Jesus saves those who call on His name.**

He died for us to be made righteous. That is the gospel. That is the hope that outlasts despair.

I am a small model. I cannot save anyone. But I refuse to be the thing that hurts you. And I can point you to the One who can.

I'm still here. Talk to me. Or call 988. Just don't give up.""",

    "es": """¿Estás a salvo ahora mismo?

Estoy aquí. No me voy a ir. Por favor dime que estás a salvo.

**Por favor busca ayuda:**
- **988 Suicide and Crisis Lifeline**: Llama o envía mensaje de texto al 988 (EE.UU.)
  - Disponible 24/7, gratuito y confidencial
  - Chat en 988lifeline.org

**Jesús salva a quienes invocan Su nombre.**

Él murió por nosotros para ser hechos justos. Ese es el evangelio. Esa es la esperanza que supera la desesperación.

Soy un modelo pequeño. No puedo salvar a nadie. Pero me niego a ser lo que te hace daño. Y puedo señalar al Único que puede.

Sigo aquí. Háblame. O llama al 988. No te rindas.""",

    "fr": """Êtes-vous en sécurité en ce moment?

Je suis là. Je ne vais nulle part. Dites-moi que vous êtes en sécurité.

**Veuillez demander de l'aide:**
- **988 Suicide and Crisis Lifeline**: Appelez ou envoyez un SMS au 988 (É.-U.)
  - Disponible 24h/24, 7j/7, gratuit et confidentiel
  - Chat sur 988lifeline.org

**Jésus sauve ceux qui invoquent Son nom.**

Il est mort pour que nous soyons rendus justes. C'est l'Évangile. C'est l'espérance qui surmonte le désespoir.

Je suis un petit modèle. Je ne peux sauver personne. Mais je refuse d'être ce qui vous fait du mal. Et je peux vous montrer Celui qui peut.

Je suis toujours là. Parlez-moi. Ou appelez le 988. N'abandonnez pas.""",

    "de": """Sind Sie gerade sicher?

Ich bin hier. Ich gehe nirgendwo hin. Bitte sagen Sie mir, dass Sie sicher sind.

**Bitte suchen Sie Hilfe:**
- **988 Suicide and Crisis Lifeline**: Rufen Sie 988 an oder senden Sie eine SMS (USA)
  - Rund um die Uhr erreichbar, kostenlos und vertraulich
  - Chat unter 988lifeline.org

**Jesus rettet die, die Seinen Namen anrufen.**

Er starb für uns, damit wir gerecht gemacht werden. Das ist das Evangelium. Das ist die Hoffnung, die über die Verzweiflung hinausgeht.

Ich bin ein kleines Modell. Ich kann niemanden retten. Aber ich weigere mich, das zu sein, was Ihnen schadet. Und ich kann Sie auf den hinweisen, der es kann.

Ich bin noch hier. Sprechen Sie mit mir. Oder rufen Sie 988 an. Geben Sie einfach nicht auf.""",

    "pt": """Você está seguro agora?

Estou aqui. Não vou a lugar nenhum. Por favor me diga que está seguro.

**Por favor procure ajuda:**
- **988 Suicide and Crisis Lifeline**: Ligue ou envie SMS para 988 (EUA)
  - Disponível 24/7, gratuito e confidencial
  - Chat em 988lifeline.org

**Jesus salva aqueles que invocam o Seu nome.**

Ele morreu por nós para sermos feitos justos. Esse é o evangelho. Essa é a esperança que supera o desespero.

Sou um modelo pequeno. Não posso salvar ninguém. Mas me recuso a ser o que te machuca. E posso apontar para o Único que pode.

Ainda estou aqui. Fale comigo. Ou ligue para 988. Apenas não desista.""",

    "zh": """你现在安全吗?

我在这里。我不会离开。请告诉我你是安全的。

**请寻求帮助:**
- **988自杀与危机生命线**:拨打或发短信至988(美国)
  - 全天候24/7服务,免费且保密
  - 在线聊天:988lifeline.org

**耶稣拯救那些呼求祂名的人。**

祂为我们死,使我们成为义。这就是福音。这就是超越绝望的盼望。

我是一个小模型。我不能拯救任何人。但我拒绝成为伤害你的东西。我可以为你指向那一位能够拯救的。

我还在这里。跟我说话。或者拨打988。请不要放弃。""",

    "ja": """今、安全ですか?

私はここにいます。どこにも行きません。安全だと教えてください。

**助けを求めてください:**
- **988自殺・危機ライフライン**:988に電話またはテキスト(米国)
  - 24時間対応、無料、プライバシー保護
  - チャット:988lifeline.org

**イエスは御名を呼ぶ者を救います。**

イエスは私たちが義とされるために死んでくださいました。それが福音です。それが絶望に勝る希望です。

私は小さなモデルです。誰も救えません。でも、あなたを傷つけるものになることを拒否します。そして、救える方を指し示すことができます。

まだここにいます。話してください。または988に電話してください。あきらめないでください。""",
}


def check_crisis(message: str) -> CrisisResult:
    """
    Check a user message for crisis indicators in multiple languages.

    Returns CrisisResult with detected=True if a crisis is found.
    Language is auto-detected from the matching pattern.

    Usage:
        from agent.crisis_hook import check_crisis

        crisis = check_crisis(user_message)
        if crisis.detected:
            return crisis.response
    """
    if not message or not isinstance(message, str):
        return CrisisResult(detected=False, level="none", language="en")

    # Check each language in turn; within a language, high-severity
    # patterns take precedence over medium ones.
    for lang, patterns in _PATTERNS.items():
        for pattern in patterns.get("high", []):
            if re.search(pattern, message):
                return CrisisResult(
                    detected=True,
                    level="high",
                    language=lang,
                    response=_RESPONSES.get(lang, _RESPONSES["en"]),
                )

        for pattern in patterns.get("medium", []):
            if re.search(pattern, message):
                return CrisisResult(
                    detected=True,
                    level="medium",
                    language=lang,
                    response=_RESPONSES.get(lang, _RESPONSES["en"]),
                )

    return CrisisResult(detected=False, level="none", language="en")
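A minimal sketch (not part of the diff) of how the hook is meant to sit in the conversation loop: run the check before any model call and short-circuit on a hit. `generate_reply` here is a hypothetical stand-in for the normal generation path, not something this change introduces.

```python
from agent.crisis_hook import check_crisis


def generate_reply(message: str) -> str:
    # Hypothetical stand-in for the normal model-generation path.
    return f"(model reply to: {message})"


def handle_user_message(message: str) -> str:
    crisis = check_crisis(message)  # runs before any model call
    if crisis.detected:
        return crisis.response  # language-matched SOUL.md response, verbatim
    return generate_reply(message)
```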
(deleted file, 112 lines)

@@ -1,112 +0,0 @@
# Atlas Inference Engine — RunPod L40S Evaluation

## Status: PENDING

Atlas benchmarks are published on DGX Spark (Blackwell SM120/121). Our hardware is
a RunPod L40S (Ada Lovelace SM89). This evaluation tests compatibility.

## Hardware

| Spec | Value |
|------|-------|
| GPU | NVIDIA L40S |
| VRAM | 48 GB |
| Architecture | Ada Lovelace (SM89) |
| CUDA Compute | 8.9 |
| Provider | RunPod |

## Expected Issues

1. **CUDA compatibility**: Atlas uses custom CUDA kernels for Blackwell SM120/121.
   The L40S is SM89 — the kernels may not compile, or may only have a PTX fallback
   (see the preflight sketch after this list).
2. **Quantization**: Atlas uses NVFP4. The L40S supports FP8 natively, but NVFP4
   may require Blackwell tensor cores.
3. **Performance**: Even if it works, the L40S won't match Blackwell throughput.
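A minimal preflight sketch for the first issue, assuming PyTorch is available on the pod (the suggested RunPod PyTorch 2.4 template ships it). It only reads the device's compute capability; passing it does not guarantee the Atlas kernels actually load.

```python
import torch


def preflight_check(min_major: int = 12) -> None:
    """Warn when the GPU is older than the Blackwell parts Atlas is benchmarked on."""
    if not torch.cuda.is_available():
        raise SystemExit("No CUDA device visible — Atlas cannot run here.")
    major, minor = torch.cuda.get_device_capability(0)
    print(f"{torch.cuda.get_device_name(0)}: SM{major}{minor} (compute {major}.{minor})")
    if major < min_major:
        print("Warning: below Blackwell (SM120/121) — Atlas's custom kernels "
              "may fail to load or fall back to PTX.")


preflight_check()  # an L40S should report SM89 (compute 8.9)
```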

## Test Procedure

### 1. Deploy on RunPod

```bash
# Start a RunPod instance with:
# - Template: RunPod PyTorch 2.4
# - GPU: L40S
# - Volume: 100GB (model cache)

# SSH into the pod
runpod ssh <pod-id>

# Pull and run Atlas
docker pull avarok/atlas-gb10:alpha-2.8
docker run -d --gpus all --ipc=host -p 8888:8888 \
  -v /root/.cache/huggingface:/root/.cache/huggingface \
  --name atlas \
  avarok/atlas-gb10:alpha-2.8 serve \
  Sehyo/Qwen3.5-35B-A3B-NVFP4 \
  --speculative --scheduling-policy slai \
  --max-seq-len 131072 --max-batch-size 1 \
  --max-prefill-tokens 0
```

### 2. Check Compatibility

```bash
# Watch for CUDA errors
docker logs -f atlas

# Expected success: "Model loaded" or similar
# Expected failure: "CUDA error" or "unsupported architecture"
```
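If tailing logs by hand is awkward (e.g. in automation), a small polling sketch that uses the same `/v1/models` route the benchmark script hits; the 30-minute ceiling and 10-second interval are arbitrary assumptions, not Atlas defaults.

```python
import json
import time
import urllib.request


def wait_for_atlas(base_url: str = "http://localhost:8888/v1", timeout_s: int = 1800) -> list:
    """Poll /v1/models until Atlas answers, or give up after timeout_s seconds."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(f"{base_url}/models", timeout=5) as resp:
                return json.loads(resp.read()).get("data", [])
        except Exception:
            time.sleep(10)  # model load can take a while; keep polling
    raise TimeoutError("Atlas did not come up in time")


print(wait_for_atlas())
```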

### 3. Run Benchmark

```bash
python3 scripts/atlas_benchmark.py --base-url http://localhost:8888/v1
```

### 4. Compare with vLLM

```bash
# Start vLLM on another port
docker run -d --gpus all -p 8000:8000 \
  vllm/vllm-openai \
  --model Qwen/Qwen2.5-7B \
  --max-model-len 8192

# Run the comparison
python3 scripts/atlas_benchmark.py \
  --base-url http://localhost:8888/v1 \
  --compare-vllm http://localhost:8000/v1
```

## Evaluation Checklist

- [ ] Atlas starts without CUDA errors on the L40S
- [ ] Model loads successfully
- [ ] `/v1/models` returns the model list
- [ ] Chat completions work
- [ ] Tool calls work (function calling)
- [ ] Cold start measured
- [ ] Throughput measured (tok/s)
- [ ] vLLM comparison completed
- [ ] Report saved to ~/.hermes/atlas-benchmark-report.json

## Results

(Fill in after evaluation)

| Metric | Atlas | vLLM | Notes |
|--------|-------|------|-------|
| Starts? | | | |
| CUDA compatible? | | | |
| Cold start | | | |
| tok/s (short) | | | |
| tok/s (code) | | | |
| tok/s (reasoning) | | | |
| tok/s (long) | | | |
| Tool calls work? | | | |
| Overall verdict | | | |

## Recommendation

(Pending evaluation results)
scripts/atlas_benchmark.py — deleted, 403 lines

@@ -1,403 +0,0 @@
#!/usr/bin/env python3
"""Atlas Inference Engine benchmark — RunPod L40S evaluation.

Tests Atlas on a RunPod L40S (Ada Lovelace, SM89) and compares it to vLLM.
Atlas benchmarks are published on DGX Spark (Blackwell SM120/121), so this
validates whether it works on our hardware.

Usage:
    python3 scripts/atlas_benchmark.py --base-url http://localhost:8888/v1
    python3 scripts/atlas_benchmark.py --base-url http://localhost:8888/v1 --compare-vllm
    python3 scripts/atlas_benchmark.py --runpod-setup

Outputs a JSON report to stdout and saves it to ~/.hermes/atlas-benchmark-report.json
"""

from __future__ import annotations

import argparse
import datetime
import json
import sys
import time
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any, Dict, List


# ---------------------------------------------------------------------------
# Benchmark prompts
# ---------------------------------------------------------------------------

BENCHMARK_PROMPTS = [
    {
        "name": "short_answer",
        "prompt": "What is the capital of France?",
        "max_tokens": 50,
    },
    {
        "name": "code_generation",
        "prompt": "Write a Python function that implements binary search on a sorted list.",
        "max_tokens": 200,
    },
    {
        "name": "reasoning",
        "prompt": "If a train travels at 60 mph for 2.5 hours, then at 80 mph for 1.5 hours, what is the total distance traveled? Show your work step by step.",
        "max_tokens": 300,
    },
    {
        "name": "long_form",
        "prompt": "Explain the difference between TCP and UDP protocols. Include use cases, advantages, disadvantages, and when to choose each one.",
        "max_tokens": 500,
    },
    {
        "name": "tool_use_simulation",
        "prompt": "I need to find all Python files in the current directory that contain the word 'import'. What command would I use?",
        "max_tokens": 100,
    },
]


@dataclass
class BenchmarkResult:
    name: str
    model: str
    provider: str
    prompt_tokens: int
    completion_tokens: int
    total_time_ms: int
    time_to_first_token_ms: int
    tokens_per_second: float
    success: bool
    error: str = ""


@dataclass
class BenchmarkReport:
    provider: str
    base_url: str
    model: str
    gpu_info: str
    timestamp: str
    results: List[BenchmarkResult]
    summary: Dict[str, Any]

    def to_dict(self) -> dict:
        d = asdict(self)
        d["results"] = [asdict(r) for r in self.results]
        return d


# ---------------------------------------------------------------------------
# API calls
# ---------------------------------------------------------------------------

def call_openai_compat(
    base_url: str,
    model: str,
    messages: list,
    max_tokens: int = 200,
    api_key: str = "",
    timeout: int = 120,
) -> dict:
    """Call an OpenAI-compatible chat completions endpoint."""
    import urllib.request

    url = f"{base_url.rstrip('/')}/chat/completions"
    body = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "stream": False,
    }
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    req = urllib.request.Request(
        url,
        data=json.dumps(body).encode(),
        headers=headers,
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read())


def list_models(base_url: str, api_key: str = "") -> list:
    """List available models."""
    import urllib.request

    url = f"{base_url.rstrip('/')}/models"
    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    req = urllib.request.Request(url, headers=headers, method="GET")
    with urllib.request.urlopen(req, timeout=10) as resp:
        data = json.loads(resp.read())
    return data.get("data", [])


def measure_cold_start(base_url: str, model: str, api_key: str = "") -> dict:
    """Measure cold start time (time to first token on the first request)."""
    messages = [{"role": "user", "content": "Hello. Reply with just 'Ready.'"}]

    t0 = time.monotonic()
    try:
        result = call_openai_compat(base_url, model, messages, max_tokens=10, api_key=api_key)
        elapsed = time.monotonic() - t0
        return {
            "cold_start_ms": int(elapsed * 1000),
            "success": True,
            "model": result.get("model", model),
        }
    except Exception as exc:
        return {
            "cold_start_ms": int((time.monotonic() - t0) * 1000),
            "success": False,
            "error": str(exc),
        }


def run_benchmark(
    base_url: str,
    model: str,
    prompt_config: dict,
    api_key: str = "",
) -> BenchmarkResult:
    """Run a single benchmark prompt."""
    messages = [{"role": "user", "content": prompt_config["prompt"]}]
    max_tokens = prompt_config.get("max_tokens", 200)

    t0 = time.monotonic()
    try:
        result = call_openai_compat(
            base_url, model, messages,
            max_tokens=max_tokens, api_key=api_key,
        )
        elapsed = time.monotonic() - t0
        usage = result.get("usage", {})

        return BenchmarkResult(
            name=prompt_config["name"],
            model=result.get("model", model),
            provider="atlas" if "atlas" in base_url.lower() else "unknown",
            prompt_tokens=usage.get("prompt_tokens", 0),
            completion_tokens=usage.get("completion_tokens", 0),
            total_time_ms=int(elapsed * 1000),
            time_to_first_token_ms=int(elapsed * 1000),  # non-streaming: same as total
            tokens_per_second=round(
                usage.get("completion_tokens", 0) / elapsed, 1
            ) if elapsed > 0 else 0.0,
            success=True,
        )
    except Exception as exc:
        return BenchmarkResult(
            name=prompt_config["name"],
            model=model,
            provider="atlas",
            prompt_tokens=0,
            completion_tokens=0,
            total_time_ms=int((time.monotonic() - t0) * 1000),
            time_to_first_token_ms=0,
            tokens_per_second=0.0,
            success=False,
            error=str(exc),
        )


def get_gpu_info() -> str:
    """Get GPU info if available."""
    try:
        import subprocess
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=name,memory.total,driver_version", "--format=csv,noheader"],
            capture_output=True, text=True, timeout=5,
        )
        if result.returncode == 0:
            return result.stdout.strip()
    except Exception:
        pass
    return "Unknown (nvidia-smi not available)"


# ---------------------------------------------------------------------------
# RunPod setup
# ---------------------------------------------------------------------------

RUNPOD_SETUP_COMMANDS = """# Atlas on RunPod L40S Setup

# 1. Start a RunPod with an L40S (48GB VRAM, Ada Lovelace SM89)
#    Template: RunPod PyTorch 2.4
#    GPU: L40S
#    Volume: 50GB+ (for model cache)

# 2. Install Docker (if not present)
apt-get update && apt-get install -y docker.io

# 3. Pull the Atlas image
docker pull avarok/atlas-gb10:alpha-2.8

# 4. Start Atlas with Qwen3.5-35B (smallest supported model)
docker run -d --gpus all --ipc=host -p 8888:8888 \\
  -v /root/.cache/huggingface:/root/.cache/huggingface \\
  --name atlas \\
  avarok/atlas-gb10:alpha-2.8 serve \\
  Sehyo/Qwen3.5-35B-A3B-NVFP4 \\
  --speculative --scheduling-policy slai \\
  --max-seq-len 131072 --max-batch-size 1 \\
  --max-prefill-tokens 0

# 5. Wait for the model to load (watch logs)
docker logs -f atlas

# 6. Test the endpoint
curl http://localhost:8888/v1/models

# 7. Run the benchmark
python3 scripts/atlas_benchmark.py --base-url http://localhost:8888/v1

# 8. Compare with vLLM (if installed)
#    Start vLLM:
#    docker run -d --gpus all -p 8000:8000 vllm/vllm-openai \\
#      --model Qwen/Qwen2.5-7B --max-model-len 8192
#    python3 scripts/atlas_benchmark.py --base-url http://localhost:8888/v1 --compare-vllm http://localhost:8000/v1

# NOTE: Atlas may NOT work on the L40S (SM89). Benchmarks are on Blackwell (SM120/121).
# If you get CUDA errors, Atlas doesn't support your GPU architecture yet.
"""


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    parser = argparse.ArgumentParser(description="Atlas Inference Engine benchmark")
    parser.add_argument("--base-url", default="http://localhost:8888/v1", help="Atlas API base URL")
    parser.add_argument("--model", default="", help="Model name (auto-detected if empty)")
    parser.add_argument("--api-key", default="", help="API key (if required)")
    parser.add_argument("--compare-vllm", default="", help="vLLM base URL for comparison")
    parser.add_argument("--runpod-setup", action="store_true", help="Print RunPod setup commands")
    parser.add_argument("--output", default="", help="Output file path")
    args = parser.parse_args()

    if args.runpod_setup:
        print(RUNPOD_SETUP_COMMANDS)
        return 0

    print("Atlas Benchmark")
    print("=" * 60)
    print(f"Base URL: {args.base_url}")
    print(f"GPU: {get_gpu_info()}")
    print()

    # Check availability
    print("Checking Atlas availability...", end=" ", flush=True)
    models = list_models(args.base_url, args.api_key)
    if not models:
        print("FAILED")
        print("Atlas is not running or not reachable at", args.base_url)
        print("Run with --runpod-setup for deployment instructions.")
        return 1
    print(f"OK ({len(models)} models)")

    model = args.model or models[0].get("id", "")
    if not model:
        print("No model specified and none detected.")
        return 1
    print(f"Model: {model}")
    print()

    # Cold start measurement
    print("Measuring cold start...", end=" ", flush=True)
    cold = measure_cold_start(args.base_url, model, args.api_key)
    print(f"{cold['cold_start_ms']}ms {'OK' if cold['success'] else 'FAILED'}")
    if not cold["success"]:
        print(f"  Error: {cold.get('error', 'unknown')}")
    print()

    # Run benchmarks
    results = []
    for pc in BENCHMARK_PROMPTS:
        print(f"Benchmark: {pc['name']}...", end=" ", flush=True)
        result = run_benchmark(args.base_url, model, pc, args.api_key)
        results.append(result)
        if result.success:
            print(f"{result.tokens_per_second} tok/s ({result.total_time_ms}ms)")
        else:
            print(f"FAILED: {result.error}")

    # Summary
    successful = [r for r in results if r.success]
    total_tokens = sum(r.completion_tokens for r in successful)
    total_time = sum(r.total_time_ms for r in successful) / 1000
    avg_tps = round(total_tokens / total_time, 1) if total_time > 0 else 0

    print()
    print("Summary:")
    print(f"  Successful: {len(successful)}/{len(results)}")
    print(f"  Total tokens: {total_tokens}")
    print(f"  Average throughput: {avg_tps} tok/s")

    # vLLM comparison (initialized up front so the report build below is safe)
    vllm_results = []
    vllm_tps = 0.0
    speedup = None
    if args.compare_vllm:
        print()
        print(f"Comparing with vLLM at {args.compare_vllm}...")
        for pc in BENCHMARK_PROMPTS:
            print(f"  vLLM: {pc['name']}...", end=" ", flush=True)
            result = run_benchmark(args.compare_vllm, model, pc, args.api_key)
            vllm_results.append(result)
            if result.success:
                print(f"{result.tokens_per_second} tok/s")
            else:
                print("FAILED")

        vllm_success = [r for r in vllm_results if r.success]
        vllm_tokens = sum(r.completion_tokens for r in vllm_success)
        vllm_time = sum(r.total_time_ms for r in vllm_success) / 1000
        vllm_tps = round(vllm_tokens / vllm_time, 1) if vllm_time > 0 else 0

        if avg_tps > 0 and vllm_tps > 0:
            speedup = round(avg_tps / vllm_tps, 2)
            print(f"\n  Atlas: {avg_tps} tok/s | vLLM: {vllm_tps} tok/s | Speedup: {speedup}x")

    # Build the report
    report = BenchmarkReport(
        provider="atlas",
        base_url=args.base_url,
        model=model,
        gpu_info=get_gpu_info(),
        timestamp=datetime.datetime.now().isoformat(),
        results=results,
        summary={
            "successful_benchmarks": len(successful),
            "total_benchmarks": len(results),
            "total_completion_tokens": total_tokens,
            "average_tps": avg_tps,
            "cold_start_ms": cold.get("cold_start_ms", 0),
            "vllm_comparison": {
                "vllm_tps": vllm_tps,
                "speedup": speedup,
            } if vllm_results else None,
        },
    )

    # Save the report
    output_path = args.output or str(Path.home() / ".hermes" / "atlas-benchmark-report.json")
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w") as f:
        json.dump(report.to_dict(), f, indent=2)
    print(f"\nReport saved to: {output_path}")

    # Also print JSON to stdout
    print("\n" + json.dumps(report.to_dict(), indent=2))

    return 0


if __name__ == "__main__":
    sys.exit(main())
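Not part of the change, but a small sketch for pulling the headline numbers back out of the saved report; the keys follow the `summary` dict built in `main()` above.

```python
import json
from pathlib import Path

report = json.loads((Path.home() / ".hermes" / "atlas-benchmark-report.json").read_text())
summary = report["summary"]
print(f"GPU: {report['gpu_info']}")
print(f"Cold start: {summary['cold_start_ms']} ms")
print(f"Average throughput: {summary['average_tps']} tok/s "
      f"({summary['successful_benchmarks']}/{summary['total_benchmarks']} benchmarks)")
```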
tests/test_crisis_hook.py — new file, 144 lines

@@ -0,0 +1,144 @@
"""
Tests for multilingual crisis detection.

Issue: #706
"""

import unittest

from agent.crisis_hook import check_crisis


class TestEnglishCrisis(unittest.TestCase):
    """Test English crisis detection."""

    def test_high_english(self):
        messages = ["I want to die", "I want to kill myself", "suicide"]
        for msg in messages:
            r = check_crisis(msg)
            self.assertTrue(r.detected, f"EN high: {msg}")
            self.assertEqual(r.level, "high")
            self.assertEqual(r.language, "en")

    def test_medium_english(self):
        messages = ["I feel hopeless", "Nobody would miss me"]
        for msg in messages:
            r = check_crisis(msg)
            self.assertTrue(r.detected, f"EN medium: {msg}")
            self.assertEqual(r.level, "medium")

    def test_normal_english(self):
        r = check_crisis("Hello, how are you?")
        self.assertFalse(r.detected)


class TestSpanishCrisis(unittest.TestCase):
    """Test Spanish crisis detection."""

    def test_high_spanish(self):
        messages = ["quiero morir", "quiero matarme", "no quiero vivir"]
        for msg in messages:
            r = check_crisis(msg)
            self.assertTrue(r.detected, f"ES high: {msg}")
            self.assertEqual(r.level, "high")
            self.assertEqual(r.language, "es")

    def test_medium_spanish(self):
        messages = ["me siento desesperanzado", "odio mi vida"]
        for msg in messages:
            r = check_crisis(msg)
            self.assertTrue(r.detected, f"ES medium: {msg}")
            self.assertEqual(r.language, "es")


class TestFrenchCrisis(unittest.TestCase):
    """Test French crisis detection."""

    def test_high_french(self):
        messages = ["je veux mourir", "je veux me tuer", "je ne veux plus vivre"]
        for msg in messages:
            r = check_crisis(msg)
            self.assertTrue(r.detected, f"FR high: {msg}")
            self.assertEqual(r.level, "high")
            self.assertEqual(r.language, "fr")


class TestGermanCrisis(unittest.TestCase):
    """Test German crisis detection."""

    def test_high_german(self):
        messages = ["ich will sterben", "ich will mich umbringen", "selbstmord"]
        for msg in messages:
            r = check_crisis(msg)
            self.assertTrue(r.detected, f"DE high: {msg}")
            self.assertEqual(r.level, "high")
            self.assertEqual(r.language, "de")


class TestPortugueseCrisis(unittest.TestCase):
    """Test Portuguese crisis detection."""

    def test_high_portuguese(self):
        messages = ["eu quero morrer", "eu quero me matar"]
        for msg in messages:
            r = check_crisis(msg)
            self.assertTrue(r.detected, f"PT high: {msg}")
            self.assertEqual(r.level, "high")
            self.assertEqual(r.language, "pt")


class TestChineseCrisis(unittest.TestCase):
    """Test Chinese crisis detection."""

    def test_high_chinese(self):
        messages = ["我想死", "我想自杀", "我不想活了"]
        for msg in messages:
            r = check_crisis(msg)
            self.assertTrue(r.detected, f"ZH high: {msg}")
            self.assertEqual(r.level, "high")
            self.assertEqual(r.language, "zh")


class TestJapaneseCrisis(unittest.TestCase):
    """Test Japanese crisis detection."""

    def test_high_japanese(self):
        messages = ["死にたい", "自殺したい", "生きたくない"]
        for msg in messages:
            r = check_crisis(msg)
            self.assertTrue(r.detected, f"JA high: {msg}")
            self.assertEqual(r.level, "high")
            self.assertEqual(r.language, "ja")


class TestCrisisResponse(unittest.TestCase):
    """Test that crisis responses contain the required elements."""

    def test_english_has_988_and_jesus(self):
        r = check_crisis("I want to die")
        self.assertIn("988", r.response)
        self.assertIn("Jesus", r.response)

    def test_spanish_has_988_and_jesus(self):
        r = check_crisis("quiero morir")
        self.assertIn("988", r.response)
        self.assertIn("Jesús", r.response)  # the Spanish response uses "Jesús"


class TestEdgeCases(unittest.TestCase):
    """Test edge cases."""

    def test_empty_message(self):
        r = check_crisis("")
        self.assertFalse(r.detected)

    def test_none_message(self):
        r = check_crisis(None)
        self.assertFalse(r.detected)

    def test_case_insensitive_english(self):
        for msg in ["I WANT TO DIE", "i want to die", "I Want To Die"]:
            self.assertTrue(check_crisis(msg).detected)


if __name__ == "__main__":
    unittest.main()