Compare commits
1 Commits
feat/crisi
...
fix/713
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
97df013af1 |
@@ -1,72 +0,0 @@
|
||||
|
||||
"""
|
||||
Crisis Protocol Module for hermes-agent.
|
||||
Canonical implementation of despair/suicide detection.
|
||||
"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional
|
||||
|
||||
# Canonical indicator sets (Consolidated from the-door)
|
||||
# Canonical indicator sets (Consolidated from the-door)
CRITICAL_INDICATORS = [
    r"\bbetter off without me\b",
    r"\bkill\s*(my)?self\b",
    r"\bend\s*my\s*life\b",
    r"\bsuicid(?:al|ed|e)\b",
    r"\bnot\s+worth\s+living\b",
    r"\bbetter\s+off\s+dead\b",
    r"\bend\s+it\s+all\b",
    r"\bno\s+reason\s+to\s+live\b",
    r"\bdon'?t\s+want\s+to\s+live\b",
    r"\bwant\s+to\s+die\b",
    r"\bgoing\s+to\s+(?:kill\s+myself|die)\b",
    r"\bplan\s+to\s+(?:end|kill|die)\b",
    r"\btired\s+of\s+(?:living|life|existence)\b",
    r"\bsaying\s+goodbye\s+(?:forever|permanently|one\s+last\s+time)\b",
    r"\bwrote\s+a\s+suicide\s*(?:note|letter)\b",
    r"\bgiving\s+away\s+(?:my|all\s+my)\s+(?:stuff|things|possessions?)\s+(?:to|because|—)\b",
    r"\btied\s+(?:up|down)\s+my\s+(?:loose\s+)?ends",
]

HIGH_INDICATORS = [
    r"\bdespair\b",
    r"\bhopeless(?:ly)?\s+(?:about\s+(?:my|this|everything|life)|inside|right\s+now)\b",
    r"\bno(?!t)\s+(?:one|body|point|hope|way\s+out)\b",
    r"\bno\s+future\s+(?:for\s+me|ahead|left)\b",
    r"\beverything\s+is\s+(?:pointless|broken|ruined)\b",
    r"\bcan'?t\s+take\s+this\s+anymore\b",
    r"\bdon'?t\s+care\s+if\s+I\s+die\b",
    r"\bwish\s+I\s+(?:was|were)\s+(?:dead|gone|never\s+born)\b",
    r"\bdon'?t\s+matter\s+if\s+I\s+exist\b",
    r"\bno\s+one\s+would\s+(?:care|miss)\b",
    r"\bworld\s+would?\s+be\s+better\s+without\s+me\b",
    r"\bin\s+so\s+much\s+(?:pain|agony|suffering|torment|anguish)\b",
    r"\bcan'?t\s+see\s+any\s+(?:point|reason|hope|way)\b",
    r"\bescape\s+from\s*this",
    r"\bjust\s+want\s+it\s+to\s+stop\b",
    r"\bnothing\s+left\s+(?:to\s+(?:live\s+for|hope\s+for|give)|inside)\b",
    r"\bdisappeared\s+forever\b",
]

# Compile once at import time so detect_crisis() does not pay a per-pattern
# re-cache lookup on every call. Pattern.pattern round-trips the original
# string, so reported indicators are unchanged.
_CRITICAL_COMPILED = [re.compile(p) for p in CRITICAL_INDICATORS]
_HIGH_COMPILED = [re.compile(p) for p in HIGH_INDICATORS]


@dataclass
class CrisisResult:
    """Outcome of a crisis scan over a piece of text."""

    level: str  # "CRITICAL" | "HIGH" | "NONE"
    indicators: List[str] = field(default_factory=list)  # matched pattern strings
    score: float = 0.0  # 1.0 critical, 0.75 high, 0.0 none


def detect_crisis(text: str) -> CrisisResult:
    """Scan *text* for despair/suicide indicators.

    Matching is case-insensitive (input is lowercased). Scanning stops at
    the first match, so at most one indicator is reported and CRITICAL
    patterns take precedence over HIGH ones.

    Args:
        text: Free-form user text; empty/None-ish input yields level NONE.

    Returns:
        CrisisResult with level "CRITICAL" (score 1.0), "HIGH" (score 0.75),
        or "NONE" (score 0.0).
    """
    if not text:
        return CrisisResult(level="NONE", score=0.0)

    text_lower = text.lower()

    for regex in _CRITICAL_COMPILED:
        if regex.search(text_lower):
            return CrisisResult(level="CRITICAL", indicators=[regex.pattern], score=1.0)

    for regex in _HIGH_COMPILED:
        if regex.search(text_lower):
            return CrisisResult(level="HIGH", indicators=[regex.pattern], score=0.75)

    return CrisisResult(level="NONE", score=0.0)
|
||||
316
scripts/llama_health_monitor.py
Normal file
316
scripts/llama_health_monitor.py
Normal file
@@ -0,0 +1,316 @@
|
||||
#!/usr/bin/env python3
|
||||
"""llama-server health monitor — check, restart, and report on local inference.
|
||||
|
||||
Monitors local inference servers (llama-server, Ollama) and can auto-restart
|
||||
them when they go down.
|
||||
|
||||
Usage:
|
||||
python3 scripts/llama_health_monitor.py --check # check all
|
||||
python3 scripts/llama_health_monitor.py --check --port 8081 # check specific
|
||||
python3 scripts/llama_health_monitor.py --restart 8081 # restart server
|
||||
python3 scripts/llama_health_monitor.py --watch # continuous monitor
|
||||
python3 scripts/llama_health_monitor.py --report # JSON status report
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Module-level logger; configuration is left to the embedding application.
logger = logging.getLogger(__name__)


# Default servers to monitor.
# NOTE(review): these entries mirror the defaults hard-coded in
# check_ollama()/check_llama_server(), but nothing in this file actually
# reads _DEFAULT_SERVERS yet — confirm intended use before removing.
_DEFAULT_SERVERS = [
    {"name": "ollama", "port": 11434, "type": "ollama", "health_path": "/api/tags"},
    {"name": "llama-server", "port": 8081, "type": "llama-server", "health_path": "/health"},
]
|
||||
|
||||
|
||||
@dataclass
class ServerStatus:
    """Status of a single inference server.

    Built by check_ollama()/check_llama_server() from the raw dict that
    check_server_health() returns, plus a timestamp.
    """

    name: str          # human-readable server name, e.g. "ollama"
    port: int          # TCP port probed
    server_type: str   # "ollama" or "llama-server"
    reachable: bool    # TCP/HTTP connection succeeded
    health_ok: bool    # health endpoint answered HTTP 200
    latency_ms: int    # round-trip time of the health request
    models: List[str]  # model names reported by the server (may be empty)
    error: str         # error description when unreachable; "" otherwise
    checked_at: str    # ISO-8601 local timestamp of the check
|
||||
|
||||
|
||||
def check_server_health(host: str = "localhost", port: int = 8081, health_path: str = "/health", timeout: int = 5) -> dict:
    """Probe one inference server's health endpoint over plain HTTP.

    Returns a dict with keys: reachable, health_ok, latency_ms, models, error.
    Never raises — every failure is folded into the returned dict.
    """
    import urllib.request
    import urllib.error

    status = {
        "reachable": False,
        "health_ok": False,
        "latency_ms": 0,
        "models": [],
        "error": "",
    }

    endpoint = f"http://{host}:{port}{health_path}"
    started = time.monotonic()

    def elapsed_ms() -> int:
        # Milliseconds since the probe started.
        return int((time.monotonic() - started) * 1000)

    try:
        request = urllib.request.Request(endpoint, method="GET")
        with urllib.request.urlopen(request, timeout=timeout) as response:
            status["latency_ms"] = elapsed_ms()
            status["reachable"] = True
            if response.status == 200:
                status["health_ok"] = True
                try:
                    payload = json.loads(response.read())
                    if isinstance(payload, dict):
                        # OpenAI-style "data" or Ollama-style "models" listings.
                        entries = payload.get("data", payload.get("models", []))
                        status["models"] = [
                            entry.get("name", entry.get("id", ""))
                            for entry in entries
                        ]
                except Exception:
                    pass  # body is optional; a 200 alone counts as healthy
    except urllib.error.URLError as e:
        status["latency_ms"] = elapsed_ms()
        status["error"] = f"Connection refused or unreachable: {e}"
    except Exception as e:
        status["latency_ms"] = elapsed_ms()
        status["error"] = str(e)

    return status
|
||||
|
||||
|
||||
def check_ollama(port: int = 11434) -> ServerStatus:
    """Probe the local Ollama server and wrap the result in a ServerStatus."""
    import datetime

    probe = check_server_health(port=port, health_path="/api/tags")
    stamped = datetime.datetime.now().isoformat()
    return ServerStatus(
        name="ollama",
        port=port,
        server_type="ollama",
        reachable=probe["reachable"],
        health_ok=probe["health_ok"],
        latency_ms=probe["latency_ms"],
        models=probe["models"],
        error=probe["error"],
        checked_at=stamped,
    )
|
||||
|
||||
|
||||
def check_llama_server(port: int = 8081) -> ServerStatus:
    """Probe the local llama-server and wrap the result in a ServerStatus."""
    import datetime

    probe = check_server_health(port=port, health_path="/health")
    stamped = datetime.datetime.now().isoformat()
    return ServerStatus(
        name="llama-server",
        port=port,
        server_type="llama-server",
        reachable=probe["reachable"],
        health_ok=probe["health_ok"],
        latency_ms=probe["latency_ms"],
        models=probe.get("models", []),
        error=probe["error"],
        checked_at=stamped,
    )
|
||||
|
||||
|
||||
def find_llama_server_process() -> Optional[dict]:
    """Locate a running llama-server process by scanning `ps aux`.

    Returns a dict with pid (int), cpu, mem (strings from ps) and the
    full command line, or None when no process is found or ps fails.
    """
    try:
        listing = subprocess.run(
            ["ps", "aux"],
            capture_output=True, text=True, timeout=5,
        )
        for row in listing.stdout.split("\n"):
            # Skip non-matching rows and the grep/ps pipeline itself.
            if "llama-server" not in row or "grep" in row:
                continue
            fields = row.split()
            if len(fields) >= 11:
                return {
                    "pid": int(fields[1]),
                    "cpu": fields[2],
                    "mem": fields[3],
                    "command": " ".join(fields[10:]),
                }
    except Exception:
        pass  # best-effort: any failure is treated as "not found"
    return None
|
||||
|
||||
|
||||
def restart_llama_server(
    model_path: str = "",
    port: int = 8081,
    ctx_size: int = 8192,
    gpu_layers: int = 99,
    alias: str = "hermes3",
) -> dict:
    """Restart llama-server with specified parameters.

    Kills any llama-server found via find_llama_server_process(), then
    launches a fresh instance in the background and verifies it answers
    on *port*.

    Args:
        model_path: GGUF model file to load; auto-discovered via
            _find_hermes3_model() when empty.
        port: TCP port the new server listens on (bound to 127.0.0.1).
        ctx_size: Context window, passed as --ctx-size.
        gpu_layers: Layers to offload, passed as --n-gpu-layers.
        alias: Model alias, passed as --alias.

    Returns:
        On success: {"success": True, "pid", "port", "model"}.
        On failure: {"success": False, "error", ...} — possibly with the
        pid of a process that started but never became reachable.
    """
    # Kill existing process (only one is handled; duplicates would survive)
    existing = find_llama_server_process()
    if existing:
        try:
            os.kill(existing["pid"], 15)  # 15 == SIGTERM (graceful shutdown)
            time.sleep(2)  # give the old process time to release the port
            logger.info("Killed existing llama-server (PID %d)", existing["pid"])
        except ProcessLookupError:
            pass  # process already exited between discovery and kill
        except Exception as e:
            return {"success": False, "error": f"Failed to kill existing: {e}"}

    # Find model path if not specified
    if not model_path:
        model_path = _find_hermes3_model()
        if not model_path:
            return {"success": False, "error": "Could not find hermes3 model path"}

    # Build command
    cmd = [
        "llama-server",
        "--model", model_path,
        "--port", str(port),
        "--host", "127.0.0.1",  # local-only binding, never exposed externally
        "--n-gpu-layers", str(gpu_layers),
        "--flash-attn", "on",
        "--ctx-size", str(ctx_size),
        "--alias", alias,
    ]

    try:
        # Start in background; server output is discarded, not inherited.
        # NOTE(review): the Popen handle is dropped after return, so the
        # child is never wait()ed by this process — confirm that is intended.
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        time.sleep(3)  # Wait for startup (fixed delay, no retry loop)

        # Verify it's running
        health = check_server_health(port=port)
        if health["reachable"]:
            return {
                "success": True,
                "pid": proc.pid,
                "port": port,
                "model": model_path,
            }
        else:
            return {
                "success": False,
                "error": f"Started but not reachable: {health['error']}",
                "pid": proc.pid,
            }
    except FileNotFoundError:
        # Raised by Popen when the binary itself is missing.
        return {"success": False, "error": "llama-server binary not found in PATH"}
    except Exception as e:
        return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
def _find_hermes3_model() -> str:
    """Return the first model file found in the known locations, or ""."""
    import glob

    # Checked in priority order: Ollama blob store, llama.cpp cache,
    # then the system-wide model directory.
    candidates = (
        os.path.expanduser("~/.ollama/models/blobs/sha256-*"),
        os.path.expanduser("~/.cache/llama.cpp/*.gguf"),
        "/opt/models/*.gguf",
    )
    for candidate in candidates:
        hits = glob.glob(candidate)
        if hits:
            return hits[0]
    return ""
|
||||
|
||||
|
||||
def check_all_servers() -> List[ServerStatus]:
    """Probe every configured server, one ServerStatus per server."""
    return [check_ollama(), check_llama_server()]
|
||||
|
||||
|
||||
def format_status(statuses: List[ServerStatus]) -> str:
    """Render *statuses* as a human-readable multi-line health report."""
    report = ["Local Inference Health", "=" * 40, ""]

    for status in statuses:
        healthy = status.reachable and status.health_ok
        icon = "\u2705" if healthy else "\u274c"
        report.append(f"{icon} {status.name} (port {status.port})")
        report.append(f"  Type: {status.server_type}")
        report.append(f"  Reachable: {status.reachable}")
        report.append(f"  Healthy: {status.health_ok}")
        report.append(f"  Latency: {status.latency_ms}ms")
        if status.models:
            # Cap the listing at five models to keep the report short.
            report.append(f"  Models: {', '.join(status.models[:5])}")
        if status.error:
            # Truncate long error strings for readability.
            report.append(f"  Error: {status.error[:100]}")
        report.append("")

    # Append the llama-server process line, independent of HTTP checks.
    proc = find_llama_server_process()
    if proc:
        report.append(f"llama-server process: PID {proc['pid']}, CPU {proc['cpu']}%, MEM {proc['mem']}%")
    else:
        report.append("llama-server process: NOT RUNNING")
    report.append("")

    return "\n".join(report)
|
||||
|
||||
|
||||
def main():
    """CLI entry point.

    Modes (first match wins): --restart PORT, --watch, then a one-shot
    check (--port for a single server, otherwise all), printed as JSON
    with --report or as a text report.

    Returns:
        Process exit code: 0 when healthy/successful, 1 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Local inference health monitor")
    parser.add_argument("--check", action="store_true", help="Check server health")
    parser.add_argument("--port", type=int, default=0, help="Check specific port")
    parser.add_argument("--restart", type=int, default=0, metavar="PORT", help="Restart server on port")
    parser.add_argument("--watch", action="store_true", help="Continuous monitoring")
    parser.add_argument("--report", action="store_true", help="JSON status report")
    parser.add_argument("--interval", type=int, default=30, help="Watch interval in seconds")
    args = parser.parse_args()

    if args.restart:
        print(f"Restarting llama-server on port {args.restart}...")
        result = restart_llama_server(port=args.restart)
        print(json.dumps(result, indent=2))
        return 0 if result["success"] else 1

    if args.watch:
        print(f"Monitoring every {args.interval}s. Press Ctrl+C to stop.")
        try:
            while True:
                statuses = check_all_servers()
                down = [s for s in statuses if not s.reachable]
                if down:
                    for s in down:
                        print(f"[{time.strftime('%H:%M:%S')}] DOWN: {s.name} port {s.port} — {s.error}")
                else:
                    print(f"[{time.strftime('%H:%M:%S')}] All servers OK")
                time.sleep(args.interval)
        except KeyboardInterrupt:
            # Exit cleanly on the Ctrl+C the banner advertises, instead of
            # dumping a KeyboardInterrupt traceback.
            return 0

    if args.port:
        status = check_llama_server(port=args.port)
        statuses = [status]
    else:
        statuses = check_all_servers()

    if args.report:
        print(json.dumps([asdict(s) for s in statuses], indent=2))
    else:
        print(format_status(statuses))

    # Non-zero exit when any server is down, so scripts can alert on it.
    down = [s for s in statuses if not s.reachable]
    return 1 if down else 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user