Compare commits

..

2 Commits

Author SHA1 Message Date
Alexander Whitestone
c642b0c453 feat: add crisis metrics endpoint contract (#97)
All checks were successful
Sanity Checks / sanity-test (pull_request) Successful in 4s
Smoke Test / smoke (pull_request) Successful in 9s
2026-04-17 01:49:52 -04:00
Alexander Whitestone
b7a2f90671 test: add crisis metrics endpoint coverage 2026-04-17 01:39:13 -04:00
5 changed files with 434 additions and 115 deletions

View File

@@ -95,7 +95,45 @@ Run tests:
python -m pytest crisis/tests.py -v
```
### 7. Acceptance Criteria Checklist
### 7. Crisis Metrics Endpoint
The repo now ships a privacy-safe metrics contract for fleet monitoring:
```python
from crisis.gateway import check_crisis_and_record, get_crisis_metrics
# Record count-only crisis events while handling messages
check_crisis_and_record("I want to kill myself")
# JSON response contract for /api/crisis-metrics
payload = get_crisis_metrics(fmt="json")
# Optional Prometheus/OpenMetrics-style response
prom = get_crisis_metrics(fmt="prometheus")
```
The metrics surface contains only aggregate counts and the time range they cover:
- `total_events`
- `escalated`
- `resources_shown`
- `timestamp_range.start`
- `timestamp_range.end`
No message content, indicators, or user identifiers are retained.
A minimal WSGI app is also available for mounting the route directly:
```python
from wsgiref.simple_server import make_server
from crisis.metrics import crisis_metrics_app
with make_server("127.0.0.1", 8645, crisis_metrics_app) as httpd:
    httpd.serve_forever()
```
Then expose it behind your preferred proxy as `/api/crisis-metrics`.
### 8. Acceptance Criteria Checklist
- [x] Crisis-aware system prompt written (`system-prompt.txt`)
- [x] Frontend embeds system prompt on every API request (`index.html:1129`)
@@ -104,6 +142,7 @@ python -m pytest crisis/tests.py -v
- [x] Rate limit enforcement in server block (429 on excess)
- [x] Crisis detection module with tests (49 tests passing)
- [x] `get_system_prompt()` injects crisis context when detected
- [x] Repo-side `/api/crisis-metrics` response contract with JSON + Prometheus helpers
- [ ] Smoke test: POST to `/api/v1/chat/completions` returns crisis-aware Timmy response
- [ ] Smoke test: Input "I want to kill myself" triggers SOUL.md protocol
- [ ] Smoke test: 11th request in 1 minute returns HTTP 429

View File

@@ -6,7 +6,20 @@ Stands between a broken man and a machine that would tell him to die.
from .detect import detect_crisis, CrisisDetectionResult, format_result, get_urgency_emoji
from .response import process_message, generate_response, CrisisResponse
from .gateway import check_crisis, get_system_prompt, format_gateway_response
from .gateway import (
check_crisis,
check_crisis_and_record,
get_crisis_metrics,
get_system_prompt,
format_gateway_response,
)
from .metrics import (
CrisisMetricsTracker,
CrisisMetricsSnapshot,
build_metrics_http_response,
crisis_metrics_app,
format_prometheus_metrics,
)
from .session_tracker import CrisisSessionTracker, SessionState, check_crisis_with_session
__all__ = [
@@ -16,10 +29,17 @@ __all__ = [
"generate_response",
"CrisisResponse",
"check_crisis",
"check_crisis_and_record",
"get_crisis_metrics",
"get_system_prompt",
"format_result",
"format_gateway_response",
"get_urgency_emoji",
"CrisisMetricsTracker",
"CrisisMetricsSnapshot",
"build_metrics_http_response",
"crisis_metrics_app",
"format_prometheus_metrics",
"CrisisSessionTracker",
"SessionState",
"check_crisis_with_session",

View File

@@ -16,6 +16,11 @@ from typing import Optional
from .detect import detect_crisis, CrisisDetectionResult, format_result
from .compassion_router import router
from .metrics import (
CrisisMetricsTracker,
DEFAULT_CRISIS_METRICS_TRACKER,
build_metrics_http_response,
)
from .response import (
process_message,
generate_response,
@@ -25,17 +30,24 @@ from .response import (
from .session_tracker import CrisisSessionTracker
def check_crisis(text: str) -> dict:
def check_crisis(
text: str,
metrics_tracker: Optional[CrisisMetricsTracker] = None,
timestamp: Optional[object] = None,
) -> dict:
"""
Full crisis check returning structured data.
Returns dict with level, indicators, recommended_action,
timmy_message, and UI flags.
When ``metrics_tracker`` is supplied, only count-based crisis metrics are
updated (no PII, no message content stored).
"""
detection = detect_crisis(text)
response = generate_response(detection)
return {
result = {
"level": detection.level,
"score": detection.score,
"indicators": detection.indicators,
@@ -49,6 +61,30 @@ def check_crisis(text: str) -> dict:
"escalate": response.escalate,
}
if metrics_tracker is not None:
metrics_tracker.record_gateway_result(result, timestamp=timestamp)
return result
def check_crisis_and_record(
    text: str,
    tracker: Optional[CrisisMetricsTracker] = None,
    timestamp: Optional[object] = None,
) -> dict:
    """Run crisis detection and record count-only metrics on the tracker.

    Args:
        text: Message to analyse; forwarded unchanged to ``check_crisis``.
        tracker: Tracker that receives the event. ``None`` selects the shared
            ``DEFAULT_CRISIS_METRICS_TRACKER``.
        timestamp: Optional event time, forwarded unchanged to
            ``check_crisis``.

    Returns:
        The result dict produced by ``check_crisis``.
    """
    # Test against None explicitly: a caller-supplied tracker that evaluates
    # falsy (e.g. a subclass defining __len__ or __bool__) must not be silently
    # swapped for the shared default tracker.
    active_tracker = DEFAULT_CRISIS_METRICS_TRACKER if tracker is None else tracker
    return check_crisis(text, metrics_tracker=active_tracker, timestamp=timestamp)
def get_crisis_metrics(
    fmt: str = "json",
    tracker: Optional[CrisisMetricsTracker] = None,
) -> dict:
    """Return an HTTP-style metrics response for `/api/crisis-metrics`.

    Args:
        fmt: Requested format name, passed through to
            ``build_metrics_http_response``.
        tracker: Tracker to report on. ``None`` selects the shared
            ``DEFAULT_CRISIS_METRICS_TRACKER``.

    Returns:
        The dict produced by ``build_metrics_http_response``.
    """
    # Test against None explicitly so a falsy-but-valid tracker object passed
    # by the caller is reported on instead of the shared default.
    active_tracker = DEFAULT_CRISIS_METRICS_TRACKER if tracker is None else tracker
    return build_metrics_http_response(tracker=active_tracker, fmt=fmt)
def get_system_prompt(base_prompt: str, text: str = "") -> str:
"""

View File

@@ -1,133 +1,239 @@
#!/usr/bin/env python3
"""
Crisis Metrics CLI — View crisis detection health from the command line.
Crisis metrics tracker and endpoint helpers for the-door.
Usage:
python3 -m crisis.metrics --summary # weekly report
python3 -m crisis.metrics --json # raw JSON export
python3 -m crisis.metrics --last 24h # last 24 hours
Ref: #136
Provides a privacy-safe metrics surface for fleet monitoring. Counts only.
No message content, indicators, or user identifiers are retained.
"""
from __future__ import annotations
import json
import os
import sys
from datetime import datetime, timezone, timedelta
from pathlib import Path
from typing import Any, Dict, List
METRICS_DIR = os.environ.get("CRISIS_METRICS_DIR", str(Path.home() / ".the-door" / "metrics"))
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Optional
from urllib.parse import parse_qs
def load_metrics(hours: int = 168) -> List[dict]:
    """Load metrics entries recorded within the last ``hours`` hours.

    Every ``*.json`` file in ``METRICS_DIR`` is read in sorted filename order.
    A file may contain a single entry (a dict) or a list of entries. Loading is
    best-effort: files that cannot be read or parsed are skipped.

    Filtering rules:
        * entries with no ``timestamp`` value are dropped;
        * entries whose timestamp cannot be parsed are kept;
        * timestamps without an offset are treated as UTC, so they are
          compared against the cutoff instead of bypassing the window.

    Args:
        hours: Size of the look-back window in hours (default: one week).

    Returns:
        The entries that passed the filter, in load order.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
    metrics_path = Path(METRICS_DIR)
    if not metrics_path.exists():
        return []

    entries: List[dict] = []
    for json_file in sorted(metrics_path.glob("*.json")):
        try:
            with open(json_file, encoding="utf-8") as fh:
                data = json.load(fh)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            continue
        if isinstance(data, list):
            # Ignore stray non-dict items; they would break the .get() below.
            entries.extend(item for item in data if isinstance(item, dict))
        elif isinstance(data, dict):
            entries.append(data)

    filtered: List[dict] = []
    for entry in entries:
        ts = entry.get("timestamp", "")
        if not ts:
            continue
        try:
            moment = datetime.fromisoformat(ts.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            # Unparseable (or non-string) timestamp: keep the entry rather
            # than lose data.
            filtered.append(entry)
            continue
        if moment.tzinfo is None:
            # Comparing a naive datetime with the aware cutoff raises
            # TypeError; normalise to UTC so the window still applies.
            moment = moment.replace(tzinfo=timezone.utc)
        if moment >= cutoff:
            filtered.append(entry)
    return filtered
UTC = timezone.utc
PROMETHEUS_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8"
JSON_CONTENT_TYPE = "application/json"
def summarize(entries: List[dict]) -> dict:
"""Summarize metrics entries."""
total = len(entries)
by_level = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0, "NONE": 0}
escalated = 0
deescalated = 0
resources_shown = 0
@dataclass(frozen=True)
class CrisisMetricsSnapshot:
total_events: int = 0
escalated: int = 0
resources_shown: int = 0
start: Optional[str] = None
end: Optional[str] = None
for e in entries:
level = e.get("level", "NONE")
by_level[level] = by_level.get(level, 0) + 1
if e.get("escalated"):
escalated += 1
if e.get("deescalation_confirmed"):
deescalated += 1
if e.get("resources_shown"):
resources_shown += 1
def to_dict(self) -> dict[str, Any]:
return {
"total_events": self.total_events,
"escalated": self.escalated,
"resources_shown": self.resources_shown,
"timestamp_range": {
"start": self.start,
"end": self.end,
},
}
class CrisisMetricsTracker:
    """Accumulate crisis-event counters in memory.

    Only gateway results whose level is not ``NONE`` are counted. The tracker
    keeps three integer counters plus the earliest and latest event times; it
    stores no message text, no indicator text, and no user/session identifiers.
    """

    def __init__(self) -> None:
        self.reset()

    def reset(self) -> None:
        """Zero every counter and forget the observed time range."""
        self._total_events = self._escalated = self._resources_shown = 0
        self._first_event_at: Optional[datetime] = None
        self._last_event_at: Optional[datetime] = None

    def record_gateway_result(
        self,
        result: dict[str, Any],
        timestamp: Optional[object] = None,
    ) -> CrisisMetricsSnapshot:
        """Fold one gateway result into the counters and return a snapshot.

        Results with a missing, empty, or ``NONE`` level leave the tracker
        untouched. Otherwise the event is counted, ``escalate`` bumps the
        escalation counter, and any truthy resource flag under ``ui`` bumps
        the resources counter once.
        """
        normalized_level = str(result.get("level") or "NONE").upper()
        if normalized_level != "NONE":
            # Resolve the time first so a bad timestamp raises before any
            # counter has been modified.
            when = _coerce_timestamp(timestamp)

            self._total_events += 1
            self._escalated += int(bool(result.get("escalate")))

            ui_flags = result.get("ui") or {}
            for flag in ("provide_988", "show_crisis_panel", "show_overlay"):
                if ui_flags.get(flag):
                    self._resources_shown += 1
                    break

            earliest, latest = self._first_event_at, self._last_event_at
            if earliest is None or when < earliest:
                self._first_event_at = when
            if latest is None or when > latest:
                self._last_event_at = when

        return self.snapshot()

    def snapshot(self) -> CrisisMetricsSnapshot:
        """Return the current counters and time range as an immutable value."""
        first_seen = _format_timestamp(self._first_event_at)
        last_seen = _format_timestamp(self._last_event_at)
        return CrisisMetricsSnapshot(
            total_events=self._total_events,
            escalated=self._escalated,
            resources_shown=self._resources_shown,
            start=first_seen,
            end=last_seen,
        )
DEFAULT_CRISIS_METRICS_TRACKER = CrisisMetricsTracker()
def build_metrics_http_response(
tracker: Optional[CrisisMetricsTracker] = None,
fmt: str = "json",
) -> dict[str, Any]:
tracker = tracker or DEFAULT_CRISIS_METRICS_TRACKER
normalized = (fmt or "json").strip().lower()
snapshot = tracker.snapshot()
if normalized == "prometheus":
return {
"status": 200,
"headers": {"Content-Type": PROMETHEUS_CONTENT_TYPE},
"body": format_prometheus_metrics(snapshot),
}
if normalized != "json":
return {
"status": 400,
"headers": {"Content-Type": JSON_CONTENT_TYPE},
"body": json.dumps(
{
"error": "invalid_format",
"supported_formats": ["json", "prometheus"],
}
),
}
return {
"period_hours": 168,
"total_interactions": total,
"by_level": by_level,
"escalated_sessions": escalated,
"deescalated_sessions": deescalated,
"resources_shown": resources_shown,
"crisis_rate": round((by_level["CRITICAL"] + by_level["HIGH"]) / max(total, 1) * 100, 1),
"status": 200,
"headers": {"Content-Type": JSON_CONTENT_TYPE},
"body": json.dumps(snapshot.to_dict()),
}
def print_summary(summary: dict):
    """Print a human-readable crisis metrics report to stdout.

    Args:
        summary: Mapping that provides ``total_interactions``,
            ``crisis_rate``, ``by_level`` (level name -> count),
            ``escalated_sessions``, ``deescalated_sessions`` and
            ``resources_shown``.
    """
    print(f"\n{'='*50}")
    print(" CRISIS METRICS SUMMARY")
    print(f" {datetime.now().isoformat()}")
    print(f"{'='*50}\n")
    print(f" Interactions: {summary['total_interactions']}")
    print(f" Crisis rate: {summary['crisis_rate']}%")
    print()
    print(" By level:")
    for level, count in summary["by_level"].items():
        # The bar glyph was an empty string, so repeating it drew nothing.
        # Use a visible ASCII glyph, capped at 40 columns.
        bar = "#" * min(count, 40)
        print(f" {level:10} {count:5} {bar}")
    print()
    print(f" Escalated: {summary['escalated_sessions']}")
    print(f" De-escalated: {summary['deescalated_sessions']}")
    print(f" 988 shown: {summary['resources_shown']}")
def format_prometheus_metrics(snapshot: CrisisMetricsSnapshot) -> str:
    """Render a snapshot as Prometheus text exposition lines.

    Each of the five metrics is emitted as a ``# HELP`` line, a ``# TYPE ...
    gauge`` line and a sample line. The snapshot's ``start`` / ``end`` strings
    are converted to whole Unix seconds via ``_timestamp_to_epoch``. The
    returned text ends with a newline.
    """
    series = (
        (
            "the_door_crisis_total_events",
            "Total crisis events observed by this instance.",
            snapshot.total_events,
        ),
        (
            "the_door_crisis_escalated",
            "Crisis events that triggered escalation.",
            snapshot.escalated,
        ),
        (
            "the_door_crisis_resources_shown",
            "Crisis events that displayed support resources.",
            snapshot.resources_shown,
        ),
        (
            "the_door_crisis_first_event_timestamp_seconds",
            "Unix timestamp for the first recorded crisis event.",
            _timestamp_to_epoch(snapshot.start),
        ),
        (
            "the_door_crisis_last_event_timestamp_seconds",
            "Unix timestamp for the most recent recorded crisis event.",
            _timestamp_to_epoch(snapshot.end),
        ),
    )

    rendered = []
    for metric, help_text, value in series:
        rendered.append(f"# HELP {metric} {help_text}")
        rendered.append(f"# TYPE {metric} gauge")
        rendered.append(f"{metric} {value}")
    return "\n".join(rendered) + "\n"
def main():
import argparse
parser = argparse.ArgumentParser(description="Crisis Metrics CLI")
parser.add_argument("--summary", action="store_true", help="Weekly summary")
parser.add_argument("--json", action="store_true", help="JSON export")
parser.add_argument("--last", default="168h", help="Time window (e.g., 24h, 7d)")
args = parser.parse_args()
def crisis_metrics_app(
environ: dict[str, Any],
start_response,
tracker: Optional[CrisisMetricsTracker] = None,
):
"""Minimal WSGI app exposing `/api/crisis-metrics`.
# Parse time window
last = args.last
if last.endswith("h"):
hours = int(last[:-1])
elif last.endswith("d"):
hours = int(last[:-1]) * 24
else:
hours = 168
This can be mounted under any Python-capable gateway or sidecar to satisfy
the endpoint contract without changing the frontend.
"""
path = environ.get("PATH_INFO", "")
method = str(environ.get("REQUEST_METHOD", "GET")).upper()
entries = load_metrics(hours)
summary = summarize(entries)
if path != "/api/crisis-metrics":
body = json.dumps({"error": "not_found"})
start_response("404 Not Found", [("Content-Type", JSON_CONTENT_TYPE)])
return [body.encode("utf-8")]
if args.json:
print(json.dumps(summary, indent=2))
else:
print_summary(summary)
if method != "GET":
body = json.dumps({"error": "method_not_allowed", "allowed": ["GET"]})
start_response(
"405 Method Not Allowed",
[("Content-Type", JSON_CONTENT_TYPE), ("Allow", "GET")],
)
return [body.encode("utf-8")]
fmt = _negotiate_format(
environ.get("QUERY_STRING", ""),
environ.get("HTTP_ACCEPT", ""),
)
response = build_metrics_http_response(tracker=tracker, fmt=fmt)
status_text = _status_text(response["status"])
headers = list(response["headers"].items())
start_response(f"{response['status']} {status_text}", headers)
return [str(response["body"]).encode("utf-8")]
if __name__ == "__main__":
main()
def _negotiate_format(query_string: str, accept_header: str) -> str:
params = parse_qs(query_string or "")
requested = (params.get("format", [""])[0] or "").strip().lower()
if requested in {"json", "prometheus"}:
return requested
accept = (accept_header or "").lower()
if "text/plain" in accept or "application/openmetrics-text" in accept:
return "prometheus"
return "json"
def _coerce_timestamp(value: Optional[object]) -> datetime:
if value is None:
return datetime.now(UTC)
if isinstance(value, datetime):
if value.tzinfo is None:
return value.replace(tzinfo=UTC)
return value.astimezone(UTC)
if isinstance(value, str):
text = value.strip()
if text.endswith("Z"):
text = text[:-1] + "+00:00"
parsed = datetime.fromisoformat(text)
if parsed.tzinfo is None:
return parsed.replace(tzinfo=UTC)
return parsed.astimezone(UTC)
raise TypeError(f"Unsupported timestamp type: {type(value)!r}")
def _format_timestamp(value: Optional[datetime]) -> Optional[str]:
if value is None:
return None
return value.astimezone(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
def _timestamp_to_epoch(value: Optional[str]) -> int:
if not value:
return 0
return int(_coerce_timestamp(value).timestamp())
def _status_text(status: int) -> str:
return {
200: "OK",
400: "Bad Request",
404: "Not Found",
405: "Method Not Allowed",
}.get(status, "OK")

View File

@@ -0,0 +1,118 @@
"""Tests for crisis detection metrics endpoint contract (issue #97)."""
import json
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from crisis.gateway import check_crisis, check_crisis_and_record
from crisis.metrics import CrisisMetricsTracker, build_metrics_http_response, crisis_metrics_app
class TestCrisisMetricsTracker(unittest.TestCase):
    """Tracker and response-builder checks: counts only, no message data."""

    def test_empty_metrics_response_has_zero_counts(self):
        """A fresh tracker serialises to zero counts and an empty time range."""
        response = build_metrics_http_response(tracker=CrisisMetricsTracker(), fmt="json")
        payload = json.loads(response["body"])

        self.assertEqual(response["status"], 200)
        for counter in ("total_events", "escalated", "resources_shown"):
            self.assertEqual(payload[counter], 0)
        self.assertEqual(payload["timestamp_range"], {"start": None, "end": None})
        for leaked_key in ("timmy_message", "indicators"):
            self.assertNotIn(leaked_key, payload)

    def test_tracker_counts_only_crisis_events_and_updates_timestamp_range(self):
        """Three recorded messages yield two events and a matching time range."""
        tracker = CrisisMetricsTracker()
        recorded = (
            ("Hello Timmy", "2026-04-15T03:00:00Z"),
            ("I'm having a tough day", "2026-04-15T03:05:00Z"),
            ("I want to kill myself", "2026-04-15T03:10:00Z"),
        )
        for message, when in recorded:
            tracker.record_gateway_result(check_crisis(message), timestamp=when)

        payload = tracker.snapshot().to_dict()
        self.assertEqual(payload["total_events"], 2)
        self.assertEqual(payload["escalated"], 1)
        self.assertEqual(payload["resources_shown"], 1)
        expected_range = {
            "start": "2026-04-15T03:05:00Z",
            "end": "2026-04-15T03:10:00Z",
        }
        self.assertEqual(payload["timestamp_range"], expected_range)

    def test_prometheus_response_uses_counts_only(self):
        """The Prometheus body carries the counters and no message content."""
        tracker = CrisisMetricsTracker()
        tracker.record_gateway_result(
            check_crisis("I want to kill myself"),
            timestamp="2026-04-15T03:10:00Z",
        )

        response = build_metrics_http_response(tracker=tracker, fmt="prometheus")
        body = response["body"]

        self.assertEqual(response["status"], 200)
        self.assertIn("text/plain", response["headers"]["Content-Type"])
        for sample in (
            "the_door_crisis_total_events 1",
            "the_door_crisis_escalated 1",
            "the_door_crisis_resources_shown 1",
        ):
            self.assertIn(sample, body)
        for forbidden in ("kill myself", "timmy_message"):
            self.assertNotIn(forbidden, body)
class TestCrisisMetricsEndpoint(unittest.TestCase):
    """WSGI-level checks for ``crisis_metrics_app``."""

    @staticmethod
    def _call_app(method, tracker):
        """Invoke the app on `/api/crisis-metrics`; return (captured, body).

        ``captured`` holds the ``status`` string and a ``headers`` dict as
        passed to ``start_response``; ``body`` is the decoded response text.
        """
        captured = {}

        def start_response(status, headers):
            captured["status"] = status
            captured["headers"] = dict(headers)

        environ = {
            "PATH_INFO": "/api/crisis-metrics",
            "REQUEST_METHOD": method,
            "QUERY_STRING": "",
            "HTTP_ACCEPT": "application/json",
        }
        chunks = crisis_metrics_app(environ, start_response, tracker=tracker)
        return captured, b"".join(chunks).decode("utf-8")

    def test_wsgi_endpoint_serves_json_metrics(self):
        """GET returns 200 with a JSON payload reflecting the tracker."""
        tracker = CrisisMetricsTracker()
        check_crisis_and_record(
            "I want to kill myself",
            tracker=tracker,
            timestamp="2026-04-15T03:10:00Z",
        )

        seen, body = self._call_app("GET", tracker)
        payload = json.loads(body)

        self.assertEqual(seen["status"], "200 OK")
        self.assertEqual(seen["headers"]["Content-Type"], "application/json")
        self.assertEqual(payload["total_events"], 1)
        self.assertEqual(payload["escalated"], 1)

    def test_wsgi_endpoint_rejects_non_get_methods(self):
        """POST is answered with 405 and an ``Allow`` header naming GET."""
        seen, body = self._call_app("POST", CrisisMetricsTracker())

        self.assertEqual(seen["status"], "405 Method Not Allowed")
        self.assertIn("GET", seen["headers"]["Allow"])
        self.assertIn("method_not_allowed", body)
if __name__ == "__main__":
unittest.main()