Compare commits
1 commit
fix/136-cr
...
feat/136-c

| Author | SHA1 | Date |
|---|---|---|
|  | a5eb54161f |  |

Makefile
@@ -12,7 +12,7 @@ VPS := alexanderwhitestone.com
 DOMAIN := alexanderwhitestone.com
 DEPLOY_DIR := deploy
 
-.PHONY: help deploy deploy-bash check ssl push service metrics
+.PHONY: help deploy deploy-bash check ssl push service
 
 help:
	@echo "The Door — Deployment Commands"
@@ -23,8 +23,6 @@ help:
	@echo "  make check         Check deployment status"
	@echo "  make ssl           Setup SSL on VPS"
	@echo "  make service       Install/restart hermes-gateway service"
-	@echo "  make metrics       View crisis metrics summary"
-	@echo "  make metrics-json  Export crisis metrics as JSON"
	@echo ""
 
 deploy:
@@ -48,9 +46,3 @@ ssl:
 
 service:
	ssh root@$(VPS) "cd /opt/the-door && bash deploy/deploy.sh --service"
-
-metrics:
-	python3 -m crisis.metrics --summary
-
-metrics-json:
-	python3 -m crisis.metrics --json
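Note: the dropped `metrics` and `metrics-json` targets were one-line wrappers around the module CLI, which still exists on `feat/136-c`. For reference, a minimal sketch of the same invocation from Python rather than Make, assuming the repository root as the working directory:

```python
import subprocess

# Run the metrics CLI the way the removed `make metrics` target did
# and capture the rendered report. Assumes the working directory is
# the repository root so the `crisis` package is importable.
result = subprocess.run(
    ["python3", "-m", "crisis.metrics", "--summary"],
    capture_output=True,
    text=True,
    check=True,
)
print(result.stdout)
```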
crisis/__init__.py
@@ -8,7 +8,6 @@ from .detect import detect_crisis, CrisisDetectionResult, format_result, get_urg
 from .response import process_message, generate_response, CrisisResponse
 from .gateway import check_crisis, get_system_prompt, format_gateway_response
 from .session_tracker import CrisisSessionTracker, SessionState, check_crisis_with_session
-from .metrics import CrisisMetrics, AggregateMetrics
 
 __all__ = [
     "detect_crisis",
@@ -24,6 +23,4 @@ __all__ = [
     "CrisisSessionTracker",
     "SessionState",
     "check_crisis_with_session",
-    "CrisisMetrics",
-    "AggregateMetrics",
 ]
crisis/metrics.py
@@ -1,243 +1,132 @@
 #!/usr/bin/env python3
 """
-crisis/metrics.py — Aggregate crisis detection metrics.
-
-Tracks session-level crisis data for aggregate reporting.
-Privacy-first: stores only aggregate counts, never user content.
+Crisis Metrics CLI — View crisis detection health from the command line.
 
 Usage:
-    from crisis.metrics import CrisisMetrics
-
-    metrics = CrisisMetrics()
-    metrics.record_session(tracker.state)
-    summary = metrics.get_summary()
+    python3 -m crisis.metrics --summary     # weekly report
+    python3 -m crisis.metrics --json        # raw JSON export
+    python3 -m crisis.metrics --last 24h    # last 24 hours
 
 Ref: #136
 """
 
 import json
 import os
-import time
-from dataclasses import dataclass, field, asdict
-from datetime import datetime, timedelta
+import sys
+from datetime import datetime, timezone, timedelta
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Any, Dict, List
 
-METRICS_DIR = Path.home() / ".the-door" / "metrics"
+METRICS_DIR = os.environ.get("CRISIS_METRICS_DIR", str(Path.home() / ".the-door" / "metrics"))
 
 
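Note: `METRICS_DIR` is now resolved from the environment at import time, so a caller that wants a different location has to set `CRISIS_METRICS_DIR` before the first import. A minimal sketch (the scratch path is hypothetical):

```python
import os
from pathlib import Path

# Hypothetical scratch location; must be set before crisis.metrics
# is imported, because METRICS_DIR is computed at module load.
os.environ["CRISIS_METRICS_DIR"] = str(Path("/tmp") / "door-metrics")

from crisis.metrics import METRICS_DIR

print(METRICS_DIR)  # -> /tmp/door-metrics
```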
-@dataclass
-class SessionMetrics:
-    """Metrics from a single crisis session."""
-    timestamp: float
-    current_level: str
-    peak_level: str
-    message_count: int
-    was_escalating: bool
-    was_deescalating: bool
-    escalation_rate: float
-    triggered_overlay: bool = False
-    showed_988: bool = False
+def load_metrics(hours: int = 168) -> List[dict]:
+    """Load metrics entries from the last N hours."""
+    cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
+    entries = []
+    metrics_path = Path(METRICS_DIR)
+
+    if not metrics_path.exists():
+        return entries
+
+    for f in sorted(metrics_path.glob("*.json")):
+        try:
+            with open(f) as fh:
+                data = json.load(fh)
+            if isinstance(data, list):
+                entries.extend(data)
+            elif isinstance(data, dict):
+                entries.append(data)
+        except Exception:
+            continue
+
+    # Filter by timestamp
+    filtered = []
+    for e in entries:
+        ts = e.get("timestamp", "")
+        if ts:
+            try:
+                t = datetime.fromisoformat(ts.replace("Z", "+00:00"))
+                if t >= cutoff:
+                    filtered.append(e)
+            except Exception:
+                filtered.append(e)
+
+    return filtered
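Note: `load_metrics()` accepts either a single JSON object or a list of objects per `*.json` file; entries with no `timestamp` are dropped, while entries whose timestamp fails to parse are kept. A sketch of a compatible file, with field names inferred from `summarize()` below and all values hypothetical:

```python
import json
from pathlib import Path

# Hypothetical entry in the shape load_metrics() reads: an ISO-8601
# "timestamp" plus the flags summarize() counts.
entry = {
    "timestamp": "2025-01-15T12:00:00Z",
    "level": "HIGH",
    "escalated": True,
    "resources_shown": True,
}

metrics_dir = Path("/tmp/door-metrics")  # hypothetical CRISIS_METRICS_DIR
metrics_dir.mkdir(parents=True, exist_ok=True)
(metrics_dir / "sample.json").write_text(json.dumps([entry]))
```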
-@dataclass
-class AggregateMetrics:
-    """Aggregate metrics across sessions."""
-    total_sessions: int = 0
-    total_messages: int = 0
-
-    # Level distribution
-    level_counts: Dict[str, int] = field(default_factory=lambda: {
-        "NONE": 0, "LOW": 0, "MEDIUM": 0, "HIGH": 0, "CRITICAL": 0
-    })
-
-    # Escalation tracking
-    escalating_sessions: int = 0
-    deescalating_sessions: int = 0
-
-    # Safety interventions
-    overlay_triggers: int = 0
-    ninety_eight_show: int = 0
-
-    # Time window
-    period_start: Optional[float] = None
-    period_end: Optional[float] = None
+def summarize(entries: List[dict]) -> dict:
+    """Summarize metrics entries."""
+    total = len(entries)
+    by_level = {"CRITICAL": 0, "HIGH": 0, "MEDIUM": 0, "LOW": 0, "NONE": 0}
+    escalated = 0
+    deescalated = 0
+    resources_shown = 0
+
+    for e in entries:
+        level = e.get("level", "NONE")
+        by_level[level] = by_level.get(level, 0) + 1
+        if e.get("escalated"):
+            escalated += 1
+        if e.get("deescalation_confirmed"):
+            deescalated += 1
+        if e.get("resources_shown"):
+            resources_shown += 1
+
+    return {
+        "period_hours": 168,
+        "total_interactions": total,
+        "by_level": by_level,
+        "escalated_sessions": escalated,
+        "deescalated_sessions": deescalated,
+        "resources_shown": resources_shown,
+        "crisis_rate": round((by_level["CRITICAL"] + by_level["HIGH"]) / max(total, 1) * 100, 1),
+    }
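A quick worked example of `summarize()`: one HIGH entry and one NONE entry give a 50% crisis rate, since the rate counts CRITICAL plus HIGH over the total (data hypothetical):

```python
from crisis.metrics import summarize

# Two hypothetical entries: one crisis-level, one quiet session.
entries = [
    {"level": "HIGH", "escalated": True, "resources_shown": True},
    {"level": "NONE"},
]

s = summarize(entries)
assert s["total_interactions"] == 2
assert s["by_level"]["HIGH"] == 1
assert s["escalated_sessions"] == 1
assert s["crisis_rate"] == 50.0  # round((0 + 1) / 2 * 100, 1)
```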
-class CrisisMetrics:
-    """
-    Aggregate crisis metrics with local JSON persistence.
-
-    Privacy-first: stores only aggregate counts per day.
-    Never stores user messages, content, or identifying info.
-    """
-
-    def __init__(self, metrics_dir: Optional[Path] = None):
-        self.metrics_dir = metrics_dir or METRICS_DIR
-        self.metrics_dir.mkdir(parents=True, exist_ok=True)
-        self._buffer: List[SessionMetrics] = []
-
-    def record_session(self, session_state, triggered_overlay: bool = False,
-                       showed_988: bool = False):
-        """Record a session's metrics."""
-        from .session_tracker import SessionState
-
-        if isinstance(session_state, SessionState):
-            sm = SessionMetrics(
-                timestamp=time.time(),
-                current_level=session_state.current_level,
-                peak_level=session_state.peak_level,
-                message_count=session_state.message_count,
-                was_escalating=session_state.is_escalating,
-                was_deescalating=session_state.is_deescalating,
-                escalation_rate=session_state.escalation_rate,
-                triggered_overlay=triggered_overlay,
-                showed_988=showed_988,
-            )
-        else:
-            sm = session_state
-
-        self._buffer.append(sm)
-        self._flush()
-
-    def _flush(self):
-        """Write buffered sessions to daily file."""
-        if not self._buffer:
-            return
-
-        today = datetime.utcnow().strftime("%Y-%m-%d")
-        filepath = self.metrics_dir / f"{today}.jsonl"
-
-        with open(filepath, 'a') as f:
-            for sm in self._buffer:
-                f.write(json.dumps(asdict(sm)) + '\n')
-
-        self._buffer.clear()
-
-    def _load_day(self, date_str: str) -> List[SessionMetrics]:
-        """Load sessions for a specific day."""
-        filepath = self.metrics_dir / f"{date_str}.jsonl"
-        if not filepath.exists():
-            return []
-
-        sessions = []
-        with open(filepath) as f:
-            for line in f:
-                if line.strip():
-                    data = json.loads(line)
-                    sessions.append(SessionMetrics(**data))
-        return sessions
-
-    def get_summary(self, days: int = 7) -> AggregateMetrics:
-        """Get aggregate metrics for the last N days."""
-        agg = AggregateMetrics()
-
-        now = datetime.utcnow()
-        for i in range(days):
-            date = (now - timedelta(days=i)).strftime("%Y-%m-%d")
-            sessions = self._load_day(date)
-
-            for sm in sessions:
-                agg.total_sessions += 1
-                agg.total_messages += sm.message_count
-
-                # Level counts (use peak level)
-                level = sm.peak_level
-                agg.level_counts[level] = agg.level_counts.get(level, 0) + 1
-
-                if sm.was_escalating:
-                    agg.escalating_sessions += 1
-                if sm.was_deescalating:
-                    agg.deescalating_sessions += 1
-                if sm.triggered_overlay:
-                    agg.overlay_triggers += 1
-                if sm.showed_988:
-                    agg.ninety_eight_show += 1
-
-                # Time window
-                if agg.period_start is None or sm.timestamp < agg.period_start:
-                    agg.period_start = sm.timestamp
-                if agg.period_end is None or sm.timestamp > agg.period_end:
-                    agg.period_end = sm.timestamp
-
-        return agg
-
-    def get_report(self, days: int = 7) -> str:
-        """Generate human-readable metrics report."""
-        agg = self.get_summary(days)
-
-        lines = []
-        lines.append("=" * 50)
-        lines.append("  CRISIS METRICS REPORT")
-        lines.append(f"  Last {days} days")
-        if agg.period_start:
-            start = datetime.fromtimestamp(agg.period_start).strftime("%Y-%m-%d %H:%M")
-            lines.append(f"  Period: {start} → now")
-        lines.append("=" * 50)
-
-        lines.append(f"\n  Sessions: {agg.total_sessions}")
-        lines.append(f"  Messages tracked: {agg.total_messages}")
-
-        lines.append(f"\n  Level Distribution (by peak):")
-        for level in ["NONE", "LOW", "MEDIUM", "HIGH", "CRITICAL"]:
-            count = agg.level_counts.get(level, 0)
-            pct = (count / agg.total_sessions * 100) if agg.total_sessions > 0 else 0
-            bar = "█" * int(pct / 5)
-            lines.append(f"    {level:<10} {count:>5} ({pct:>5.1f}%) {bar}")
-
-        lines.append(f"\n  Escalations: {agg.escalating_sessions}")
-        lines.append(f"  De-escalations: {agg.deescalating_sessions}")
-        lines.append(f"  Overlay triggers: {agg.overlay_triggers}")
-        lines.append(f"  988 shown: {agg.ninety_eight_show}")
-
-        if agg.total_sessions > 0:
-            escalation_rate = agg.escalating_sessions / agg.total_sessions * 100
-            lines.append(f"\n  Escalation rate: {escalation_rate:.1f}%")
-
-        lines.append("=" * 50)
-
-        return "\n".join(lines)
-
-    def get_json(self, days: int = 7) -> str:
-        """Export metrics as JSON."""
-        agg = self.get_summary(days)
-        return json.dumps(asdict(agg), indent=2)
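Note: the two sides of this rewrite do not share a storage format. The removed class appended one `SessionMetrics` object per line to daily `*.jsonl` files, while the new `load_metrics()` globs whole-document `*.json` files, so data written by the old recorder is invisible to the new CLI. A sketch of reading one of the old daily files directly, using the `SessionMetrics` field names above (the date is hypothetical):

```python
import json
from pathlib import Path

# Read rows written by the removed CrisisMetrics._flush(): one JSON
# object per line in ~/.the-door/metrics/YYYY-MM-DD.jsonl.
day_file = Path.home() / ".the-door" / "metrics" / "2025-01-15.jsonl"
if day_file.exists():
    for line in day_file.read_text().splitlines():
        if line.strip():
            row = json.loads(line)
            print(row["peak_level"], row["message_count"])
```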
+def print_summary(summary: dict):
+    print(f"\n{'='*50}")
+    print(f"  CRISIS METRICS SUMMARY")
+    print(f"  {datetime.now().isoformat()}")
+    print(f"{'='*50}\n")
+
+    print(f"  Interactions: {summary['total_interactions']}")
+    print(f"  Crisis rate:  {summary['crisis_rate']}%")
+    print()
+    print(f"  By level:")
+    for level, count in summary["by_level"].items():
+        bar = "█" * min(count, 40)
+        print(f"    {level:10} {count:5} {bar}")
+    print()
+    print(f"  Escalated:    {summary['escalated_sessions']}")
+    print(f"  De-escalated: {summary['deescalated_sessions']}")
+    print(f"  988 shown:    {summary['resources_shown']}")
+
+
 def main():
     """CLI entry point for crisis metrics."""
     import argparse
 
-    parser = argparse.ArgumentParser(description="Crisis Detection Metrics")
-    parser.add_argument("--summary", action="store_true", help="Show summary report")
+    parser = argparse.ArgumentParser(description="Crisis Metrics CLI")
+    parser.add_argument("--summary", action="store_true", help="Weekly summary")
     parser.add_argument("--json", action="store_true", help="JSON export")
-    parser.add_argument("--days", type=int, default=7, help="Days to include")
-    parser.add_argument("--demo", action="store_true", help="Generate demo data")
+    parser.add_argument("--last", default="168h", help="Time window (e.g., 24h, 7d)")
     args = parser.parse_args()
 
-    metrics = CrisisMetrics()
-
-    if args.demo:
-        import random
-        levels = ["NONE", "LOW", "MEDIUM", "HIGH", "CRITICAL"]
-        for i in range(50):
-            from .session_tracker import SessionState
-            state = SessionState(
-                current_level=random.choice(levels),
-                peak_level=random.choice(levels),
-                message_count=random.randint(1, 20),
-                is_escalating=random.random() > 0.7,
-                is_deescalating=random.random() > 0.8,
-                escalation_rate=random.random(),
-            )
-            metrics.record_session(
-                state,
-                triggered_overlay=random.random() > 0.8,
-                showed_988=random.random() > 0.7,
-            )
-        print("Generated 50 demo sessions.")
-
-    if args.json:
-        print(metrics.get_json(args.days))
-    else:
-        print(metrics.get_report(args.days))
+    # Parse time window
+    last = args.last
+    if last.endswith("h"):
+        hours = int(last[:-1])
+    elif last.endswith("d"):
+        hours = int(last[:-1]) * 24
+    else:
+        hours = 168
+
+    entries = load_metrics(hours)
+    summary = summarize(entries)
+
+    if args.json:
+        print(json.dumps(summary, indent=2))
+    else:
+        print_summary(summary)
 
 
 if __name__ == "__main__":
     main()
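The `--last` handling in `main()` is simple suffix parsing: `24h` means 24 hours, `7d` means 7 × 24 hours, and anything else falls back to the one-week default. The same steps done by hand:

```python
from crisis.metrics import load_metrics, summarize

window = "7d"  # as passed via --last
if window.endswith("h"):
    hours = int(window[:-1])
elif window.endswith("d"):
    hours = int(window[:-1]) * 24
else:
    hours = 168  # default: one week

summary = summarize(load_metrics(hours))
print(summary["total_interactions"], summary["crisis_rate"])
```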
Deleted test file for crisis/metrics.py
@@ -1,118 +0,0 @@
-"""
-Tests for crisis/metrics.py — Aggregate crisis metrics.
-"""
-
-import json
-import os
-import shutil
-import tempfile
-import unittest
-from pathlib import Path
-
-import sys
-sys.path.insert(0, str(Path(__file__).parent.parent))
-
-from crisis.metrics import CrisisMetrics, SessionMetrics, AggregateMetrics
-
-
-class TestCrisisMetrics(unittest.TestCase):
-    def setUp(self):
-        self.tmpdir = tempfile.mkdtemp()
-        self.metrics = CrisisMetrics(Path(self.tmpdir))
-
-    def tearDown(self):
-        shutil.rmtree(self.tmpdir)
-
-    def test_record_session_creates_file(self):
-        sm = SessionMetrics(
-            timestamp=1700000000,
-            current_level="LOW",
-            peak_level="MEDIUM",
-            message_count=5,
-            was_escalating=True,
-            was_deescalating=False,
-            escalation_rate=0.5,
-        )
-        self.metrics.record_session(sm)
-
-        files = list(Path(self.tmpdir).glob("*.jsonl"))
-        self.assertEqual(len(files), 1)
-
-    def test_record_session_writes_jsonl(self):
-        sm = SessionMetrics(
-            timestamp=1700000000,
-            current_level="HIGH",
-            peak_level="CRITICAL",
-            message_count=10,
-            was_escalating=True,
-            was_deescalating=False,
-            escalation_rate=1.0,
-            triggered_overlay=True,
-            showed_988=True,
-        )
-        self.metrics.record_session(sm)
-
-        files = list(Path(self.tmpdir).glob("*.jsonl"))
-        with open(files[0]) as f:
-            data = json.loads(f.readline())
-        self.assertEqual(data['peak_level'], 'CRITICAL')
-        self.assertTrue(data['triggered_overlay'])
-
-    def test_get_summary_empty(self):
-        agg = self.metrics.get_summary(days=7)
-        self.assertEqual(agg.total_sessions, 0)
-        self.assertEqual(agg.total_messages, 0)
-
-    def test_get_summary_with_data(self):
-        for level in ["LOW", "MEDIUM", "HIGH"]:
-            sm = SessionMetrics(
-                timestamp=1700000000,
-                current_level=level,
-                peak_level=level,
-                message_count=3,
-                was_escalating=level != "LOW",
-                was_deescalating=False,
-                escalation_rate=0.5,
-            )
-            self.metrics.record_session(sm)
-
-        agg = self.metrics.get_summary(days=1)
-        self.assertEqual(agg.total_sessions, 3)
-        self.assertEqual(agg.total_messages, 9)
-        self.assertEqual(agg.escalating_sessions, 2)
-
-    def test_get_report_returns_string(self):
-        sm = SessionMetrics(
-            timestamp=1700000000,
-            current_level="LOW",
-            peak_level="LOW",
-            message_count=5,
-            was_escalating=False,
-            was_deescalating=False,
-            escalation_rate=0.0,
-        )
-        self.metrics.record_session(sm)
-
-        report = self.metrics.get_report(days=1)
-        self.assertIn("CRISIS METRICS REPORT", report)
-        self.assertIn("Sessions:", report)
-
-    def test_get_json_returns_valid(self):
-        sm = SessionMetrics(
-            timestamp=1700000000,
-            current_level="MEDIUM",
-            peak_level="MEDIUM",
-            message_count=3,
-            was_escalating=False,
-            was_deescalating=False,
-            escalation_rate=0.0,
-        )
-        self.metrics.record_session(sm)
-
-        json_str = self.metrics.get_json(days=1)
-        data = json.loads(json_str)
-        self.assertEqual(data['total_sessions'], 1)
-
-
-if __name__ == "__main__":
-    unittest.main()
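Note: the deleted suite only covered the removed class, and nothing in this commit replaces it for the new functions. A minimal sketch of what coverage for `summarize()` could look like; the test names and file layout are assumptions, not part of the commit:

```python
import unittest

from crisis.metrics import summarize


class TestSummarize(unittest.TestCase):
    def test_counts_levels_and_rate(self):
        # Hypothetical entries: one crisis-level, one quiet.
        entries = [
            {"level": "CRITICAL", "escalated": True},
            {"level": "LOW", "resources_shown": True},
        ]
        s = summarize(entries)
        self.assertEqual(s["total_interactions"], 2)
        self.assertEqual(s["by_level"]["CRITICAL"], 1)
        self.assertEqual(s["crisis_rate"], 50.0)

    def test_empty_input(self):
        # max(total, 1) in summarize() keeps the empty case at 0.0.
        s = summarize([])
        self.assertEqual(s["total_interactions"], 0)
        self.assertEqual(s["crisis_rate"], 0.0)


if __name__ == "__main__":
    unittest.main()
```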