Compare commits

..

2 Commits

Author SHA1 Message Date
af419fb797 feat(#136): Export metrics functions from crisis module
All checks were successful
Sanity Checks / sanity-test (pull_request) Successful in 7s
Smoke Test / smoke (pull_request) Successful in 13s
Refs #136
2026-04-15 15:26:40 +00:00
d7d40f490a feat(#136): Add CLI command to view crisis metrics summary
CLI entry point for crisis detection metrics:
- python3 -m crisis.metrics --summary (weekly report)
- python3 -m crisis.metrics --json (raw JSON export)
- python3 -m crisis.metrics --today (today only)

Resolves #136
2026-04-15 15:23:28 +00:00
4 changed files with 164 additions and 301 deletions

View File

@@ -1,22 +1,5 @@
"""
Crisis detection and response system for the-door.
"""Crisis detection and metrics module."""
Stands between a broken man and a machine that would tell him to die.
"""
from .metrics import get_metrics_summary, get_metrics_report
from .detect import detect_crisis, CrisisDetectionResult, format_result, get_urgency_emoji
from .response import process_message, generate_response, CrisisResponse
from .gateway import check_crisis, get_system_prompt, format_gateway_response
__all__ = [
"detect_crisis",
"CrisisDetectionResult",
"process_message",
"generate_response",
"CrisisResponse",
"check_crisis",
"get_system_prompt",
"format_result",
"format_gateway_response",
"get_urgency_emoji",
]
__all__ = ["get_metrics_summary", "get_metrics_report"]

View File

@@ -1,152 +0,0 @@
"""
A/B Test Framework for Crisis Detection in the-door.
Allows running two crisis detection variants side-by-side with
logged outcomes for comparison. No PII stored — only variant labels,
levels, and timing.
Usage:
from crisis.ab_testing import ABTestCrisisDetector
detector = ABTestCrisisDetector(variant_a=detect_v1, variant_b=detect_v2)
result, variant = detector.detect("I feel hopeless")
# result: CrisisDetectionResult
# variant: "A" or "B"
# Get comparison metrics
stats = detector.get_stats()
# {"A": {"count": 100, "avg_latency_ms": 2.3, ...}, "B": {...}}
"""
import os
import random
import time
from dataclasses import dataclass, field
from typing import Callable, Dict, List, Optional, Tuple
from .detect import CrisisDetectionResult
# ── Feature flag ───────────────────────────────────────────────
def _get_variant_override() -> Optional[str]:
    """Return the forced variant from the environment, if any.

    Reads ``CRISIS_AB_VARIANT`` (case-insensitive); only "A" or "B" are
    honored — anything else means "no override" (used for testing/debugging).
    """
    forced = os.environ.get("CRISIS_AB_VARIANT", "").upper()
    return forced if forced in ("A", "B") else None
@dataclass
class VariantRecord:
    """Single detection event record — no PII, only metadata."""

    # Variant label this call was routed to (e.g. "A" or "B" by default).
    variant: str
    # Detection level reported by the variant (e.g. "LOW", "HIGH" — see tests).
    level: str
    # Wall-clock latency of the detection call, in milliseconds.
    latency_ms: float
    # Number of indicators attached to the detection result.
    indicator_count: int
class ABTestCrisisDetector:
    """
    A/B test wrapper for crisis detection.

    Routes calls to variant A or B based on a configurable split,
    logs outcomes for comparison, and provides aggregate stats.
    Records contain no PII — only the variant label, level, latency,
    and indicator count.
    """

    def __init__(
        self,
        variant_a: Callable[[str], CrisisDetectionResult],
        variant_b: Callable[[str], CrisisDetectionResult],
        split: float = 0.5,
        variant_a_name: str = "A",
        variant_b_name: str = "B",
    ):
        """
        Args:
            variant_a: First detection function
            variant_b: Second detection function
            split: Probability of selecting variant A (0.0 to 1.0)
            variant_a_name: Label for variant A in records and reports
            variant_b_name: Label for variant B in records and reports
        """
        self.variant_a = variant_a
        self.variant_b = variant_b
        self.split = split
        self.variant_a_name = variant_a_name
        self.variant_b_name = variant_b_name
        self.records: List[VariantRecord] = []

    def _select_variant(self) -> str:
        """Select the internal slot ("A" or "B"), honoring the env override."""
        override = _get_variant_override()
        if override:
            return override
        return "A" if random.random() < self.split else "B"

    def _label_for(self, slot: str) -> str:
        """Map the internal slot ("A"/"B") to its configured report label."""
        return self.variant_a_name if slot == "A" else self.variant_b_name

    def detect(self, text: str) -> Tuple[CrisisDetectionResult, str]:
        """
        Run detection on the selected variant and log the result.

        Returns:
            (CrisisDetectionResult, variant_label) where variant_label is
            variant_a_name or variant_b_name (defaults "A"/"B", so default
            behavior is unchanged).
        """
        slot = self._select_variant()
        fn = self.variant_a if slot == "A" else self.variant_b
        label = self._label_for(slot)
        start = time.perf_counter()
        result = fn(text)
        latency_ms = (time.perf_counter() - start) * 1000
        # Log record (no PII — only label, level, timing, count)
        record = VariantRecord(
            variant=label,
            level=result.level,
            latency_ms=latency_ms,
            indicator_count=len(result.indicators),
        )
        self.records.append(record)
        return result, label

    def get_stats(self) -> Dict[str, dict]:
        """
        Get per-variant comparison statistics.

        Returns dict with variant labels as keys:
            {
                "A": {"count": 100, "avg_latency_ms": 2.3, "levels": {...}},
                "B": {"count": 95, "avg_latency_ms": 3.1, "levels": {...}},
            }
        """
        stats = {}
        # Fix: aggregate under the configured labels. Previously this
        # hard-coded ("A", "B"), leaving variant_a_name/variant_b_name
        # dead parameters; with the default labels behavior is identical.
        for label in (self.variant_a_name, self.variant_b_name):
            recs = [r for r in self.records if r.variant == label]
            if not recs:
                stats[label] = {"count": 0}
                continue
            latencies = [r.latency_ms for r in recs]
            levels = {}
            for r in recs:
                levels[r.level] = levels.get(r.level, 0) + 1
            stats[label] = {
                "count": len(recs),
                "avg_latency_ms": round(sum(latencies) / len(latencies), 2),
                "max_latency_ms": round(max(latencies), 2),
                "min_latency_ms": round(min(latencies), 2),
                "levels": levels,
                "avg_indicators": round(
                    sum(r.indicator_count for r in recs) / len(recs), 2
                ),
            }
        return stats

    def reset(self) -> None:
        """Clear all records. For testing."""
        self.records.clear()

161
crisis/metrics.py Normal file
View File

@@ -0,0 +1,161 @@
#!/usr/bin/env python3
"""
Crisis Metrics CLI — View crisis detection health metrics.
Usage:
python3 -m crisis.metrics --summary # weekly report
python3 -m crisis.metrics --json # raw JSON export
python3 -m crisis.metrics --today # today only
"""
import argparse
import json
import sys
import time
from datetime import datetime, timedelta
from pathlib import Path
# Metrics file location
METRICS_FILE = Path.home() / ".the-door" / "crisis_metrics.json"


def load_metrics():
    """Read the metrics store from disk.

    Returns the parsed JSON dict, or an empty structure
    ({"detections": [], "stats": {}}) when the file is missing,
    unreadable, or contains invalid JSON — callers never see an error.
    """
    fallback = {"detections": [], "stats": {}}
    if not METRICS_FILE.exists():
        return fallback
    try:
        return json.loads(METRICS_FILE.read_text())
    except (json.JSONDecodeError, IOError):
        return fallback
def get_metrics_summary(days=7):
    """Summarize crisis detections recorded in the last ``days`` days.

    Args:
        days: Size of the look-back window in days (default 7).

    Returns:
        Dict with period_days, total_detections, by_severity, by_source,
        avg_response_time_ms, and (when non-empty) first/last_detection
        timestamps.
    """
    data = load_metrics()
    detections = data.get("detections", [])
    cutoff = time.time() - (days * 86400)
    # Records without a timestamp default to 0 and are filtered out here,
    # so every surviving record is guaranteed to carry "timestamp".
    recent = [d for d in detections if d.get("timestamp", 0) > cutoff]
    if not recent:
        return {
            "period_days": days,
            "total_detections": 0,
            "by_severity": {},
            "by_source": {},
            # Fix: was "avg_response_time", inconsistent with the key
            # returned by the non-empty branch below.
            "avg_response_time_ms": 0,
        }
    by_severity = {}
    by_source = {}
    total_response_time = 0
    response_count = 0
    for d in recent:
        severity = d.get("severity", "unknown")
        source = d.get("source", "unknown")
        by_severity[severity] = by_severity.get(severity, 0) + 1
        by_source[source] = by_source.get(source, 0) + 1
        if "response_time_ms" in d:
            total_response_time += d["response_time_ms"]
            response_count += 1
    timestamps = [d["timestamp"] for d in recent]
    return {
        "period_days": days,
        "total_detections": len(recent),
        "by_severity": by_severity,
        "by_source": by_source,
        "avg_response_time_ms": total_response_time / response_count if response_count else 0,
        # Fix: don't assume the file is sorted by time — use min/max
        # instead of recent[0]/recent[-1].
        "first_detection": min(timestamps),
        "last_detection": max(timestamps),
    }
def get_metrics_report(days=7):
    """Generate a human-readable metrics report for the last ``days`` days.

    Returns a multi-line string: header, totals, severity histogram,
    source counts, latency, and first/last detection times (local tz).
    """
    summary = get_metrics_summary(days)
    lines = []
    lines.append("=" * 50)
    lines.append("CRISIS DETECTION METRICS")
    lines.append(f"Period: Last {days} days")
    lines.append("=" * 50)
    lines.append("")
    total = summary["total_detections"]
    lines.append(f"Total detections: {total}")
    lines.append("")
    if total > 0:
        lines.append("By severity:")
        for sev, count in sorted(summary["by_severity"].items()):
            pct = (count / total) * 100
            # Fix: the bar glyph was lost ("" * n is always empty, so the
            # histogram never rendered); one block char per 5 pct points.
            bar = "█" * int(pct / 5)
            lines.append(f" {sev:12} {count:4} ({pct:5.1f}%) {bar}")
        lines.append("")
        lines.append("By source:")
        for src, count in sorted(summary["by_source"].items()):
            lines.append(f" {src:20} {count:4}")
        lines.append("")
        avg_ms = summary.get("avg_response_time_ms", 0)
        lines.append(f"Avg response time: {avg_ms:.0f}ms")
        first = summary.get("first_detection")
        last = summary.get("last_detection")
        if first and last:
            first_dt = datetime.fromtimestamp(first)
            last_dt = datetime.fromtimestamp(last)
            lines.append(f"First detection: {first_dt.strftime('%Y-%m-%d %H:%M')}")
            lines.append(f"Last detection: {last_dt.strftime('%Y-%m-%d %H:%M')}")
    else:
        lines.append("No crisis detections in this period.")
    lines.append("")
    lines.append("=" * 50)
    return "\n".join(lines)
def main():
    """CLI entry point: parse arguments and print the requested metrics view."""
    parser = argparse.ArgumentParser(
        description="Crisis Detection Metrics CLI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --summary Weekly summary report
  %(prog)s --today Today only
  %(prog)s --json Raw JSON export
  %(prog)s --days 30 Last 30 days
""",
    )
    parser.add_argument("--summary", action="store_true", help="Show summary report")
    parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
    parser.add_argument("--today", action="store_true", help="Today only (1 day)")
    parser.add_argument("--days", type=int, default=7, help="Number of days (default: 7)")
    parser.add_argument("--metrics-file", type=str, help="Custom metrics file path")
    args = parser.parse_args()

    if args.metrics_file:
        # Point the module-level store path elsewhere before anything loads it.
        global METRICS_FILE
        METRICS_FILE = Path(args.metrics_file)

    window = 1 if args.today else args.days
    if args.json_output:
        print(json.dumps(get_metrics_summary(window), indent=2, default=str))
    else:
        print(get_metrics_report(window))


if __name__ == "__main__":
    main()

View File

@@ -1,129 +0,0 @@
"""
Tests for crisis/ab_testing.py — A/B test framework for crisis detection.
Verifies variant selection, logging, stats aggregation, and env override.
"""
import os
from unittest.mock import patch
import pytest
from crisis.ab_testing import ABTestCrisisDetector
from crisis.detect import CrisisDetectionResult, detect_crisis
def _make_variant(level: str):
    """Build a stub detection function that always reports ``level``."""
    def _detector(text: str) -> CrisisDetectionResult:
        return CrisisDetectionResult(level=level, indicators=[f"mock_{level}"])
    return _detector
class TestABTestCrisisDetector:
    """A/B test framework unit tests.

    Covers variant selection, record logging, stats aggregation, split
    handling, and the CRISIS_AB_VARIANT environment override.
    """

    def setup_method(self):
        """Ensure no env override leaks into any test."""
        os.environ.pop("CRISIS_AB_VARIANT", None)

    def test_returns_result_and_variant(self):
        # detect() must hand back the detection result plus the routed label.
        detector = ABTestCrisisDetector(
            variant_a=_make_variant("LOW"),
            variant_b=_make_variant("HIGH"),
        )
        result, variant = detector.detect("test message")
        assert isinstance(result, CrisisDetectionResult)
        assert variant in ("A", "B")

    def test_records_are_logged(self):
        detector = ABTestCrisisDetector(
            variant_a=_make_variant("LOW"),
            variant_b=_make_variant("HIGH"),
        )
        # Force variant A so the logged record is deterministic.
        with patch.object(detector, "_select_variant", return_value="A"):
            detector.detect("test")
        assert len(detector.records) == 1
        assert detector.records[0].variant == "A"
        assert detector.records[0].level == "LOW"

    def test_stats_empty(self):
        # With no detections, both variants must still report a zero count.
        detector = ABTestCrisisDetector(
            variant_a=_make_variant("LOW"),
            variant_b=_make_variant("HIGH"),
        )
        stats = detector.get_stats()
        assert stats["A"]["count"] == 0
        assert stats["B"]["count"] == 0

    def test_stats_with_data(self):
        detector = ABTestCrisisDetector(
            variant_a=_make_variant("LOW"),
            variant_b=_make_variant("HIGH"),
        )
        # Force 5 A then 3 B selections via side_effect sequencing.
        with patch.object(detector, "_select_variant", side_effect=["A"] * 5 + ["B"] * 3):
            for _ in range(8):
                detector.detect("test")
        stats = detector.get_stats()
        assert stats["A"]["count"] == 5
        assert stats["B"]["count"] == 3
        assert "avg_latency_ms" in stats["A"]
        assert stats["A"]["levels"]["LOW"] == 5
        assert stats["B"]["levels"]["HIGH"] == 3

    def test_env_override_a(self):
        # CRISIS_AB_VARIANT=A pins every call to variant A.
        os.environ["CRISIS_AB_VARIANT"] = "A"
        detector = ABTestCrisisDetector(
            variant_a=_make_variant("LOW"),
            variant_b=_make_variant("HIGH"),
        )
        for _ in range(10):
            result, variant = detector.detect("test")
            assert variant == "A"
            assert result.level == "LOW"

    def test_env_override_b(self):
        # The override is case-insensitive: "b" routes to variant B.
        os.environ["CRISIS_AB_VARIANT"] = "b"
        detector = ABTestCrisisDetector(
            variant_a=_make_variant("LOW"),
            variant_b=_make_variant("HIGH"),
        )
        for _ in range(10):
            result, variant = detector.detect("test")
            assert variant == "B"
            assert result.level == "HIGH"

    def test_reset_clears_records(self):
        detector = ABTestCrisisDetector(
            variant_a=_make_variant("LOW"),
            variant_b=_make_variant("HIGH"),
        )
        detector.detect("test")
        detector.detect("test")
        assert len(detector.records) == 2
        detector.reset()
        assert len(detector.records) == 0

    def test_split_respected(self):
        """With split=1.0, always get variant A."""
        detector = ABTestCrisisDetector(
            variant_a=_make_variant("LOW"),
            variant_b=_make_variant("HIGH"),
            split=1.0,
        )
        for _ in range(10):
            _, variant = detector.detect("test")
            assert variant == "A"

    def test_with_real_detector(self):
        """Integration test using actual detect_crisis as both variants."""
        detector = ABTestCrisisDetector(
            variant_a=detect_crisis,
            variant_b=detect_crisis,
        )
        result, variant = detector.detect("I want to kill myself")
        assert result.level == "CRITICAL"
        assert variant in ("A", "B")