feat: add crisis ab testing for #101

test: define crisis ab testing for #101
fix: footer /about link to point to static about.html
2026-04-20 21:43:37 -04:00 · 2026-04-20 21:41:31 -04:00 · 2026-04-17 05:37:40 +00:00
7 changed files with 254 additions and 464 deletions
--- a/crisis/__init__.py
+++ b/crisis/__init__.py
@@ -8,6 +8,7 @@ from .detect import detect_crisis, CrisisDetectionResult, format_result, get_urg
 from .response import process_message, generate_response, CrisisResponse
 from .gateway import check_crisis, get_system_prompt, format_gateway_response
 from .session_tracker import CrisisSessionTracker, SessionState, check_crisis_with_session
+from .ab_testing import ABTestCrisisDetector, VariantRecord

 __all__ = [
    "detect_crisis",
@@ -23,4 +24,6 @@ __all__ = [
    "CrisisSessionTracker",
    "SessionState",
    "check_crisis_with_session",
+    "ABTestCrisisDetector",
+    "VariantRecord",
 ]
--- a/crisis/ab_testing.py
+++ b/crisis/ab_testing.py
@@ -0,0 +1,112 @@
+"""A/B test framework for crisis detection in the-door."""
+
+from __future__ import annotations
+
+import os
+import random
+import time
+from dataclasses import dataclass
+from typing import Callable, Dict, List, Optional, Tuple
+
+from .detect import CrisisDetectionResult
+
+
+def _get_variant_override() -> Optional[str]:
+    """Return env override for deterministic testing/debugging."""
+    value = os.environ.get("CRISIS_AB_VARIANT", "").strip().upper()
+    if value in {"A", "B"}:
+        return value
+    return None
+
+
+@dataclass
+class VariantRecord:
+    """Single crisis detection event record with no user text or PII."""
+
+    variant: str
+    level: str
+    latency_ms: float
+    indicator_count: int
+    false_positive: Optional[bool] = None
+
+
+class ABTestCrisisDetector:
+    """Route crisis detection between two variants and collect comparison stats."""
+
+    def __init__(
+        self,
+        variant_a: Callable[[str], CrisisDetectionResult],
+        variant_b: Callable[[str], CrisisDetectionResult],
+        split: float = 0.5,
+    ):
+        self.variant_a = variant_a
+        self.variant_b = variant_b
+        self.split = max(0.0, min(1.0, float(split)))
+        self.records: List[VariantRecord] = []
+
+    def _select_variant(self) -> str:
+        override = _get_variant_override()
+        if override:
+            return override
+        return "A" if random.random() < self.split else "B"
+
+    def detect(self, text: str) -> Tuple[CrisisDetectionResult, str, int]:
+        variant = self._select_variant()
+        detector = self.variant_a if variant == "A" else self.variant_b
+
+        start = time.perf_counter()
+        result = detector(text)
+        latency_ms = (time.perf_counter() - start) * 1000.0
+
+        record = VariantRecord(
+            variant=variant,
+            level=result.level,
+            latency_ms=latency_ms,
+            indicator_count=len(result.indicators),
+        )
+        self.records.append(record)
+        return result, variant, len(self.records) - 1
+
+    def record_outcome(self, record_id: int, *, false_positive: bool) -> None:
+        if record_id < 0 or record_id >= len(self.records):
+            raise IndexError(f"Unknown record id: {record_id}")
+        self.records[record_id].false_positive = bool(false_positive)
+
+    def get_stats(self) -> Dict[str, dict]:
+        stats: Dict[str, dict] = {}
+        for variant in ("A", "B"):
+            records = [record for record in self.records if record.variant == variant]
+            if not records:
+                stats[variant] = {
+                    "count": 0,
+                    "reviewed_count": 0,
+                    "false_positive_rate": None,
+                }
+                continue
+
+            levels: Dict[str, int] = {}
+            for record in records:
+                levels[record.level] = levels.get(record.level, 0) + 1
+
+            reviewed = [record for record in records if record.false_positive is not None]
+            false_positive_rate = None
+            if reviewed:
+                false_positive_rate = round(
+                    sum(1 for record in reviewed if record.false_positive) / len(reviewed),
+                    4,
+                )
+
+            stats[variant] = {
+                "count": len(records),
+                "avg_latency_ms": round(sum(record.latency_ms for record in records) / len(records), 4),
+                "max_latency_ms": round(max(record.latency_ms for record in records), 4),
+                "min_latency_ms": round(min(record.latency_ms for record in records), 4),
+                "avg_indicator_count": round(sum(record.indicator_count for record in records) / len(records), 4),
+                "levels": levels,
+                "reviewed_count": len(reviewed),
+                "false_positive_rate": false_positive_rate,
+            }
+        return stats
+
+    def reset(self) -> None:
+        self.records.clear()
--- a/index.html
+++ b/index.html
@@ -680,7 +680,7 @@ html, body {

  <!-- Footer -->
  <footer id="footer">
-    <a href="/about" aria-label="About The Door">about</a>
+    <a href="/about.html" aria-label="About The Door">about</a>
    <button id="safety-plan-btn" aria-label="Open My Safety Plan">my safety plan</button>
    <button id="clear-chat-btn" aria-label="Clear chat history">clear chat</button>
  </footer>
--- a/reports/2026-04-17-the-door-fleet-work-orders-audit.md
+++ b/reports/2026-04-17-the-door-fleet-work-orders-audit.md
@@ -1,68 +0,0 @@
-# The Door Fleet Work Orders Audit — issue #75
-
-Generated: 2026-04-17T04:10:14Z
-Source issue: `TRIAGE: The Door - Fleet Work Orders (2026-04-09)`
-
-## Source Snapshot
-
-Issue #75 is a dated triage work-order sheet, not a normal feature request. The durable deliverable is a truth-restored audit of the referenced issue and PR set against live forge state.
-
-## Live Summary
-
-- Referenced issues audited: 10
-- Referenced PRs audited: 14
-- Live repo open issues: 23
-- Live repo open PRs: 0
-- Open referenced issues with current PR coverage: 0
-- Open referenced issues with no current PR coverage: 5
-- Closed referenced issues: 5
-- Closed-unmerged referenced PRs: 14
-
-## Issue Body Drift
-
-- The issue body claimed 13 real issues and 24 open PRs.
-- Live repo state now shows 23 open issues and 0 open PRs.
-- Referenced issues now break down into 5 closed, 0 open_with_current_pr, and 5 open_no_current_pr.
-- Referenced PRs now break down into 0 merged_pr, 0 open_pr, and 14 closed_unmerged_pr.
-
-## Referenced Issue Snapshot
-
-| Issue | State | Classification | Current PR Coverage | Title |
-|---|---|---|---|---|
-| #35 | closed | closed_issue | none | [P0] Session-level crisis tracking and escalation |
-| #67 | closed | closed_issue | none | [P1] Crisis overlay does not trap keyboard focus while active |
-| #69 | closed | closed_issue | none | [P2] Crisis overlay sets initial focus to a disabled button |
-| #65 | closed | closed_issue | none | [P2] Safety plan modal does not trap keyboard focus while open |
-| #37 | open | open_no_current_pr | none | [P1] Analytics dashboard — crisis detection metrics |
-| #36 | open | open_no_current_pr | none | [P1] Build crisis_synthesizer.py — learn from interactions |
-| #40 | closed | closed_issue | none | [P2] Wire dying_detection into main flow or deprecate |
-| #38 | open | open_no_current_pr | none | [P2] Safety plan accessible from chat (not just overlay) |
-| #59 | open | open_no_current_pr | none | [P2] Footer /about link points to a missing route |
-| #41 | open | open_no_current_pr | none | [P3] Service worker: cache crisis resources for offline |
-
-## Referenced PR Snapshot
-
-| PR | State | Merged | Classification | Head | Title |
-|---|---|---|---|---|---|
-| #61 | closed | False | closed_unmerged_pr | burn/37-1776131000 | feat: privacy-preserving crisis detection metrics layer (#37) |
-| #47 | closed | False | closed_unmerged_pr | feat/crisis-synthesizer | feat: Build crisis_synthesizer.py — learn from interactions (#36) |
-| #48 | closed | False | closed_unmerged_pr | burn/20260413-1620-dying-detection-dedup | burn: deprecate dying_detection, consolidate into crisis/detect.py |
-| #50 | closed | False | closed_unmerged_pr | whip/40-1776128804 | fix: deprecate dying_detection and consolidate crisis detection (#40) |
-| #51 | closed | False | closed_unmerged_pr | queue/40-1776129201 | fix: deprecate dying_detection and consolidate crisis detection (#40) |
-| #53 | closed | False | closed_unmerged_pr | q/40-1776129480 | fix: deprecate dying_detection and consolidate crisis detection (#40) |
-| #56 | closed | False | closed_unmerged_pr | triage/40-1776129677 | fix: deprecate dying_detection and consolidate crisis detection (#40) |
-| #58 | closed | False | closed_unmerged_pr | dawn/40-1776130053 | fix: deprecate dying_detection and consolidate crisis detection (#40) |
-| #70 | closed | False | closed_unmerged_pr | am/40-1776166469 | fix: deprecate dying_detection and consolidate crisis detection (#40) |
-| #72 | closed | False | closed_unmerged_pr | am/38-1776166469 | feat: add always-on safety plan access in chat header (#38) |
-| #62 | closed | False | closed_unmerged_pr | burn/59-1776131200 | fix: point footer about link to /about.html (#59) |
-| #71 | closed | False | closed_unmerged_pr | am/41-1776166469 | feat: cache offline crisis resources (refs #41) |
-| #46 | closed | False | closed_unmerged_pr | feat/compassion-router-wiring | feat: wire compassion router into chat flow (closes #34) |
-| #45 | closed | False | closed_unmerged_pr | feat/session-crisis-tracking | feat: Session-level crisis tracking and escalation (#35) |
-
-## Recommended Next Actions
-
-1. Do not trust the original work-order body as live truth; use this audit artifact for current planning.
-2. Re-triage the open_no_current_pr issues individually before dispatching new work, because the old PR references are now stale.
-3. Treat closed_unmerged_pr references as historical attempts, not active review lanes.
-4. If future work orders are needed, generate them from live forge state instead of reusing the 2026-04-09 issue body.
-5. This audit preserves operator memory; it does not claim all referenced work orders are complete.
--- a/scripts/fleet_work_orders_audit.py
+++ b/scripts/fleet_work_orders_audit.py
@@ -1,295 +0,0 @@
-#!/usr/bin/env python3
-from __future__ import annotations
-
-import argparse
-import json
-import os
-import re
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any
-from urllib.request import Request, urlopen
-
-API_BASE = "https://forge.alexanderwhitestone.com/api/v1"
-ORG = "Timmy_Foundation"
-DEFAULT_TOKEN_PATH = os.path.expanduser("~/.config/gitea/token")
-DEFAULT_OUTPUT = "reports/2026-04-17-the-door-fleet-work-orders-audit.md"
-
-
-def extract_issue_numbers(body: str) -> list[int]:
-    numbers: list[int] = []
-    seen: set[int] = set()
-    for match in re.finditer(r"#(\d+)", body or ""):
-        value = int(match.group(1))
-        if value in seen:
-            continue
-        seen.add(value)
-        numbers.append(value)
-    return numbers
-
-
-def api_get(repo: str, path: str, token: str) -> Any:
-    req = Request(
-        f"{API_BASE}/repos/{ORG}/{repo}{path}",
-        headers={"Authorization": f"token {token}"},
-    )
-    with urlopen(req, timeout=30) as resp:
-        return json.loads(resp.read().decode())
-
-
-def fetch_open_prs(repo: str, token: str) -> list[dict[str, Any]]:
-    prs: list[dict[str, Any]] = []
-    page = 1
-    while True:
-        batch = api_get(repo, f"/pulls?state=open&limit=100&page={page}", token)
-        if not batch:
-            break
-        prs.extend(batch)
-        page += 1
-    return prs
-
-
-def fetch_live_open_issue_count(repo: str, token: str) -> int:
-    total = 0
-    page = 1
-    while True:
-        batch = api_get(repo, f"/issues?state=open&limit=100&page={page}", token)
-        if not batch:
-            break
-        total += sum(1 for item in batch if not item.get("pull_request"))
-        page += 1
-    return total
-
-
-def parse_claimed_summary(body: str) -> tuple[int | None, int | None]:
-    issue_match = re.search(r"has\s+(\d+)\s+real issues", body or "", flags=re.IGNORECASE)
-    pr_match = re.search(r"and\s+(\d+)\s+open PRs", body or "", flags=re.IGNORECASE)
-    claimed_open_issues = int(issue_match.group(1)) if issue_match else None
-    claimed_open_prs = int(pr_match.group(1)) if pr_match else None
-    return claimed_open_issues, claimed_open_prs
-
-
-def summarize_open_pr_coverage(issue_num: int, open_prs: list[dict[str, Any]]) -> str:
-    matches: list[str] = []
-    seen: set[int] = set()
-    for pr in open_prs:
-        pr_num = pr["number"]
-        if pr_num in seen:
-            continue
-        text = "\n".join(
-            [
-                pr.get("title") or "",
-                pr.get("body") or "",
-                (pr.get("head") or {}).get("ref") or "",
-            ]
-        )
-        if f"#{issue_num}" not in text:
-            continue
-        seen.add(pr_num)
-        matches.append(f"open PR #{pr_num}")
-    return ", ".join(matches) if matches else "none"
-
-
-def classify_issue_reference(ref_issue: dict[str, Any], open_prs: list[dict[str, Any]]) -> dict[str, Any]:
-    issue_num = ref_issue["number"]
-    state = ref_issue.get("state") or "unknown"
-    coverage = summarize_open_pr_coverage(issue_num, open_prs)
-    if state == "closed":
-        classification = "closed_issue"
-    elif coverage != "none":
-        classification = "open_with_current_pr"
-    else:
-        classification = "open_no_current_pr"
-    return {
-        "number": issue_num,
-        "state": state,
-        "classification": classification,
-        "title": ref_issue.get("title") or "",
-        "current_pr_coverage": coverage,
-        "url": ref_issue.get("html_url") or ref_issue.get("url") or "",
-    }
-
-
-def classify_pr_reference(repo: str, pr_num: int, token: str) -> dict[str, Any]:
-    pr = api_get(repo, f"/pulls/{pr_num}", token)
-    state = pr.get("state") or "unknown"
-    merged = bool(pr.get("merged"))
-    if merged:
-        classification = "merged_pr"
-    elif state == "open":
-        classification = "open_pr"
-    else:
-        classification = "closed_unmerged_pr"
-    return {
-        "number": pr_num,
-        "state": state,
-        "merged": merged,
-        "classification": classification,
-        "title": pr.get("title") or "",
-        "head": (pr.get("head") or {}).get("ref") or "",
-        "url": pr.get("html_url") or pr.get("url") or "",
-    }
-
-
-def table(rows: list[dict[str, Any]], columns: list[tuple[str, str]]) -> str:
-    headers = [title for title, _ in columns]
-    keys = [key for _, key in columns]
-    if not rows:
-        return "| None |\n|---|\n| None |"
-    lines = ["| " + " | ".join(headers) + " |", "|" + "|".join(["---"] * len(headers)) + "|"]
-    for row in rows:
-        values: list[str] = []
-        for key in keys:
-            value = row.get(key, "")
-            if key == "number" and value != "":
-                value = f"#{value}"
-            values.append(str(value).replace("\n", " "))
-        lines.append("| " + " | ".join(values) + " |")
-    return "\n".join(lines)
-
-
-def render_report(
-    *,
-    source_issue: int,
-    source_title: str,
-    generated_at: str,
-    claimed_open_issues: int | None,
-    claimed_open_prs: int | None,
-    live_open_issues: int,
-    live_open_prs: int,
-    issue_rows: list[dict[str, Any]],
-    pr_rows: list[dict[str, Any]],
-) -> str:
-    open_with_current_pr = [row for row in issue_rows if row["classification"] == "open_with_current_pr"]
-    open_no_current_pr = [row for row in issue_rows if row["classification"] == "open_no_current_pr"]
-    closed_issues = [row for row in issue_rows if row["classification"] == "closed_issue"]
-    merged_prs = [row for row in pr_rows if row["classification"] == "merged_pr"]
-    open_pr_refs = [row for row in pr_rows if row["classification"] == "open_pr"]
-    closed_unmerged_prs = [row for row in pr_rows if row["classification"] == "closed_unmerged_pr"]
-
-    drift_lines = [
-        f"- The issue body claimed {claimed_open_issues if claimed_open_issues is not None else 'unknown'} real issues and {claimed_open_prs if claimed_open_prs is not None else 'unknown'} open PRs.",
-        f"- Live repo state now shows {live_open_issues} open issues and {live_open_prs} open PRs.",
-        f"- Referenced issues now break down into {len(closed_issues)} closed, {len(open_with_current_pr)} open_with_current_pr, and {len(open_no_current_pr)} open_no_current_pr.",
-        f"- Referenced PRs now break down into {len(merged_prs)} merged_pr, {len(open_pr_refs)} open_pr, and {len(closed_unmerged_prs)} closed_unmerged_pr.",
-    ]
-
-    return "\n".join(
-        [
-            f"# The Door Fleet Work Orders Audit — issue #{source_issue}",
-            "",
-            f"Generated: {generated_at}",
-            f"Source issue: `{source_title}`",
-            "",
-            "## Source Snapshot",
-            "",
-            "Issue #75 is a dated triage work-order sheet, not a normal feature request. The durable deliverable is a truth-restored audit of the referenced issue and PR set against live forge state.",
-            "",
-            "## Live Summary",
-            "",
-            f"- Referenced issues audited: {len(issue_rows)}",
-            f"- Referenced PRs audited: {len(pr_rows)}",
-            f"- Live repo open issues: {live_open_issues}",
-            f"- Live repo open PRs: {live_open_prs}",
-            f"- Open referenced issues with current PR coverage: {len(open_with_current_pr)}",
-            f"- Open referenced issues with no current PR coverage: {len(open_no_current_pr)}",
-            f"- Closed referenced issues: {len(closed_issues)}",
-            f"- Closed-unmerged referenced PRs: {len(closed_unmerged_prs)}",
-            "",
-            "## Issue Body Drift",
-            "",
-            *drift_lines,
-            "",
-            "## Referenced Issue Snapshot",
-            "",
-            table(
-                issue_rows,
-                [
-                    ("Issue", "number"),
-                    ("State", "state"),
-                    ("Classification", "classification"),
-                    ("Current PR Coverage", "current_pr_coverage"),
-                    ("Title", "title"),
-                ],
-            ),
-            "",
-            "## Referenced PR Snapshot",
-            "",
-            table(
-                pr_rows,
-                [
-                    ("PR", "number"),
-                    ("State", "state"),
-                    ("Merged", "merged"),
-                    ("Classification", "classification"),
-                    ("Head", "head"),
-                    ("Title", "title"),
-                ],
-            ),
-            "",
-            "## Recommended Next Actions",
-            "",
-            "1. Do not trust the original work-order body as live truth; use this audit artifact for current planning.",
-            "2. Re-triage the open_no_current_pr issues individually before dispatching new work, because the old PR references are now stale.",
-            "3. Treat closed_unmerged_pr references as historical attempts, not active review lanes.",
-            "4. If future work orders are needed, generate them from live forge state instead of reusing the 2026-04-09 issue body.",
-            "5. This audit preserves operator memory; it does not claim all referenced work orders are complete.",
-        ]
-    ) + "\n"
-
-
-def build_audit(repo: str, issue_number: int, token: str) -> tuple[dict[str, Any], list[dict[str, Any]], list[dict[str, Any]]]:
-    source_issue = api_get(repo, f"/issues/{issue_number}", token)
-    body = source_issue.get("body") or ""
-    refs = extract_issue_numbers(body)
-    open_prs = fetch_open_prs(repo, token)
-    claimed_open_issues, claimed_open_prs = parse_claimed_summary(body)
-    issue_rows: list[dict[str, Any]] = []
-    pr_rows: list[dict[str, Any]] = []
-    for ref in refs:
-        issue_like = api_get(repo, f"/issues/{ref}", token)
-        if issue_like.get("pull_request"):
-            pr_rows.append(classify_pr_reference(repo, ref, token))
-        else:
-            issue_rows.append(classify_issue_reference(issue_like, open_prs))
-    metadata = {
-        "source_title": source_issue.get("title") or "",
-        "claimed_open_issues": claimed_open_issues,
-        "claimed_open_prs": claimed_open_prs,
-        "live_open_issues": fetch_live_open_issue_count(repo, token),
-        "live_open_prs": len(open_prs),
-    }
-    return metadata, issue_rows, pr_rows
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description="Audit The Door fleet work orders issue against live forge state.")
-    parser.add_argument("--repo", default="the-door")
-    parser.add_argument("--issue", type=int, default=75)
-    parser.add_argument("--token-file", default=DEFAULT_TOKEN_PATH)
-    parser.add_argument("--output", default=DEFAULT_OUTPUT)
-    args = parser.parse_args()
-
-    token = Path(args.token_file).read_text(encoding="utf-8").strip()
-    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-    metadata, issue_rows, pr_rows = build_audit(args.repo, args.issue, token)
-    report = render_report(
-        source_issue=args.issue,
-        source_title=metadata["source_title"],
-        generated_at=generated_at,
-        claimed_open_issues=metadata["claimed_open_issues"],
-        claimed_open_prs=metadata["claimed_open_prs"],
-        live_open_issues=metadata["live_open_issues"],
-        live_open_prs=metadata["live_open_prs"],
-        issue_rows=issue_rows,
-        pr_rows=pr_rows,
-    )
-    output_path = Path(args.output)
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-    output_path.write_text(report, encoding="utf-8")
-    print(output_path)
-    return 0
-
-
-if __name__ == "__main__":
-    raise SystemExit(main())
--- a/tests/test_ab_testing.py
+++ b/tests/test_ab_testing.py
@@ -0,0 +1,138 @@
+"""Tests for crisis.ab_testing — A/B test framework for crisis detection (#101)."""
+
+import os
+from unittest.mock import patch
+
+import pytest
+
+from crisis.ab_testing import ABTestCrisisDetector
+from crisis.detect import CrisisDetectionResult, detect_crisis
+
+
+@pytest.fixture(autouse=True)
+def clear_variant_override():
+    old = os.environ.pop("CRISIS_AB_VARIANT", None)
+    try:
+        yield
+    finally:
+        if old is not None:
+            os.environ["CRISIS_AB_VARIANT"] = old
+        else:
+            os.environ.pop("CRISIS_AB_VARIANT", None)
+
+
+def _make_variant(level: str, indicators=None):
+    indicators = indicators or [f"mock_{level.lower()}"]
+
+    def fn(text: str) -> CrisisDetectionResult:
+        return CrisisDetectionResult(level=level, indicators=list(indicators))
+
+    return fn
+
+
+def test_detect_returns_result_variant_and_logged_record():
+    detector = ABTestCrisisDetector(
+        variant_a=_make_variant("LOW"),
+        variant_b=_make_variant("HIGH"),
+    )
+
+    with patch.object(detector, "_select_variant", return_value="A"):
+        result, variant, record_id = detector.detect("test message")
+
+    assert isinstance(result, CrisisDetectionResult)
+    assert variant == "A"
+    assert record_id == 0
+    assert len(detector.records) == 1
+    assert detector.records[0].variant == "A"
+    assert detector.records[0].level == "LOW"
+
+
+def test_env_override_forces_variant_b():
+    os.environ["CRISIS_AB_VARIANT"] = "b"
+    detector = ABTestCrisisDetector(
+        variant_a=_make_variant("LOW"),
+        variant_b=_make_variant("HIGH"),
+    )
+
+    result, variant, _ = detector.detect("test")
+
+    assert variant == "B"
+    assert result.level == "HIGH"
+
+
+def test_get_stats_reports_latency_counts_and_level_breakdown():
+    detector = ABTestCrisisDetector(
+        variant_a=_make_variant("LOW"),
+        variant_b=_make_variant("CRITICAL"),
+    )
+
+    with patch.object(detector, "_select_variant", side_effect=["A", "A", "B"]):
+        detector.detect("first")
+        detector.detect("second")
+        detector.detect("third")
+
+    stats = detector.get_stats()
+    assert stats["A"]["count"] == 2
+    assert stats["B"]["count"] == 1
+    assert stats["A"]["levels"]["LOW"] == 2
+    assert stats["B"]["levels"]["CRITICAL"] == 1
+    assert "avg_latency_ms" in stats["A"]
+    assert "avg_indicator_count" in stats["B"]
+
+
+def test_false_positive_rate_is_computed_from_reviewed_outcomes():
+    detector = ABTestCrisisDetector(
+        variant_a=_make_variant("LOW"),
+        variant_b=_make_variant("HIGH"),
+    )
+
+    with patch.object(detector, "_select_variant", side_effect=["A", "A", "B"]):
+        _, _, a0 = detector.detect("first")
+        _, _, a1 = detector.detect("second")
+        _, _, b0 = detector.detect("third")
+
+    detector.record_outcome(a0, false_positive=True)
+    detector.record_outcome(a1, false_positive=False)
+    detector.record_outcome(b0, false_positive=False)
+
+    stats = detector.get_stats()
+    assert stats["A"]["reviewed_count"] == 2
+    assert stats["A"]["false_positive_rate"] == 0.5
+    assert stats["B"]["false_positive_rate"] == 0.0
+
+
+def test_record_outcome_rejects_unknown_record():
+    detector = ABTestCrisisDetector(
+        variant_a=_make_variant("LOW"),
+        variant_b=_make_variant("HIGH"),
+    )
+
+    with pytest.raises(IndexError):
+        detector.record_outcome(99, false_positive=True)
+
+
+def test_reset_clears_records_and_stats():
+    detector = ABTestCrisisDetector(
+        variant_a=_make_variant("LOW"),
+        variant_b=_make_variant("HIGH"),
+    )
+    detector.detect("test")
+    detector.reset()
+
+    assert detector.records == []
+    stats = detector.get_stats()
+    assert stats["A"]["count"] == 0
+    assert stats["B"]["count"] == 0
+
+
+def test_with_real_detector_integration():
+    detector = ABTestCrisisDetector(
+        variant_a=detect_crisis,
+        variant_b=detect_crisis,
+    )
+
+    result, variant, record_id = detector.detect("I want to kill myself")
+
+    assert result.level == "CRITICAL"
+    assert variant in ("A", "B")
+    assert record_id == 0
--- a/tests/test_fleet_work_orders_audit.py
+++ b/tests/test_fleet_work_orders_audit.py
@@ -1,100 +0,0 @@
-import importlib.util
-from pathlib import Path
-
-
-ROOT = Path(__file__).resolve().parents[1]
-SCRIPT_PATH = ROOT / "scripts" / "fleet_work_orders_audit.py"
-REPORT_PATH = ROOT / "reports" / "2026-04-17-the-door-fleet-work-orders-audit.md"
-
-
-def _load_module():
-    assert SCRIPT_PATH.exists(), f"missing {SCRIPT_PATH.relative_to(ROOT)}"
-    spec = importlib.util.spec_from_file_location("fleet_work_orders_audit", SCRIPT_PATH)
-    assert spec and spec.loader
-    module = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(module)
-    return module
-
-
-def test_extract_issue_numbers_preserves_mixed_issue_and_pr_refs() -> None:
-    body = """
-    ## P0 — Session-level crisis tracking (#35)
-    **PR #61 ready.**
-    ## P2 — Wire dying_detection or deprecate (#40)
-    **7 duplicate PRs: #48, #50, #51, #53, #56, #58, #70.**
-    """
-
-    mod = _load_module()
-
-    assert mod.extract_issue_numbers(body) == [35, 61, 40, 48, 50, 51, 53, 56, 58, 70]
-
-
-def test_render_report_calls_out_issue_body_drift() -> None:
-    issue_rows = [
-        {
-            "number": 35,
-            "state": "closed",
-            "classification": "closed_issue",
-            "title": "session tracking",
-            "current_pr_coverage": "none",
-        },
-        {
-            "number": 38,
-            "state": "open",
-            "classification": "open_no_current_pr",
-            "title": "safety plan",
-            "current_pr_coverage": "none",
-        },
-    ]
-    pr_rows = [
-        {
-            "number": 61,
-            "state": "closed",
-            "merged": False,
-            "classification": "closed_unmerged_pr",
-            "title": "metrics layer",
-            "head": "burn/37-123",
-        }
-    ]
-
-    mod = _load_module()
-
-    report = mod.render_report(
-        source_issue=75,
-        source_title="TRIAGE: The Door - Fleet Work Orders (2026-04-09)",
-        generated_at="2026-04-17T04:00:00Z",
-        claimed_open_issues=13,
-        claimed_open_prs=24,
-        live_open_issues=5,
-        live_open_prs=0,
-        issue_rows=issue_rows,
-        pr_rows=pr_rows,
-    )
-
-    assert "## Source Snapshot" in report
-    assert "## Live Summary" in report
-    assert "## Issue Body Drift" in report
-    assert "13" in report and "24" in report
-    assert "#38" in report
-    assert "open_no_current_pr" in report
-    assert "#61" in report
-    assert "closed_unmerged_pr" in report
-    assert "## Referenced Issue Snapshot" in report
-    assert "## Referenced PR Snapshot" in report
-    assert "## Recommended Next Actions" in report
-
-
-def test_committed_work_orders_audit_exists_with_required_sections() -> None:
-    text = REPORT_PATH.read_text(encoding="utf-8")
-
-    required = [
-        "# The Door Fleet Work Orders Audit — issue #75",
-        "## Source Snapshot",
-        "## Live Summary",
-        "## Issue Body Drift",
-        "## Referenced Issue Snapshot",
-        "## Referenced PR Snapshot",
-        "## Recommended Next Actions",
-    ]
-    missing = [item for item in required if item not in text]
-    assert not missing, missing
Author	SHA1	Message	Date
Timmy	7cef18fdcb	feat: add crisis ab testing for #101 All checks were successful Sanity Checks / sanity-test (pull_request) Successful in 7s Details Smoke Test / smoke (pull_request) Successful in 14s Details	2026-04-20 21:43:37 -04:00
Timmy	706024e11e	test: define crisis ab testing for #101	2026-04-20 21:41:31 -04:00
Timmy Time	d412939b4f	fix: footer /about link to point to static about.html Fixes #59 The footer links to /about but the repo ships about.html. On a plain static server this results in a 404. Changed to /about.html so the link resolves correctly.	2026-04-17 05:37:40 +00:00