# Timmy-time-dashboard/tests/timmy/test_three_strike.py

"""Tests for the three-strike detector.

Refs: #962
"""

import pytest

from timmy.sovereignty.three_strike import (
    CATEGORIES,
    STRIKE_BLOCK,
    STRIKE_WARNING,
    FalseworkChecklist,
    StrikeRecord,
    ThreeStrikeError,
    ThreeStrikeStore,
    falsework_check,
)


@pytest.fixture
def store(tmp_path):
    """Provide a ``ThreeStrikeStore`` whose DB file lives under pytest's ``tmp_path``."""
    db_file = tmp_path / "test_strikes.db"
    return ThreeStrikeStore(db_path=db_file)


# ── Category constants ────────────────────────────────────────────────────────


class TestCategories:
    """Pin the module-level constants imported from the three-strike module."""

    @pytest.mark.unit
    def test_all_categories_present(self):
        """``CATEGORIES`` must equal exactly the five category names listed here."""
        known_names = {
            "deployment_step",
            "game_bug_review",
            "parameter_tuning",
            "portal_adapter_creation",
            "vlm_prompt_edit",
        }
        assert known_names == CATEGORIES

    @pytest.mark.unit
    def test_strike_thresholds(self):
        """The warning threshold is 2 and the block threshold is 3."""
        warning_at, block_at = STRIKE_WARNING, STRIKE_BLOCK
        assert warning_at == 2
        assert block_at == 3


# ── ThreeStrikeStore ──────────────────────────────────────────────────────────


class TestThreeStrikeStore:
    """Behavioural tests for ``ThreeStrikeStore``.

    Every test receives a fresh store from the ``store`` fixture, so strike
    counts never leak between tests.
    """

    @staticmethod
    def _strike_out(store, category, key):
        """Drive ``(category, key)`` up to the blocking strike on a fresh store.

        The strikes below ``STRIKE_BLOCK`` must be accepted and the
        ``STRIKE_BLOCK``-th must raise ``ThreeStrikeError``.  Asserting that
        here, rather than swallowing the error on every attempt, means a
        premature or missing block is reported even by tests that only need a
        blocked key as setup.
        """
        for _ in range(STRIKE_BLOCK - 1):
            store.record(category, key)
        with pytest.raises(ThreeStrikeError):
            store.record(category, key)

    @pytest.mark.unit
    def test_first_strike_returns_record(self, store):
        """The first strike yields an unblocked ``StrikeRecord`` with count 1."""
        record = store.record("vlm_prompt_edit", "login_button")
        assert isinstance(record, StrikeRecord)
        assert record.count == 1
        assert record.blocked is False
        assert record.category == "vlm_prompt_edit"
        assert record.key == "login_button"

    @pytest.mark.unit
    def test_second_strike_count(self, store):
        """The second strike bumps the count to 2 without blocking."""
        store.record("vlm_prompt_edit", "login_button")
        record = store.record("vlm_prompt_edit", "login_button")
        assert record.count == 2
        assert record.blocked is False

    @pytest.mark.unit
    def test_third_strike_raises(self, store):
        """The third strike raises ``ThreeStrikeError`` carrying category, key and count."""
        store.record("vlm_prompt_edit", "login_button")
        store.record("vlm_prompt_edit", "login_button")
        with pytest.raises(ThreeStrikeError) as exc_info:
            store.record("vlm_prompt_edit", "login_button")
        err = exc_info.value
        assert err.category == "vlm_prompt_edit"
        assert err.key == "login_button"
        assert err.count == 3

    @pytest.mark.unit
    def test_fourth_strike_still_raises(self, store):
        """Recording again after the blocking strike keeps raising."""
        self._strike_out(store, "deployment_step", "build_docker")
        with pytest.raises(ThreeStrikeError):
            store.record("deployment_step", "build_docker")

    @pytest.mark.unit
    def test_different_keys_are_independent(self, store):
        """Strikes on one key do not count against another key in the same category."""
        store.record("vlm_prompt_edit", "login_button")
        store.record("vlm_prompt_edit", "login_button")
        # Different key — should not be blocked
        record = store.record("vlm_prompt_edit", "logout_button")
        assert record.count == 1

    @pytest.mark.unit
    def test_different_categories_are_independent(self, store):
        """Strikes on one category do not count against the same key elsewhere."""
        store.record("vlm_prompt_edit", "foo")
        store.record("vlm_prompt_edit", "foo")
        # Different category, same key — should not be blocked
        record = store.record("game_bug_review", "foo")
        assert record.count == 1

    @pytest.mark.unit
    def test_invalid_category_raises_value_error(self, store):
        """An unrecognised category is rejected with ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown category"):
            store.record("nonexistent_category", "some_key")

    @pytest.mark.unit
    def test_metadata_stored_in_events(self, store):
        """Metadata passed to ``record`` is readable back through ``get_events``."""
        store.record("parameter_tuning", "learning_rate", metadata={"value": 0.01})
        events = store.get_events("parameter_tuning", "learning_rate")
        assert len(events) == 1
        assert events[0]["metadata"]["value"] == 0.01

    @pytest.mark.unit
    def test_get_returns_none_for_missing(self, store):
        """``get`` returns ``None`` for a pair that was never recorded."""
        assert store.get("vlm_prompt_edit", "not_there") is None

    @pytest.mark.unit
    def test_get_returns_record(self, store):
        """``get`` returns the stored record after a strike."""
        store.record("vlm_prompt_edit", "submit_btn")
        record = store.get("vlm_prompt_edit", "submit_btn")
        assert record is not None
        assert record.count == 1

    @pytest.mark.unit
    def test_list_all_empty(self, store):
        """A fresh store lists no records."""
        assert store.list_all() == []

    @pytest.mark.unit
    def test_list_all_returns_records(self, store):
        """``list_all`` returns one entry per distinct key recorded."""
        store.record("vlm_prompt_edit", "a")
        store.record("vlm_prompt_edit", "b")
        records = store.list_all()
        assert len(records) == 2

    @pytest.mark.unit
    def test_list_blocked_empty_when_no_strikes(self, store):
        """A fresh store lists nothing as blocked."""
        assert store.list_blocked() == []

    @pytest.mark.unit
    def test_list_blocked_contains_blocked(self, store):
        """A key driven to the blocking strike appears in ``list_blocked``."""
        self._strike_out(store, "deployment_step", "push_image")
        blocked = store.list_blocked()
        assert len(blocked) == 1
        assert blocked[0].key == "push_image"

    @pytest.mark.unit
    def test_register_automation_unblocks(self, store):
        """Registering an automation lets a blocked key be recorded again."""
        self._strike_out(store, "deployment_step", "push_image")

        store.register_automation("deployment_step", "push_image", "scripts/push.sh")

        # Should no longer raise
        record = store.record("deployment_step", "push_image")
        assert record.blocked is False
        assert record.automation == "scripts/push.sh"

    @pytest.mark.unit
    def test_register_automation_resets_count(self, store):
        """Registering an automation restarts the strike count."""
        self._strike_out(store, "deployment_step", "push_image")

        store.register_automation("deployment_step", "push_image", "scripts/push.sh")

        # register_automation resets count to 0; one new record brings it to 1
        new_record = store.record("deployment_step", "push_image")
        assert new_record.count == 1

    @pytest.mark.unit
    def test_get_events_returns_most_recent_first(self, store):
        """``get_events`` orders events newest-first."""
        store.record("vlm_prompt_edit", "nav", metadata={"n": 1})
        store.record("vlm_prompt_edit", "nav", metadata={"n": 2})
        events = store.get_events("vlm_prompt_edit", "nav")
        # Pin the whole ordering (and thereby the length), not just the head.
        assert [event["metadata"]["n"] for event in events] == [2, 1]

    @pytest.mark.unit
    def test_get_events_respects_limit(self, store):
        """``get_events`` returns at most ``limit`` events."""
        for _ in range(5):
            try:
                store.record("vlm_prompt_edit", "el")
            except ThreeStrikeError:
                # Attempts at or past the block threshold raise; this test
                # only cares about how many events come back, so the error
                # is deliberately ignored here.
                pass
        events = store.get_events("vlm_prompt_edit", "el", limit=2)
        assert len(events) == 2


# ── FalseworkChecklist ────────────────────────────────────────────────────────


class TestFalseworkChecklist:
    """Tests for ``FalseworkChecklist.validate`` and its ``passed`` flag.

    Each failure test omits (or sets) a single field and looks for the
    matching ``Q<n>`` label in the returned error strings.
    """

    @pytest.mark.unit
    def test_valid_checklist_passes(self):
        """A fully answered checklist passes and reports no errors."""
        cl = FalseworkChecklist(
            durable_artifact="embedding vectors",
            artifact_storage_path="data/embeddings.json",
            local_rule_or_cache="vlm_cache",
            will_repeat=False,
            sovereignty_delta="eliminates repeated call",
        )
        assert cl.passed is True
        assert cl.validate() == []

    @pytest.mark.unit
    def test_missing_artifact_fails(self):
        """Omitting ``durable_artifact`` is reported under Q1."""
        cl = FalseworkChecklist(
            artifact_storage_path="data/x.json",
            local_rule_or_cache="cache",
            will_repeat=False,
            sovereignty_delta="delta",
        )
        errors = cl.validate()
        assert any("Q1" in e for e in errors)

    @pytest.mark.unit
    def test_missing_storage_path_fails(self):
        """Omitting ``artifact_storage_path`` is reported under Q2."""
        cl = FalseworkChecklist(
            durable_artifact="artifact",
            local_rule_or_cache="cache",
            will_repeat=False,
            sovereignty_delta="delta",
        )
        errors = cl.validate()
        assert any("Q2" in e for e in errors)

    @pytest.mark.unit
    def test_will_repeat_none_fails(self):
        """Leaving ``will_repeat`` unanswered is reported under Q4."""
        cl = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            sovereignty_delta="delta",
        )
        errors = cl.validate()
        assert any("Q4" in e for e in errors)

    @pytest.mark.unit
    def test_will_repeat_true_requires_elimination_strategy(self):
        """``will_repeat=True`` with nothing else supplied for Q5 is reported under Q5."""
        cl = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            will_repeat=True,
            sovereignty_delta="delta",
        )
        errors = cl.validate()
        assert any("Q5" in e for e in errors)

    @pytest.mark.unit
    def test_will_repeat_false_no_elimination_needed(self):
        """``will_repeat=False`` does not trigger a Q5 error."""
        cl = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            will_repeat=False,
            sovereignty_delta="delta",
        )
        errors = cl.validate()
        assert not any("Q5" in e for e in errors)

    @pytest.mark.unit
    def test_missing_sovereignty_delta_fails(self):
        """Omitting ``sovereignty_delta`` is reported under Q6."""
        cl = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            will_repeat=False,
        )
        errors = cl.validate()
        assert any("Q6" in e for e in errors)

    @pytest.mark.unit
    def test_multiple_missing_fields(self):
        """A default-constructed checklist reports every unanswered question."""
        cl = FalseworkChecklist()
        errors = cl.validate()
        # At minimum Q1, Q2, Q3, Q4, Q6 should be flagged
        assert len(errors) >= 5
        # The count alone would also be satisfied by five copies of one
        # message, so additionally pin the labels the single-field tests in
        # this class already rely on.  Q3 stays covered by the count only,
        # because no test in this file pins its label.
        for label in ("Q1", "Q2", "Q4", "Q6"):
            assert any(label in e for e in errors), f"{label} not flagged: {errors}"


# ── falsework_check() helper ──────────────────────────────────────────────────


class TestFalseworkCheck:
    """Tests for the ``falsework_check`` convenience wrapper."""

    @pytest.mark.unit
    def test_raises_on_incomplete_checklist(self):
        """A default-constructed checklist is rejected with ``ValueError``."""
        blank = FalseworkChecklist()
        with pytest.raises(ValueError, match="Falsework Checklist incomplete"):
            falsework_check(blank)

    @pytest.mark.unit
    def test_passes_on_complete_checklist(self):
        """A fully answered checklist is accepted without raising."""
        answers = {
            "durable_artifact": "artifact",
            "artifact_storage_path": "path",
            "local_rule_or_cache": "cache",
            "will_repeat": False,
            "sovereignty_delta": "delta",
        }
        complete = FalseworkChecklist(**answers)
        # Returning normally is the assertion: any exception fails the test.
        falsework_check(complete)


# ── ThreeStrikeError ──────────────────────────────────────────────────────────


class TestThreeStrikeError:
    """Tests for the attributes and message of ``ThreeStrikeError``."""

    @pytest.mark.unit
    def test_attributes(self):
        """The constructor arguments are exposed as ``category``, ``key`` and ``count``."""
        error = ThreeStrikeError("vlm_prompt_edit", "foo", 3)
        assert error.category == "vlm_prompt_edit"
        assert error.key == "foo"
        assert error.count == 3

    @pytest.mark.unit
    def test_message_contains_details(self):
        """The string form mentions the category, the key and the count."""
        rendered = str(ThreeStrikeError("deployment_step", "build", 4))
        for fragment in ("deployment_step", "build", "4"):
            assert fragment in rendered