"""Tests for the three-strike detector. Refs: #962 """ import pytest from timmy.sovereignty.three_strike import ( CATEGORIES, STRIKE_BLOCK, STRIKE_WARNING, FalseworkChecklist, StrikeRecord, ThreeStrikeError, ThreeStrikeStore, falsework_check, ) @pytest.fixture def store(tmp_path): """Isolated store backed by a temp DB.""" return ThreeStrikeStore(db_path=tmp_path / "test_strikes.db") # ── Category constants ──────────────────────────────────────────────────────── class TestCategories: @pytest.mark.unit def test_all_categories_present(self): expected = { "vlm_prompt_edit", "game_bug_review", "parameter_tuning", "portal_adapter_creation", "deployment_step", } assert expected == CATEGORIES @pytest.mark.unit def test_strike_thresholds(self): assert STRIKE_WARNING == 2 assert STRIKE_BLOCK == 3 # ── ThreeStrikeStore ────────────────────────────────────────────────────────── class TestThreeStrikeStore: @pytest.mark.unit def test_first_strike_returns_record(self, store): record = store.record("vlm_prompt_edit", "login_button") assert isinstance(record, StrikeRecord) assert record.count == 1 assert record.blocked is False assert record.category == "vlm_prompt_edit" assert record.key == "login_button" @pytest.mark.unit def test_second_strike_count(self, store): store.record("vlm_prompt_edit", "login_button") record = store.record("vlm_prompt_edit", "login_button") assert record.count == 2 assert record.blocked is False @pytest.mark.unit def test_third_strike_raises(self, store): store.record("vlm_prompt_edit", "login_button") store.record("vlm_prompt_edit", "login_button") with pytest.raises(ThreeStrikeError) as exc_info: store.record("vlm_prompt_edit", "login_button") err = exc_info.value assert err.category == "vlm_prompt_edit" assert err.key == "login_button" assert err.count == 3 @pytest.mark.unit def test_fourth_strike_still_raises(self, store): for _ in range(3): try: store.record("deployment_step", "build_docker") except ThreeStrikeError: pass with pytest.raises(ThreeStrikeError): store.record("deployment_step", "build_docker") @pytest.mark.unit def test_different_keys_are_independent(self, store): store.record("vlm_prompt_edit", "login_button") store.record("vlm_prompt_edit", "login_button") # Different key — should not be blocked record = store.record("vlm_prompt_edit", "logout_button") assert record.count == 1 @pytest.mark.unit def test_different_categories_are_independent(self, store): store.record("vlm_prompt_edit", "foo") store.record("vlm_prompt_edit", "foo") # Different category, same key — should not be blocked record = store.record("game_bug_review", "foo") assert record.count == 1 @pytest.mark.unit def test_invalid_category_raises_value_error(self, store): with pytest.raises(ValueError, match="Unknown category"): store.record("nonexistent_category", "some_key") @pytest.mark.unit def test_metadata_stored_in_events(self, store): store.record("parameter_tuning", "learning_rate", metadata={"value": 0.01}) events = store.get_events("parameter_tuning", "learning_rate") assert len(events) == 1 assert events[0]["metadata"]["value"] == 0.01 @pytest.mark.unit def test_get_returns_none_for_missing(self, store): assert store.get("vlm_prompt_edit", "not_there") is None @pytest.mark.unit def test_get_returns_record(self, store): store.record("vlm_prompt_edit", "submit_btn") record = store.get("vlm_prompt_edit", "submit_btn") assert record is not None assert record.count == 1 @pytest.mark.unit def test_list_all_empty(self, store): assert store.list_all() == [] @pytest.mark.unit def test_list_all_returns_records(self, store): store.record("vlm_prompt_edit", "a") store.record("vlm_prompt_edit", "b") records = store.list_all() assert len(records) == 2 @pytest.mark.unit def test_list_blocked_empty_when_no_strikes(self, store): assert store.list_blocked() == [] @pytest.mark.unit def test_list_blocked_contains_blocked(self, store): for _ in range(3): try: store.record("deployment_step", "push_image") except ThreeStrikeError: pass blocked = store.list_blocked() assert len(blocked) == 1 assert blocked[0].key == "push_image" @pytest.mark.unit def test_register_automation_unblocks(self, store): for _ in range(3): try: store.record("deployment_step", "push_image") except ThreeStrikeError: pass store.register_automation("deployment_step", "push_image", "scripts/push.sh") # Should no longer raise record = store.record("deployment_step", "push_image") assert record.blocked is False assert record.automation == "scripts/push.sh" @pytest.mark.unit def test_register_automation_resets_count(self, store): for _ in range(3): try: store.record("deployment_step", "push_image") except ThreeStrikeError: pass store.register_automation("deployment_step", "push_image", "scripts/push.sh") # register_automation resets count to 0; one new record brings it to 1 new_record = store.record("deployment_step", "push_image") assert new_record.count == 1 @pytest.mark.unit def test_get_events_returns_most_recent_first(self, store): store.record("vlm_prompt_edit", "nav", metadata={"n": 1}) store.record("vlm_prompt_edit", "nav", metadata={"n": 2}) events = store.get_events("vlm_prompt_edit", "nav") assert len(events) == 2 # Most recent first assert events[0]["metadata"]["n"] == 2 @pytest.mark.unit def test_get_events_respects_limit(self, store): for i in range(5): try: store.record("vlm_prompt_edit", "el") except ThreeStrikeError: pass events = store.get_events("vlm_prompt_edit", "el", limit=2) assert len(events) == 2 # ── FalseworkChecklist ──────────────────────────────────────────────────────── class TestFalseworkChecklist: @pytest.mark.unit def test_valid_checklist_passes(self): cl = FalseworkChecklist( durable_artifact="embedding vectors", artifact_storage_path="data/embeddings.json", local_rule_or_cache="vlm_cache", will_repeat=False, sovereignty_delta="eliminates repeated call", ) assert cl.passed is True assert cl.validate() == [] @pytest.mark.unit def test_missing_artifact_fails(self): cl = FalseworkChecklist( artifact_storage_path="data/x.json", local_rule_or_cache="cache", will_repeat=False, sovereignty_delta="delta", ) errors = cl.validate() assert any("Q1" in e for e in errors) @pytest.mark.unit def test_missing_storage_path_fails(self): cl = FalseworkChecklist( durable_artifact="artifact", local_rule_or_cache="cache", will_repeat=False, sovereignty_delta="delta", ) errors = cl.validate() assert any("Q2" in e for e in errors) @pytest.mark.unit def test_will_repeat_none_fails(self): cl = FalseworkChecklist( durable_artifact="artifact", artifact_storage_path="path", local_rule_or_cache="cache", sovereignty_delta="delta", ) errors = cl.validate() assert any("Q4" in e for e in errors) @pytest.mark.unit def test_will_repeat_true_requires_elimination_strategy(self): cl = FalseworkChecklist( durable_artifact="artifact", artifact_storage_path="path", local_rule_or_cache="cache", will_repeat=True, sovereignty_delta="delta", ) errors = cl.validate() assert any("Q5" in e for e in errors) @pytest.mark.unit def test_will_repeat_false_no_elimination_needed(self): cl = FalseworkChecklist( durable_artifact="artifact", artifact_storage_path="path", local_rule_or_cache="cache", will_repeat=False, sovereignty_delta="delta", ) errors = cl.validate() assert not any("Q5" in e for e in errors) @pytest.mark.unit def test_missing_sovereignty_delta_fails(self): cl = FalseworkChecklist( durable_artifact="artifact", artifact_storage_path="path", local_rule_or_cache="cache", will_repeat=False, ) errors = cl.validate() assert any("Q6" in e for e in errors) @pytest.mark.unit def test_multiple_missing_fields(self): cl = FalseworkChecklist() errors = cl.validate() # At minimum Q1, Q2, Q3, Q4, Q6 should be flagged assert len(errors) >= 5 # ── falsework_check() helper ────────────────────────────────────────────────── class TestFalseworkCheck: @pytest.mark.unit def test_raises_on_incomplete_checklist(self): with pytest.raises(ValueError, match="Falsework Checklist incomplete"): falsework_check(FalseworkChecklist()) @pytest.mark.unit def test_passes_on_complete_checklist(self): cl = FalseworkChecklist( durable_artifact="artifact", artifact_storage_path="path", local_rule_or_cache="cache", will_repeat=False, sovereignty_delta="delta", ) falsework_check(cl) # should not raise # ── ThreeStrikeError ────────────────────────────────────────────────────────── class TestThreeStrikeError: @pytest.mark.unit def test_attributes(self): err = ThreeStrikeError("vlm_prompt_edit", "foo", 3) assert err.category == "vlm_prompt_edit" assert err.key == "foo" assert err.count == 3 @pytest.mark.unit def test_message_contains_details(self): err = ThreeStrikeError("deployment_step", "build", 4) msg = str(err) assert "deployment_step" in msg assert "build" in msg assert "4" in msg