333 lines
11 KiB
Python
333 lines
11 KiB
Python
"""Tests for the three-strike detector.
|
|
|
|
Refs: #962
|
|
"""
|
|
|
|
import pytest
|
|
|
|
from timmy.sovereignty.three_strike import (
|
|
CATEGORIES,
|
|
STRIKE_BLOCK,
|
|
STRIKE_WARNING,
|
|
FalseworkChecklist,
|
|
StrikeRecord,
|
|
ThreeStrikeError,
|
|
ThreeStrikeStore,
|
|
falsework_check,
|
|
)
|
|
|
|
|
|
@pytest.fixture
def store(tmp_path):
    """Return a ``ThreeStrikeStore`` whose DB file lives under ``tmp_path``.

    Each test gets its own temporary directory from pytest, so strike state
    is never shared between tests.
    """
    db_file = tmp_path / "test_strikes.db"
    return ThreeStrikeStore(db_path=db_file)
|
|
|
|
|
|
# ── Category constants ────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestCategories:
    """Pin the module-level constants exported by the three-strike module."""

    @pytest.mark.unit
    def test_all_categories_present(self):
        """``CATEGORIES`` equals exactly the five known category names."""
        known_names = {
            "vlm_prompt_edit",
            "game_bug_review",
            "parameter_tuning",
            "portal_adapter_creation",
            "deployment_step",
        }
        assert known_names == CATEGORIES

    @pytest.mark.unit
    def test_strike_thresholds(self):
        """The warning threshold is 2 and the block threshold is 3."""
        assert STRIKE_WARNING == 2
        assert STRIKE_BLOCK == 3
|
|
|
|
|
|
# ── ThreeStrikeStore ──────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestThreeStrikeStore:
    """Behavioural tests for ``ThreeStrikeStore`` using the ``store`` fixture."""

    @staticmethod
    def _strike_out(store, category, key):
        """Drive ``(category, key)`` up to and including the blocking strike.

        Every strike before the ``STRIKE_BLOCK``-th must be recorded without
        raising, and the ``STRIKE_BLOCK``-th must raise ``ThreeStrikeError``.
        Being explicit here (instead of swallowing the exception in a loop)
        makes a test fail at setup time if the store blocks too early or
        fails to block at all.
        """
        for _ in range(STRIKE_BLOCK - 1):
            store.record(category, key)
        with pytest.raises(ThreeStrikeError):
            store.record(category, key)

    @pytest.mark.unit
    def test_first_strike_returns_record(self, store):
        """The first strike returns an unblocked ``StrikeRecord`` with count 1."""
        record = store.record("vlm_prompt_edit", "login_button")
        assert isinstance(record, StrikeRecord)
        assert record.count == 1
        assert record.blocked is False
        assert record.category == "vlm_prompt_edit"
        assert record.key == "login_button"

    @pytest.mark.unit
    def test_second_strike_count(self, store):
        """The second strike bumps the count to 2 and is still not blocked."""
        store.record("vlm_prompt_edit", "login_button")
        record = store.record("vlm_prompt_edit", "login_button")
        assert record.count == 2
        assert record.blocked is False

    @pytest.mark.unit
    def test_third_strike_raises(self, store):
        """The third strike raises ``ThreeStrikeError`` carrying the details."""
        store.record("vlm_prompt_edit", "login_button")
        store.record("vlm_prompt_edit", "login_button")
        with pytest.raises(ThreeStrikeError) as exc_info:
            store.record("vlm_prompt_edit", "login_button")
        err = exc_info.value
        assert err.category == "vlm_prompt_edit"
        assert err.key == "login_button"
        assert err.count == 3

    @pytest.mark.unit
    def test_fourth_strike_still_raises(self, store):
        """Once blocked, a further strike keeps raising."""
        self._strike_out(store, "deployment_step", "build_docker")
        with pytest.raises(ThreeStrikeError):
            store.record("deployment_step", "build_docker")

    @pytest.mark.unit
    def test_different_keys_are_independent(self, store):
        """Strikes against one key do not count against another key."""
        store.record("vlm_prompt_edit", "login_button")
        store.record("vlm_prompt_edit", "login_button")
        # Different key — should not be blocked
        record = store.record("vlm_prompt_edit", "logout_button")
        assert record.count == 1

    @pytest.mark.unit
    def test_different_categories_are_independent(self, store):
        """The same key under a different category starts from count 1."""
        store.record("vlm_prompt_edit", "foo")
        store.record("vlm_prompt_edit", "foo")
        # Different category, same key — should not be blocked
        record = store.record("game_bug_review", "foo")
        assert record.count == 1

    @pytest.mark.unit
    def test_invalid_category_raises_value_error(self, store):
        """An unknown category is rejected with ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown category"):
            store.record("nonexistent_category", "some_key")

    @pytest.mark.unit
    def test_metadata_stored_in_events(self, store):
        """Metadata passed to ``record`` is readable back via ``get_events``."""
        store.record("parameter_tuning", "learning_rate", metadata={"value": 0.01})
        events = store.get_events("parameter_tuning", "learning_rate")
        assert len(events) == 1
        assert events[0]["metadata"]["value"] == 0.01

    @pytest.mark.unit
    def test_get_returns_none_for_missing(self, store):
        """``get`` returns ``None`` when nothing was recorded for the pair."""
        assert store.get("vlm_prompt_edit", "not_there") is None

    @pytest.mark.unit
    def test_get_returns_record(self, store):
        """``get`` returns the record created by an earlier strike."""
        store.record("vlm_prompt_edit", "submit_btn")
        record = store.get("vlm_prompt_edit", "submit_btn")
        assert record is not None
        assert record.count == 1

    @pytest.mark.unit
    def test_list_all_empty(self, store):
        """A fresh store lists no records."""
        assert store.list_all() == []

    @pytest.mark.unit
    def test_list_all_returns_records(self, store):
        """``list_all`` returns one record per distinct key recorded."""
        store.record("vlm_prompt_edit", "a")
        store.record("vlm_prompt_edit", "b")
        records = store.list_all()
        assert len(records) == 2

    @pytest.mark.unit
    def test_list_blocked_empty_when_no_strikes(self, store):
        """A fresh store lists nothing as blocked."""
        assert store.list_blocked() == []

    @pytest.mark.unit
    def test_list_blocked_contains_blocked(self, store):
        """A key that has struck out appears in ``list_blocked``."""
        self._strike_out(store, "deployment_step", "push_image")
        blocked = store.list_blocked()
        assert len(blocked) == 1
        assert blocked[0].key == "push_image"

    @pytest.mark.unit
    def test_register_automation_unblocks(self, store):
        """Registering an automation lets a blocked key record again."""
        self._strike_out(store, "deployment_step", "push_image")

        store.register_automation("deployment_step", "push_image", "scripts/push.sh")

        # Should no longer raise
        record = store.record("deployment_step", "push_image")
        assert record.blocked is False
        assert record.automation == "scripts/push.sh"

    @pytest.mark.unit
    def test_register_automation_resets_count(self, store):
        """After registering an automation the next strike has count 1."""
        self._strike_out(store, "deployment_step", "push_image")

        store.register_automation("deployment_step", "push_image", "scripts/push.sh")

        # register_automation resets count to 0; one new record brings it to 1
        new_record = store.record("deployment_step", "push_image")
        assert new_record.count == 1

    @pytest.mark.unit
    def test_get_events_returns_most_recent_first(self, store):
        """``get_events`` orders events newest first."""
        store.record("vlm_prompt_edit", "nav", metadata={"n": 1})
        store.record("vlm_prompt_edit", "nav", metadata={"n": 2})
        events = store.get_events("vlm_prompt_edit", "nav")
        assert len(events) == 2
        # Most recent first
        assert events[0]["metadata"]["n"] == 2

    @pytest.mark.unit
    def test_get_events_respects_limit(self, store):
        """``get_events(limit=2)`` returns two events when more attempts exist."""
        # Only the ``limit`` argument is under test here. Attempts from the
        # third onward raise, and that is deliberately tolerated: this test
        # makes no claim about which attempts raise or get logged.
        for _ in range(5):
            try:
                store.record("vlm_prompt_edit", "el")
            except ThreeStrikeError:
                pass
        events = store.get_events("vlm_prompt_edit", "el", limit=2)
        assert len(events) == 2
|
|
|
|
|
|
# ── FalseworkChecklist ────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestFalseworkChecklist:
    """Check which question tags ``FalseworkChecklist.validate`` reports."""

    @pytest.mark.unit
    def test_valid_checklist_passes(self):
        """A checklist with all five fields given passes with no errors."""
        checklist = FalseworkChecklist(
            durable_artifact="embedding vectors",
            artifact_storage_path="data/embeddings.json",
            local_rule_or_cache="vlm_cache",
            will_repeat=False,
            sovereignty_delta="eliminates repeated call",
        )
        assert checklist.passed is True
        assert checklist.validate() == []

    @pytest.mark.unit
    def test_missing_artifact_fails(self):
        """Omitting ``durable_artifact`` yields an error mentioning Q1."""
        checklist = FalseworkChecklist(
            artifact_storage_path="data/x.json",
            local_rule_or_cache="cache",
            will_repeat=False,
            sovereignty_delta="delta",
        )
        flagged = [msg for msg in checklist.validate() if "Q1" in msg]
        assert flagged

    @pytest.mark.unit
    def test_missing_storage_path_fails(self):
        """Omitting ``artifact_storage_path`` yields an error mentioning Q2."""
        checklist = FalseworkChecklist(
            durable_artifact="artifact",
            local_rule_or_cache="cache",
            will_repeat=False,
            sovereignty_delta="delta",
        )
        flagged = [msg for msg in checklist.validate() if "Q2" in msg]
        assert flagged

    @pytest.mark.unit
    def test_will_repeat_none_fails(self):
        """Omitting ``will_repeat`` yields an error mentioning Q4."""
        checklist = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            sovereignty_delta="delta",
        )
        flagged = [msg for msg in checklist.validate() if "Q4" in msg]
        assert flagged

    @pytest.mark.unit
    def test_will_repeat_true_requires_elimination_strategy(self):
        """``will_repeat=True`` with nothing else added yields a Q5 error."""
        checklist = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            will_repeat=True,
            sovereignty_delta="delta",
        )
        flagged = [msg for msg in checklist.validate() if "Q5" in msg]
        assert flagged

    @pytest.mark.unit
    def test_will_repeat_false_no_elimination_needed(self):
        """``will_repeat=False`` produces no error mentioning Q5."""
        checklist = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            will_repeat=False,
            sovereignty_delta="delta",
        )
        problems = checklist.validate()
        assert all("Q5" not in msg for msg in problems)

    @pytest.mark.unit
    def test_missing_sovereignty_delta_fails(self):
        """Omitting ``sovereignty_delta`` yields an error mentioning Q6."""
        checklist = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            will_repeat=False,
        )
        flagged = [msg for msg in checklist.validate() if "Q6" in msg]
        assert flagged

    @pytest.mark.unit
    def test_multiple_missing_fields(self):
        """A checklist built with no arguments reports at least five errors."""
        problems = FalseworkChecklist().validate()
        # At minimum Q1, Q2, Q3, Q4, Q6 should be flagged
        assert len(problems) >= 5
|
|
|
|
|
|
# ── falsework_check() helper ──────────────────────────────────────────────────
|
|
|
|
|
|
class TestFalseworkCheck:
    """Exercise the ``falsework_check`` helper on empty and full checklists."""

    @pytest.mark.unit
    def test_raises_on_incomplete_checklist(self):
        """A checklist built with no arguments is rejected with ``ValueError``."""
        empty_checklist = FalseworkChecklist()
        with pytest.raises(ValueError, match="Falsework Checklist incomplete"):
            falsework_check(empty_checklist)

    @pytest.mark.unit
    def test_passes_on_complete_checklist(self):
        """A checklist with all five fields given is accepted."""
        answers = {
            "durable_artifact": "artifact",
            "artifact_storage_path": "path",
            "local_rule_or_cache": "cache",
            "will_repeat": False,
            "sovereignty_delta": "delta",
        }
        complete_checklist = FalseworkChecklist(**answers)
        falsework_check(complete_checklist)  # should not raise
|
|
|
|
|
|
# ── ThreeStrikeError ──────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestThreeStrikeError:
    """Check the attributes and message of ``ThreeStrikeError``."""

    @pytest.mark.unit
    def test_attributes(self):
        """Constructor arguments are exposed as category, key and count."""
        error = ThreeStrikeError("vlm_prompt_edit", "foo", 3)
        observed = (error.category, error.key, error.count)
        assert observed == ("vlm_prompt_edit", "foo", 3)

    @pytest.mark.unit
    def test_message_contains_details(self):
        """The string form mentions the category, the key and the count."""
        text = str(ThreeStrikeError("deployment_step", "build", 4))
        for fragment in ("deployment_step", "build", "4"):
            assert fragment in text
|