Files
Timmy-time-dashboard/tests/timmy/test_three_strike.py
Claude (Opus 4.6) b5fb6a85cf
Some checks failed
Tests / lint (push) Has been cancelled
Tests / test (push) Has been cancelled
[claude] Fix pre-existing ruff lint errors blocking git hooks (#1247) (#1248)
2026-03-23 23:33:37 +00:00

333 lines
11 KiB
Python

"""Tests for the three-strike detector.
Refs: #962
"""
import pytest
from timmy.sovereignty.three_strike import (
CATEGORIES,
STRIKE_BLOCK,
STRIKE_WARNING,
FalseworkChecklist,
StrikeRecord,
ThreeStrikeError,
ThreeStrikeStore,
falsework_check,
)
@pytest.fixture
def store(tmp_path):
    """Provide a ``ThreeStrikeStore`` whose database file lives under ``tmp_path``."""
    db_file = tmp_path / "test_strikes.db"
    return ThreeStrikeStore(db_path=db_file)
# ── Category constants ────────────────────────────────────────────────────────
class TestCategories:
    """Pin the module-level constants exported by the three-strike module."""

    @pytest.mark.unit
    def test_all_categories_present(self):
        """``CATEGORIES`` compares equal to the set of the five known names."""
        known_names = {
            "vlm_prompt_edit",
            "game_bug_review",
            "parameter_tuning",
            "portal_adapter_creation",
            "deployment_step",
        }
        assert known_names == CATEGORIES

    @pytest.mark.unit
    def test_strike_thresholds(self):
        """The warning threshold is 2 and the block threshold is 3."""
        assert (STRIKE_WARNING, STRIKE_BLOCK) == (2, 3)
# ── ThreeStrikeStore ──────────────────────────────────────────────────────────
class TestThreeStrikeStore:
    """Tests for ``ThreeStrikeStore``: strike counting, blocking, automation and events."""

    @staticmethod
    def _strike_until_blocked(store, category, key):
        """Record strikes for ``(category, key)`` up to and including the blocking one.

        The first ``STRIKE_BLOCK - 1`` strikes must succeed and the next one must
        raise ``ThreeStrikeError``. Checking both here, instead of swallowing the
        error on every attempt, makes a store that blocks too early (or never)
        fail during setup rather than slip through unnoticed.
        """
        for _ in range(STRIKE_BLOCK - 1):
            store.record(category, key)
        with pytest.raises(ThreeStrikeError):
            store.record(category, key)

    @pytest.mark.unit
    def test_first_strike_returns_record(self, store):
        """The first strike yields an unblocked ``StrikeRecord`` with count 1."""
        record = store.record("vlm_prompt_edit", "login_button")
        assert isinstance(record, StrikeRecord)
        assert record.count == 1
        assert record.blocked is False
        assert record.category == "vlm_prompt_edit"
        assert record.key == "login_button"

    @pytest.mark.unit
    def test_second_strike_count(self, store):
        """The second strike bumps the count to 2 and is still not blocked."""
        store.record("vlm_prompt_edit", "login_button")
        record = store.record("vlm_prompt_edit", "login_button")
        assert record.count == 2
        assert record.blocked is False

    @pytest.mark.unit
    def test_third_strike_raises(self, store):
        """The third strike raises ``ThreeStrikeError`` carrying category, key and count."""
        store.record("vlm_prompt_edit", "login_button")
        store.record("vlm_prompt_edit", "login_button")
        with pytest.raises(ThreeStrikeError) as exc_info:
            store.record("vlm_prompt_edit", "login_button")
        err = exc_info.value
        assert err.category == "vlm_prompt_edit"
        assert err.key == "login_button"
        assert err.count == 3

    @pytest.mark.unit
    def test_fourth_strike_still_raises(self, store):
        """Once blocked, a further strike on the same key raises again."""
        self._strike_until_blocked(store, "deployment_step", "build_docker")
        with pytest.raises(ThreeStrikeError):
            store.record("deployment_step", "build_docker")

    @pytest.mark.unit
    def test_different_keys_are_independent(self, store):
        """Strikes on one key do not count against another key in the same category."""
        store.record("vlm_prompt_edit", "login_button")
        store.record("vlm_prompt_edit", "login_button")
        # Different key — should not be blocked
        record = store.record("vlm_prompt_edit", "logout_button")
        assert record.count == 1

    @pytest.mark.unit
    def test_different_categories_are_independent(self, store):
        """Strikes in one category do not count against the same key in another."""
        store.record("vlm_prompt_edit", "foo")
        store.record("vlm_prompt_edit", "foo")
        # Different category, same key — should not be blocked
        record = store.record("game_bug_review", "foo")
        assert record.count == 1

    @pytest.mark.unit
    def test_invalid_category_raises_value_error(self, store):
        """An unrecognised category is rejected with ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown category"):
            store.record("nonexistent_category", "some_key")

    @pytest.mark.unit
    def test_metadata_stored_in_events(self, store):
        """Metadata passed to ``record`` is returned by ``get_events``."""
        store.record("parameter_tuning", "learning_rate", metadata={"value": 0.01})
        events = store.get_events("parameter_tuning", "learning_rate")
        assert len(events) == 1
        assert events[0]["metadata"]["value"] == 0.01

    @pytest.mark.unit
    def test_get_returns_none_for_missing(self, store):
        """``get`` returns ``None`` for a key that was never recorded."""
        assert store.get("vlm_prompt_edit", "not_there") is None

    @pytest.mark.unit
    def test_get_returns_record(self, store):
        """``get`` returns the stored record after a strike."""
        store.record("vlm_prompt_edit", "submit_btn")
        record = store.get("vlm_prompt_edit", "submit_btn")
        assert record is not None
        assert record.count == 1

    @pytest.mark.unit
    def test_list_all_empty(self, store):
        """A fresh store lists no records."""
        assert store.list_all() == []

    @pytest.mark.unit
    def test_list_all_returns_records(self, store):
        """``list_all`` returns one record per distinct key."""
        store.record("vlm_prompt_edit", "a")
        store.record("vlm_prompt_edit", "b")
        records = store.list_all()
        assert len(records) == 2

    @pytest.mark.unit
    def test_list_blocked_empty_when_no_strikes(self, store):
        """A fresh store lists no blocked records."""
        assert store.list_blocked() == []

    @pytest.mark.unit
    def test_list_blocked_contains_blocked(self, store):
        """A key driven to the block threshold shows up in ``list_blocked``."""
        self._strike_until_blocked(store, "deployment_step", "push_image")
        blocked = store.list_blocked()
        assert len(blocked) == 1
        assert blocked[0].key == "push_image"

    @pytest.mark.unit
    def test_register_automation_unblocks(self, store):
        """Registering an automation lets a blocked key be recorded again."""
        self._strike_until_blocked(store, "deployment_step", "push_image")
        store.register_automation("deployment_step", "push_image", "scripts/push.sh")
        # Should no longer raise
        record = store.record("deployment_step", "push_image")
        assert record.blocked is False
        assert record.automation == "scripts/push.sh"

    @pytest.mark.unit
    def test_register_automation_resets_count(self, store):
        """After registering an automation, the next strike is counted as the first."""
        self._strike_until_blocked(store, "deployment_step", "push_image")
        store.register_automation("deployment_step", "push_image", "scripts/push.sh")
        # register_automation resets count to 0; one new record brings it to 1
        new_record = store.record("deployment_step", "push_image")
        assert new_record.count == 1

    @pytest.mark.unit
    def test_get_events_returns_most_recent_first(self, store):
        """``get_events`` orders events newest first."""
        store.record("vlm_prompt_edit", "nav", metadata={"n": 1})
        store.record("vlm_prompt_edit", "nav", metadata={"n": 2})
        events = store.get_events("vlm_prompt_edit", "nav")
        assert len(events) == 2
        # Most recent first
        assert events[0]["metadata"]["n"] == 2

    @pytest.mark.unit
    def test_get_events_respects_limit(self, store):
        """``get_events`` returns no more than ``limit`` entries."""
        for _ in range(5):
            # Attempts past the block threshold raise; that is irrelevant to this
            # test, which only needs events to exist, so the error is ignored here.
            try:
                store.record("vlm_prompt_edit", "el")
            except ThreeStrikeError:
                pass
        events = store.get_events("vlm_prompt_edit", "el", limit=2)
        assert len(events) == 2
# ── FalseworkChecklist ────────────────────────────────────────────────────────
class TestFalseworkChecklist:
    """Validation tests for ``FalseworkChecklist``; each test omits or sets one answer."""

    @pytest.mark.unit
    def test_valid_checklist_passes(self):
        """A checklist with every answer supplied passes with no errors."""
        checklist = FalseworkChecklist(
            durable_artifact="embedding vectors",
            artifact_storage_path="data/embeddings.json",
            local_rule_or_cache="vlm_cache",
            will_repeat=False,
            sovereignty_delta="eliminates repeated call",
        )
        assert checklist.passed is True
        assert checklist.validate() == []

    @pytest.mark.unit
    def test_missing_artifact_fails(self):
        """Omitting ``durable_artifact`` produces an error mentioning Q1."""
        checklist = FalseworkChecklist(
            artifact_storage_path="data/x.json",
            local_rule_or_cache="cache",
            will_repeat=False,
            sovereignty_delta="delta",
        )
        assert any("Q1" in message for message in checklist.validate())

    @pytest.mark.unit
    def test_missing_storage_path_fails(self):
        """Omitting ``artifact_storage_path`` produces an error mentioning Q2."""
        checklist = FalseworkChecklist(
            durable_artifact="artifact",
            local_rule_or_cache="cache",
            will_repeat=False,
            sovereignty_delta="delta",
        )
        assert any("Q2" in message for message in checklist.validate())

    @pytest.mark.unit
    def test_will_repeat_none_fails(self):
        """Omitting ``will_repeat`` produces an error mentioning Q4."""
        checklist = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            sovereignty_delta="delta",
        )
        assert any("Q4" in message for message in checklist.validate())

    @pytest.mark.unit
    def test_will_repeat_true_requires_elimination_strategy(self):
        """``will_repeat=True`` with nothing further supplied yields a Q5 error."""
        checklist = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            will_repeat=True,
            sovereignty_delta="delta",
        )
        assert any("Q5" in message for message in checklist.validate())

    @pytest.mark.unit
    def test_will_repeat_false_no_elimination_needed(self):
        """``will_repeat=False`` never yields a Q5 error."""
        checklist = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            will_repeat=False,
            sovereignty_delta="delta",
        )
        assert all("Q5" not in message for message in checklist.validate())

    @pytest.mark.unit
    def test_missing_sovereignty_delta_fails(self):
        """Omitting ``sovereignty_delta`` produces an error mentioning Q6."""
        checklist = FalseworkChecklist(
            durable_artifact="artifact",
            artifact_storage_path="path",
            local_rule_or_cache="cache",
            will_repeat=False,
        )
        assert any("Q6" in message for message in checklist.validate())

    @pytest.mark.unit
    def test_multiple_missing_fields(self):
        """A checklist built with no arguments reports at least five errors."""
        problems = FalseworkChecklist().validate()
        # At minimum Q1, Q2, Q3, Q4, Q6 should be flagged
        assert len(problems) >= 5
# ── falsework_check() helper ──────────────────────────────────────────────────
class TestFalseworkCheck:
    """Tests for the ``falsework_check`` helper."""

    @pytest.mark.unit
    def test_raises_on_incomplete_checklist(self):
        """A checklist built with no arguments is rejected with ``ValueError``."""
        blank = FalseworkChecklist()
        with pytest.raises(ValueError, match="Falsework Checklist incomplete"):
            falsework_check(blank)

    @pytest.mark.unit
    def test_passes_on_complete_checklist(self):
        """A checklist with every answer supplied is accepted without raising."""
        answers = {
            "durable_artifact": "artifact",
            "artifact_storage_path": "path",
            "local_rule_or_cache": "cache",
            "will_repeat": False,
            "sovereignty_delta": "delta",
        }
        # The call itself is the assertion: it should not raise
        falsework_check(FalseworkChecklist(**answers))
# ── ThreeStrikeError ──────────────────────────────────────────────────────────
class TestThreeStrikeError:
    """Tests for the attributes and message of ``ThreeStrikeError``."""

    @pytest.mark.unit
    def test_attributes(self):
        """Constructor arguments are exposed as ``category``, ``key`` and ``count``."""
        error = ThreeStrikeError("vlm_prompt_edit", "foo", 3)
        assert (error.category, error.key, error.count) == ("vlm_prompt_edit", "foo", 3)

    @pytest.mark.unit
    def test_message_contains_details(self):
        """The string form of the error mentions the category, the key and the count."""
        text = str(ThreeStrikeError("deployment_step", "build", 4))
        for fragment in ("deployment_step", "build", "4"):
            assert fragment in text