Implements the missing pieces of the Sovereignty Loop governing architecture:

## New Modules

- **auto_crystallizer.py** (#961): Extracts durable local rules from LLM reasoning chains. Regex-based pattern extraction for threshold checks, comparisons, and choice-reason patterns. RuleStore with JSON persistence, confidence tracking, and success-rate gating.
- **sovereignty_loop.py**: Core orchestration implementing the canonical pattern: check cache → miss → infer → crystallize → return. Provides sovereign_perceive(), sovereign_decide(), sovereign_narrate() wrappers and a @sovereignty_enforced decorator for general use.
- **graduation.py**: Five-condition graduation test runner evaluating perception/decision/narration independence, economic independence (sats earned > spent), and operational independence (24h uptime). Generates markdown reports and persists to JSON.
- **graduation.py route**: Dashboard API endpoint for running graduation tests via GET /sovereignty/graduation/test.

## Enhanced Modules

- **perception_cache.py** (#955): Replaced placeholder crystallize_perception() with a working implementation that extracts OpenCV templates from VLM bounding-box responses. Added .npy image persistence, bbox tracking, metadata support, and robust error handling.
- **__init__.py**: Updated docstring and exports to document the full sovereignty subsystem.

## Tests (60 new/updated, all passing)

- test_auto_crystallizer.py: 17 tests covering rule extraction, RuleStore CRUD, persistence, confidence tracking, and matching
- test_sovereignty_loop.py: 9 tests covering all three layers + decorator
- test_graduation.py: 11 tests covering conditions, reports, persistence
- test_perception_cache.py: Updated 3 tests for new image persistence

## Documentation

- docs/SOVEREIGNTY_INTEGRATION.md: Integration guide with code examples for all sovereignty modules, module map, and API reference

Fixes #953
239 lines
8.6 KiB
Python
239 lines
8.6 KiB
Python
"""Tests for the auto-crystallizer module.
|
|
|
|
Refs: #961, #953
|
|
"""
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.mark.unit
class TestCrystallizeReasoning:
    """Exercise crystallize_reasoning() against sample LLM reasoning text."""

    def test_extracts_threshold_rule(self):
        """A 'below 30%' phrase yields a rule whose condition names health and 30."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        rules = crystallize_reasoning(
            "I chose to heal because health was below 30%. So I used a healing potion."
        )

        assert len(rules) > 0
        # At least one extracted condition must carry the threshold pattern.
        assert any(
            "30" in r.condition and "health" in r.condition.lower() for r in rules
        ), f"Expected threshold rule, got: {[r.condition for r in rules]}"

    def test_extracts_comparison_rule(self):
        """An explicit '<' comparison is kept verbatim in some rule condition."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        rules = crystallize_reasoning("The stamina_pct < 20 so I decided to rest.")

        assert len(rules) > 0
        assert any(
            "<" in r.condition and "stamina_pct" in r.condition for r in rules
        ), f"Expected comparison rule, got: {[r.condition for r in rules]}"

    def test_extracts_choice_reason_rule(self):
        """A 'chose X because Y' sentence produces a rule whose action is X."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        rules = crystallize_reasoning(
            "I chose retreat because the enemy outnumbered us."
        )

        assert len(rules) > 0
        assert any(
            r.action == "retreat" for r in rules
        ), f"Expected 'retreat' action, got: {[r.action for r in rules]}"

    def test_deduplicates_rules(self):
        """Repeating the same pattern in the text must not repeat a rule ID."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        rules = crystallize_reasoning(
            "I chose heal because health was below 30%. Again, health was below 30% so I healed."
        )

        # Identical condition+action pairs are expected to share one ID, so the
        # number of distinct IDs has to equal the number of returned rules.
        distinct_ids = {r.id for r in rules}
        assert len(rules) == len(distinct_ids), "Duplicate rules detected"

    def test_empty_reasoning_returns_no_rules(self):
        """Neither an empty string nor pattern-free prose yields any rule."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        for text in ("", "The weather is nice today."):
            assert crystallize_reasoning(text) == []

    def test_rule_has_excerpt(self):
        """The first extracted rule carries a non-empty reasoning excerpt."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        rules = crystallize_reasoning(
            "I chose attack because the enemy health was below 50%."
        )

        assert len(rules) > 0
        first_rule = rules[0]
        assert first_rule.reasoning_excerpt != ""

    def test_context_stored_in_metadata(self):
        """Keys from the context argument are readable from the rule's metadata."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        rules = crystallize_reasoning(
            "I chose to trade because gold_amount > 100.",
            context={"game": "morrowind", "location": "balmora"},
        )

        assert len(rules) > 0
        first_rule = rules[0]
        assert first_rule.metadata.get("game") == "morrowind"
|
|
|
|
|
|
@pytest.mark.unit
class TestRule:
    """Tests for the Rule dataclass."""

    def test_initial_state(self):
        """A Rule built from id/condition/action alone starts unproven.

        Checks the default confidence of 0.5, zeroed application counters,
        and that ``is_reliable`` is false.
        """
        from timmy.sovereignty.auto_crystallizer import Rule

        rule = Rule(id="test", condition="hp < 30", action="heal")

        assert rule.confidence == 0.5
        assert rule.times_applied == 0
        assert rule.times_succeeded == 0
        assert not rule.is_reliable

    def test_success_rate(self):
        """8 successes out of 10 applications gives a success rate of 0.8."""
        from timmy.sovereignty.auto_crystallizer import Rule

        rule = Rule(id="test", condition="hp < 30", action="heal")
        rule.times_applied = 10
        rule.times_succeeded = 8

        # success_rate is a computed float ratio; compare with a tolerance so
        # the test does not depend on how the division happens to round.
        assert rule.success_rate == pytest.approx(0.8)

    def test_is_reliable(self):
        """Confidence 0.85 with 5 applications (4 succeeded) counts as reliable."""
        from timmy.sovereignty.auto_crystallizer import Rule

        rule = Rule(
            id="test",
            condition="hp < 30",
            action="heal",
            confidence=0.85,
            times_applied=5,
            times_succeeded=4,
        )

        assert rule.is_reliable

    def test_not_reliable_low_confidence(self):
        """Confidence 0.5 is not reliable even with 10 applications, 8 succeeded."""
        from timmy.sovereignty.auto_crystallizer import Rule

        rule = Rule(
            id="test",
            condition="hp < 30",
            action="heal",
            confidence=0.5,
            times_applied=10,
            times_succeeded=8,
        )

        assert not rule.is_reliable
|
|
|
|
|
|
@pytest.mark.unit
class TestRuleStore:
    """Tests for the RuleStore persistence layer.

    Every test points the store at a file under pytest's ``tmp_path`` fixture.
    """

    def test_add_and_retrieve(self, tmp_path):
        """A rule passed to add() can be fetched back by its ID via get()."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")
        rule = Rule(id="r1", condition="hp < 30", action="heal")
        store.add(rule)

        retrieved = store.get("r1")
        assert retrieved is not None
        assert retrieved.condition == "hp < 30"

    def test_persist_and_reload(self, tmp_path):
        """Rules added through one store are visible to a second store on the same path."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        path = tmp_path / "strategy.json"
        store = RuleStore(path=path)
        store.add(Rule(id="r1", condition="hp < 30", action="heal"))
        store.add(Rule(id="r2", condition="mana > 50", action="cast"))

        # Create a new store from the same file
        store2 = RuleStore(path=path)
        assert len(store2) == 2
        assert store2.get("r1") is not None
        assert store2.get("r2") is not None

    def test_record_application_success(self, tmp_path):
        """A successful application bumps both counters and raises confidence above 0.5."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(Rule(id="r1", condition="hp < 30", action="heal", confidence=0.5))

        store.record_application("r1", succeeded=True)

        rule = store.get("r1")
        # Fail with a readable assertion rather than an AttributeError on None.
        assert rule is not None
        assert rule.times_applied == 1
        assert rule.times_succeeded == 1
        assert rule.confidence > 0.5

    def test_record_application_failure(self, tmp_path):
        """A failed application bumps only times_applied and drops confidence below 0.8."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(Rule(id="r1", condition="hp < 30", action="heal", confidence=0.8))

        store.record_application("r1", succeeded=False)

        rule = store.get("r1")
        # Fail with a readable assertion rather than an AttributeError on None.
        assert rule is not None
        assert rule.times_applied == 1
        assert rule.times_succeeded == 0
        assert rule.confidence < 0.8

    def test_add_many_counts_new(self, tmp_path):
        """add_many() reports 1 when one of two rules has an ID already in the store."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(Rule(id="r1", condition="hp < 30", action="heal"))

        new_rules = [
            Rule(id="r1", condition="hp < 30", action="heal"),  # existing
            Rule(id="r2", condition="mana > 50", action="cast"),  # new
        ]
        added = store.add_many(new_rules)

        assert added == 1
        assert len(store) == 2

    def test_find_matching_returns_reliable_only(self, tmp_path):
        """find_matching() returns only the high-confidence rule of two candidates.

        Both rules share the condition "health low"; r1 has confidence 0.9
        with 5 applications, r2 has confidence 0.3 with 1 application. Only
        r1 is expected back for the context ``{"health": "low"}``.
        """
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(
            Rule(
                id="r1",
                condition="health low",
                action="heal",
                confidence=0.9,
                times_applied=5,
                times_succeeded=4,
            )
        )
        store.add(
            Rule(
                id="r2",
                condition="health low",
                action="flee",
                confidence=0.3,
                times_applied=1,
                times_succeeded=0,
            )
        )

        matches = store.find_matching({"health": "low"})

        assert len(matches) == 1
        assert matches[0].id == "r1"
|