Files
Timmy-time-dashboard/tests/sovereignty/test_auto_crystallizer.py
Perplexity Computer 8304c7756c
Some checks failed
Tests / test (pull_request) Has been skipped
Tests / lint (pull_request) Failing after 30s
feat: implement Sovereignty Loop core framework — auto-crystallizer, graduation test, orchestration (#953)
Implements the missing pieces of the Sovereignty Loop governing architecture:

## New Modules
- **auto_crystallizer.py** (#961): Extracts durable local rules from LLM
  reasoning chains. Regex-based pattern extraction for threshold checks,
  comparisons, choice-reason patterns. RuleStore with JSON persistence,
  confidence tracking, and success-rate gating.

- **sovereignty_loop.py**: Core orchestration implementing the canonical
  pattern: check cache → miss → infer → crystallize → return. Provides
  sovereign_perceive(), sovereign_decide(), sovereign_narrate() wrappers
  and a @sovereignty_enforced decorator for general use.

- **graduation.py**: Five-condition graduation test runner evaluating
  perception/decision/narration independence, economic independence
  (sats earned > spent), and operational independence (24h uptime).
  Generates markdown reports and persists to JSON.

- **graduation.py route**: Dashboard API endpoint for running graduation
  tests via GET /sovereignty/graduation/test.

## Enhanced Modules
- **perception_cache.py** (#955): Replaced placeholder crystallize_perception()
  with a working implementation that extracts OpenCV templates from VLM
  bounding-box responses. Added .npy image persistence, bbox tracking,
  metadata support, and robust error handling.

- **__init__.py**: Updated docstring and exports to document the full
  sovereignty subsystem.

## Tests (60 new/updated, all passing)
- test_auto_crystallizer.py: 17 tests covering rule extraction, RuleStore
  CRUD, persistence, confidence tracking, and matching
- test_sovereignty_loop.py: 9 tests covering all three layers + decorator
- test_graduation.py: 11 tests covering conditions, reports, persistence
- test_perception_cache.py: Updated 3 tests for new image persistence

## Documentation
- docs/SOVEREIGNTY_INTEGRATION.md: Integration guide with code examples
  for all sovereignty modules, module map, and API reference

Fixes #953
2026-03-24 02:27:47 +00:00

239 lines
8.6 KiB
Python

"""Tests for the auto-crystallizer module.
Refs: #961, #953
"""
import pytest
@pytest.mark.unit
class TestCrystallizeReasoning:
    """Checks that crystallize_reasoning() turns reasoning text into rules."""

    def test_extracts_threshold_rule(self):
        """A 'health was below 30%' phrase should yield a rule citing both."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        def cites_health_threshold(condition):
            # The metric name is matched case-insensitively; the number is not.
            return "health" in condition.lower() and "30" in condition

        rules = crystallize_reasoning(
            "I chose to heal because health was below 30%. So I used a healing potion."
        )

        assert len(rules) >= 1
        has_threshold = any(cites_health_threshold(r.condition) for r in rules)
        assert has_threshold, f"Expected threshold rule, got: {[r.condition for r in rules]}"

    def test_extracts_comparison_rule(self):
        """An explicit 'name < value' comparison should survive into a rule condition."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        expected_tokens = ("stamina_pct", "<")
        rules = crystallize_reasoning("The stamina_pct < 20 so I decided to rest.")

        assert len(rules) >= 1
        has_comparison = any(
            all(token in r.condition for token in expected_tokens) for r in rules
        )
        assert has_comparison, f"Expected comparison rule, got: {[r.condition for r in rules]}"

    def test_extracts_choice_reason_rule(self):
        """A 'chose X because Y' sentence should produce a rule whose action is X."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        rules = crystallize_reasoning("I chose retreat because the enemy outnumbered us.")

        assert len(rules) >= 1
        actions = [r.action for r in rules]
        has_retreat = any(action == "retreat" for action in actions)
        assert has_retreat, f"Expected 'retreat' action, got: {[r.action for r in rules]}"

    def test_deduplicates_rules(self):
        """Repeating the same pattern in one text must not yield repeated rule IDs."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        repeated_text = (
            "I chose heal because health was below 30%. Again, health was below 30% so I healed."
        )
        rule_ids = [r.id for r in crystallize_reasoning(repeated_text)]

        # Every ID must be distinct, so the list and its set have equal size.
        assert len(rule_ids) == len(set(rule_ids)), "Duplicate rules detected"

    def test_empty_reasoning_returns_no_rules(self):
        """Both an empty string and pattern-free text should give an empty list."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        for text in ("", "The weather is nice today."):
            assert crystallize_reasoning(text) == []

    def test_rule_has_excerpt(self):
        """The first extracted rule should carry a non-empty reasoning excerpt."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        rules = crystallize_reasoning("I chose attack because the enemy health was below 50%.")

        assert len(rules) >= 1
        first_rule = rules[0]
        assert first_rule.reasoning_excerpt != ""

    def test_context_stored_in_metadata(self):
        """Keys of the supplied context dict should be readable from rule metadata."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        rules = crystallize_reasoning(
            "I chose to trade because gold_amount > 100.",
            context={"game": "morrowind", "location": "balmora"},
        )

        assert len(rules) >= 1
        first_rule = rules[0]
        assert first_rule.metadata.get("game") == "morrowind"
@pytest.mark.unit
class TestRule:
    """Checks the defaults and derived properties of a Rule instance."""

    def test_initial_state(self):
        """A rule built from id/condition/action alone has 0.5 confidence and zero counters."""
        from timmy.sovereignty.auto_crystallizer import Rule

        fresh = Rule(id="test", condition="hp < 30", action="heal")

        assert fresh.confidence == 0.5
        assert fresh.times_applied == 0
        assert fresh.times_succeeded == 0
        assert not fresh.is_reliable

    def test_success_rate(self):
        """With 8 successes out of 10 applications, success_rate should equal 0.8."""
        from timmy.sovereignty.auto_crystallizer import Rule

        tracked = Rule(id="test", condition="hp < 30", action="heal")
        # Counters are set after construction, mirroring in-place updates.
        tracked.times_applied, tracked.times_succeeded = 10, 8

        assert tracked.success_rate == 0.8

    def test_is_reliable(self):
        """Confidence 0.85 with 4 of 5 applications succeeding should count as reliable."""
        from timmy.sovereignty.auto_crystallizer import Rule

        fields = dict(
            id="test",
            condition="hp < 30",
            action="heal",
            confidence=0.85,
            times_applied=5,
            times_succeeded=4,
        )
        proven = Rule(**fields)

        assert proven.is_reliable

    def test_not_reliable_low_confidence(self):
        """Confidence 0.5 should not count as reliable even with 8 of 10 successes."""
        from timmy.sovereignty.auto_crystallizer import Rule

        fields = dict(
            id="test",
            condition="hp < 30",
            action="heal",
            confidence=0.5,
            times_applied=10,
            times_succeeded=8,
        )
        doubtful = Rule(**fields)

        assert not doubtful.is_reliable
@pytest.mark.unit
class TestRuleStore:
    """Checks RuleStore add/get, reload from file, application tracking, and matching."""

    def test_add_and_retrieve(self, tmp_path):
        """A rule added under an ID can be fetched back with its condition intact."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store_file = tmp_path / "strategy.json"
        store = RuleStore(path=store_file)
        store.add(Rule(id="r1", condition="hp < 30", action="heal"))

        fetched = store.get("r1")

        assert fetched is not None
        assert fetched.condition == "hp < 30"

    def test_persist_and_reload(self, tmp_path):
        """A second store opened on the same path sees both previously added rules."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store_file = tmp_path / "strategy.json"
        writer = RuleStore(path=store_file)
        writer.add(Rule(id="r1", condition="hp < 30", action="heal"))
        writer.add(Rule(id="r2", condition="mana > 50", action="cast"))

        # A fresh instance pointed at the same file stands in for a restart.
        reader = RuleStore(path=store_file)

        assert len(reader) == 2
        for rule_id in ("r1", "r2"):
            assert reader.get(rule_id) is not None

    def test_record_application_success(self, tmp_path):
        """One successful application bumps both counters and raises confidence above 0.5."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        starting_confidence = 0.5
        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(
            Rule(id="r1", condition="hp < 30", action="heal", confidence=starting_confidence)
        )

        store.record_application("r1", succeeded=True)
        updated = store.get("r1")

        assert updated.times_applied == 1
        assert updated.times_succeeded == 1
        assert updated.confidence > 0.5

    def test_record_application_failure(self, tmp_path):
        """One failed application counts as applied only and lowers confidence below 0.8."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        starting_confidence = 0.8
        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(
            Rule(id="r1", condition="hp < 30", action="heal", confidence=starting_confidence)
        )

        store.record_application("r1", succeeded=False)
        updated = store.get("r1")

        assert updated.times_applied == 1
        assert updated.times_succeeded == 0
        assert updated.confidence < 0.8

    def test_add_many_counts_new(self, tmp_path):
        """add_many reports 1 when one of two submitted rules reuses a stored ID."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(Rule(id="r1", condition="hp < 30", action="heal"))

        batch = [
            Rule(id="r1", condition="hp < 30", action="heal"),  # ID already stored
            Rule(id="r2", condition="mana > 50", action="cast"),  # ID not yet stored
        ]
        newly_added = store.add_many(batch)

        assert newly_added == 1
        assert len(store) == 2

    def test_find_matching_returns_reliable_only(self, tmp_path):
        """Of two rules with condition 'health low', only high-confidence r1 is returned."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")

        # r1: high confidence, 4 of 5 applications succeeded.
        trusted = Rule(
            id="r1",
            condition="health low",
            action="heal",
            confidence=0.9,
            times_applied=5,
            times_succeeded=4,
        )
        store.add(trusted)

        # r2: same condition, but low confidence and no successes.
        untrusted = Rule(
            id="r2",
            condition="health low",
            action="flee",
            confidence=0.3,
            times_applied=1,
            times_succeeded=0,
        )
        store.add(untrusted)

        matches = store.find_matching({"health": "low"})

        assert len(matches) == 1
        assert matches[0].id == "r1"