[perplexity] feat: Sovereignty Loop core framework — auto-crystallizer, graduation test, orchestration (#953) (#1331)
Some checks failed
Tests / lint (push) Has been cancelled
Tests / test (push) Has been cancelled

Co-authored-by: Perplexity Computer <perplexity@tower.local>
Co-committed-by: Perplexity Computer <perplexity@tower.local>
This commit was merged in pull request #1331.
This commit is contained in:
2026-03-24 02:29:39 +00:00
committed by Timmy Time
parent 4f8df32882
commit 4ec4558a2f
11 changed files with 2299 additions and 68 deletions

View File

@@ -0,0 +1,238 @@
"""Tests for the auto-crystallizer module.
Refs: #961, #953
"""
import pytest
@pytest.mark.unit
class TestCrystallizeReasoning:
    """Exercise ``crystallize_reasoning``: turning reasoning text into rules."""

    def test_extracts_threshold_rule(self):
        """A "below 30%" statement yields a condition naming the stat and the value."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        text = "I chose to heal because health was below 30%. So I used a healing potion."
        extracted = crystallize_reasoning(text)
        assert len(extracted) >= 1

        # At least one condition must mention both the stat and the threshold.
        conditions = [rule.condition for rule in extracted]
        matched = any("health" in cond.lower() and "30" in cond for cond in conditions)
        assert matched, f"Expected threshold rule, got: {conditions}"

    def test_extracts_comparison_rule(self):
        """An explicit ``name < value`` comparison is carried into the condition."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        extracted = crystallize_reasoning("The stamina_pct < 20 so I decided to rest.")
        assert len(extracted) >= 1

        conditions = [rule.condition for rule in extracted]
        matched = any("stamina_pct" in cond and "<" in cond for cond in conditions)
        assert matched, f"Expected comparison rule, got: {conditions}"

    def test_extracts_choice_reason_rule(self):
        """'I chose X because Y' produces a rule whose action is X."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        extracted = crystallize_reasoning("I chose retreat because the enemy outnumbered us.")
        assert len(extracted) >= 1

        actions = [rule.action for rule in extracted]
        assert any(action == "retreat" for action in actions), (
            f"Expected 'retreat' action, got: {actions}"
        )

    def test_deduplicates_rules(self):
        """Repeating the same statement must not yield two rules with the same id."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        text = (
            "I chose heal because health was below 30%. "
            "Again, health was below 30% so I healed."
        )
        rule_ids = [rule.id for rule in crystallize_reasoning(text)]

        # A duplicated condition+action is expected to map to one id.
        # NOTE(review): this passes vacuously if no rules are extracted at all;
        # consider also asserting that the result is non-empty.
        unique_ids = set(rule_ids)
        assert len(rule_ids) == len(unique_ids), "Duplicate rules detected"

    def test_empty_reasoning_returns_no_rules(self):
        """Empty text and text without any decision pattern both give ``[]``."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        for text in ("", "The weather is nice today."):
            assert crystallize_reasoning(text) == []

    def test_rule_has_excerpt(self):
        """The first extracted rule carries a non-empty reasoning excerpt."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        extracted = crystallize_reasoning("I chose attack because the enemy health was below 50%.")
        assert len(extracted) >= 1

        first = extracted[0]
        assert first.reasoning_excerpt != ""

    def test_context_stored_in_metadata(self):
        """Keys of the ``context`` argument are readable from the rule's metadata."""
        from timmy.sovereignty.auto_crystallizer import crystallize_reasoning

        extracted = crystallize_reasoning(
            "I chose to trade because gold_amount > 100.",
            context={"game": "morrowind", "location": "balmora"},
        )
        assert len(extracted) >= 1

        first = extracted[0]
        assert first.metadata.get("game") == "morrowind"
@pytest.mark.unit
class TestRule:
    """Checks on the ``Rule`` record: defaults, success rate and reliability."""

    def test_initial_state(self):
        """A rule built from id/condition/action alone has the expected defaults."""
        from timmy.sovereignty.auto_crystallizer import Rule

        fresh = Rule(id="test", condition="hp < 30", action="heal")

        assert fresh.confidence == 0.5
        assert fresh.times_applied == 0
        assert fresh.times_succeeded == 0
        assert not fresh.is_reliable

    def test_success_rate(self):
        """8 successes out of 10 applications gives a success rate of 0.8."""
        from timmy.sovereignty.auto_crystallizer import Rule

        rule = Rule(id="test", condition="hp < 30", action="heal")
        # Counters are set after construction, in the same order as before.
        rule.times_applied, rule.times_succeeded = 10, 8

        assert rule.success_rate == 0.8

    def test_is_reliable(self):
        """Confidence 0.85 with 4 of 5 successful applications counts as reliable."""
        from timmy.sovereignty.auto_crystallizer import Rule

        fields = {
            "id": "test",
            "condition": "hp < 30",
            "action": "heal",
            "confidence": 0.85,
            "times_applied": 5,
            "times_succeeded": 4,
        }

        assert Rule(**fields).is_reliable

    def test_not_reliable_low_confidence(self):
        """Confidence 0.5 is not reliable even with 8 of 10 successful applications."""
        from timmy.sovereignty.auto_crystallizer import Rule

        fields = {
            "id": "test",
            "condition": "hp < 30",
            "action": "heal",
            "confidence": 0.5,
            "times_applied": 10,
            "times_succeeded": 8,
        }

        assert not Rule(**fields).is_reliable
@pytest.mark.unit
class TestRuleStore:
    """Checks on ``RuleStore``, backed by a JSON file under ``tmp_path``."""

    def test_add_and_retrieve(self, tmp_path):
        """A rule added under an id can be fetched back by that id."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(Rule(id="r1", condition="hp < 30", action="heal"))

        fetched = store.get("r1")
        assert fetched is not None
        assert fetched.condition == "hp < 30"

    def test_persist_and_reload(self, tmp_path):
        """Rules added through one store are visible to a second store on the same path."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        strategy_file = tmp_path / "strategy.json"
        writer = RuleStore(path=strategy_file)
        for rule in (
            Rule(id="r1", condition="hp < 30", action="heal"),
            Rule(id="r2", condition="mana > 50", action="cast"),
        ):
            writer.add(rule)

        # A second store is opened on the same file; no explicit persist call is
        # made above, so this presumably relies on add() writing through.
        reader = RuleStore(path=strategy_file)
        assert len(reader) == 2
        assert reader.get("r1") is not None
        assert reader.get("r2") is not None

    def test_record_application_success(self, tmp_path):
        """One successful application bumps both counters and raises confidence."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(Rule(id="r1", condition="hp < 30", action="heal", confidence=0.5))

        store.record_application("r1", succeeded=True)

        updated = store.get("r1")
        assert updated.times_applied == 1
        assert updated.times_succeeded == 1
        assert updated.confidence > 0.5

    def test_record_application_failure(self, tmp_path):
        """One failed application bumps only ``times_applied`` and lowers confidence."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(Rule(id="r1", condition="hp < 30", action="heal", confidence=0.8))

        store.record_application("r1", succeeded=False)

        updated = store.get("r1")
        assert updated.times_applied == 1
        assert updated.times_succeeded == 0
        assert updated.confidence < 0.8

    def test_add_many_counts_new(self, tmp_path):
        """``add_many`` reports only the rules whose ids were not already stored."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(Rule(id="r1", condition="hp < 30", action="heal"))

        already_known = Rule(id="r1", condition="hp < 30", action="heal")
        brand_new = Rule(id="r2", condition="mana > 50", action="cast")
        added_count = store.add_many([already_known, brand_new])

        assert added_count == 1
        assert len(store) == 2

    def test_find_matching_returns_reliable_only(self, tmp_path):
        """Of two rules with the same condition, only the high-confidence one matches."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore

        trusted = Rule(
            id="r1",
            condition="health low",
            action="heal",
            confidence=0.9,
            times_applied=5,
            times_succeeded=4,
        )
        untrusted = Rule(
            id="r2",
            condition="health low",
            action="flee",
            confidence=0.3,
            times_applied=1,
            times_succeeded=0,
        )
        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(trusted)
        store.add(untrusted)

        hits = store.find_matching({"health": "low"})

        assert len(hits) == 1
        assert hits[0].id == "r1"

View File

@@ -0,0 +1,165 @@
"""Tests for the graduation test runner.
Refs: #953 (Graduation Test)
"""
from unittest.mock import patch
import pytest
@pytest.mark.unit
class TestConditionResults:
    """Checks on the economic and operational graduation condition evaluators."""

    def test_economic_independence_pass(self):
        """Earning 100 against 50 spent passes and reports the net as ``actual``."""
        from timmy.sovereignty.graduation import evaluate_economic_independence

        outcome = evaluate_economic_independence(sats_earned=100.0, sats_spent=50.0)

        assert outcome.passed is True
        assert outcome.actual == 50.0  # net
        assert "Earned: 100.0" in outcome.detail

    def test_economic_independence_fail_net_negative(self):
        """Spending more than was earned fails the condition."""
        from timmy.sovereignty.graduation import evaluate_economic_independence

        outcome = evaluate_economic_independence(sats_earned=10.0, sats_spent=50.0)

        assert outcome.passed is False

    def test_economic_independence_fail_zero_earnings(self):
        """Zero earned and zero spent still fails the condition."""
        from timmy.sovereignty.graduation import evaluate_economic_independence

        outcome = evaluate_economic_independence(sats_earned=0.0, sats_spent=0.0)

        assert outcome.passed is False

    def test_operational_independence_pass(self):
        """24 hours of uptime with no human interventions passes."""
        from timmy.sovereignty.graduation import evaluate_operational_independence

        outcome = evaluate_operational_independence(uptime_hours=24.0, human_interventions=0)

        assert outcome.passed is True

    def test_operational_independence_fail_low_uptime(self):
        """20 hours of uptime fails even with no interventions."""
        from timmy.sovereignty.graduation import evaluate_operational_independence

        outcome = evaluate_operational_independence(uptime_hours=20.0, human_interventions=0)

        assert outcome.passed is False

    def test_operational_independence_fail_interventions(self):
        """Two human interventions fail the condition despite 24 hours of uptime."""
        from timmy.sovereignty.graduation import evaluate_operational_independence

        outcome = evaluate_operational_independence(uptime_hours=24.0, human_interventions=2)

        assert outcome.passed is False
@pytest.mark.unit
class TestGraduationReport:
    """Checks on ``GraduationReport`` serialisation to dict and markdown."""

    def test_to_dict(self):
        """``to_dict`` exposes the overall flag and each condition by name."""
        from timmy.sovereignty.graduation import ConditionResult, GraduationReport

        only_condition = ConditionResult(
            name="Test", passed=True, actual=0, target=0, unit=" calls"
        )
        report = GraduationReport(all_passed=False, conditions=[only_condition])

        as_dict = report.to_dict()

        assert as_dict["all_passed"] is False
        assert len(as_dict["conditions"]) == 1
        assert as_dict["conditions"][0]["name"] == "Test"

    def test_to_markdown(self):
        """``to_markdown`` mentions the verdict, every condition name and 'falsework'."""
        from timmy.sovereignty.graduation import ConditionResult, GraduationReport

        conditions = [
            ConditionResult(name="Perception", passed=True, actual=0, target=0),
            ConditionResult(name="Decision", passed=True, actual=3, target=5),
        ]
        report = GraduationReport(all_passed=True, conditions=conditions)

        rendered = report.to_markdown()

        assert "PASSED" in rendered
        assert "Perception" in rendered
        assert "Decision" in rendered
        # The 'falsework' check is case-insensitive.
        assert "falsework" in rendered.lower()
@pytest.mark.unit
class TestRunGraduationTest:
    """Checks on ``run_graduation_test`` and ``persist_graduation_report``."""

    @patch("timmy.sovereignty.graduation.evaluate_perception_independence")
    @patch("timmy.sovereignty.graduation.evaluate_decision_independence")
    @patch("timmy.sovereignty.graduation.evaluate_narration_independence")
    def test_all_pass(self, mock_narr, mock_dec, mock_perc):
        """The report passes and holds 5 conditions when every condition passes."""
        from timmy.sovereignty.graduation import ConditionResult, run_graduation_test

        # Three evaluators are patched; the remaining inputs are passed directly.
        stubbed = (
            (mock_perc, ConditionResult(name="Perception", passed=True, actual=0, target=0)),
            (mock_dec, ConditionResult(name="Decision", passed=True, actual=3, target=5)),
            (mock_narr, ConditionResult(name="Narration", passed=True, actual=0, target=0)),
        )
        for evaluator, outcome in stubbed:
            evaluator.return_value = outcome

        inputs = {
            "sats_earned": 100.0,
            "sats_spent": 50.0,
            "uptime_hours": 24.0,
            "human_interventions": 0,
        }
        report = run_graduation_test(**inputs)

        assert report.all_passed is True
        assert len(report.conditions) == 5
        assert all(condition.passed for condition in report.conditions)

    @patch("timmy.sovereignty.graduation.evaluate_perception_independence")
    @patch("timmy.sovereignty.graduation.evaluate_decision_independence")
    @patch("timmy.sovereignty.graduation.evaluate_narration_independence")
    def test_partial_fail(self, mock_narr, mock_dec, mock_perc):
        """A single failing condition (Decision) makes the whole report fail."""
        from timmy.sovereignty.graduation import ConditionResult, run_graduation_test

        stubbed = (
            (mock_perc, ConditionResult(name="Perception", passed=True, actual=0, target=0)),
            (mock_dec, ConditionResult(name="Decision", passed=False, actual=10, target=5)),
            (mock_narr, ConditionResult(name="Narration", passed=True, actual=0, target=0)),
        )
        for evaluator, outcome in stubbed:
            evaluator.return_value = outcome

        inputs = {
            "sats_earned": 100.0,
            "sats_spent": 50.0,
            "uptime_hours": 24.0,
            "human_interventions": 0,
        }
        report = run_graduation_test(**inputs)

        assert report.all_passed is False

    def test_persist_report(self, tmp_path):
        """The persisted report exists on disk and is JSON carrying ``all_passed``."""
        import json

        from timmy.sovereignty.graduation import (
            ConditionResult,
            GraduationReport,
            persist_graduation_report,
        )

        failing = ConditionResult(name="Test", passed=False, actual=5, target=0)
        report = GraduationReport(all_passed=False, conditions=[failing])

        # Point the module's settings at tmp_path for the duration of the write.
        with patch("timmy.sovereignty.graduation.settings") as mock_settings:
            mock_settings.repo_root = str(tmp_path)
            path = persist_graduation_report(report)

        assert path.exists()
        with open(path) as handle:
            payload = json.load(handle)
        assert payload["all_passed"] is False

View File

@@ -196,9 +196,10 @@ class TestPerceptionCacheMatch:
screenshot = np.array([[5, 6], [7, 8]])
result = cache.match(screenshot)
# Note: current implementation uses > 0.85, so exactly 0.85 returns None state
# Implementation uses >= 0.85 (inclusive threshold)
assert result.confidence == 0.85
assert result.state is None
assert result.state is not None
assert result.state["template_name"] == "threshold_match"
@patch("timmy.sovereignty.perception_cache.cv2")
def test_match_just_above_threshold(self, mock_cv2, tmp_path):
@@ -283,10 +284,12 @@ class TestPerceptionCachePersist:
templates_path = tmp_path / "templates.json"
cache = PerceptionCache(templates_path=templates_path)
cache.add([
Template(name="template1", image=np.array([[1]]), threshold=0.85),
Template(name="template2", image=np.array([[2]]), threshold=0.90),
])
cache.add(
[
Template(name="template1", image=np.array([[1]]), threshold=0.85),
Template(name="template2", image=np.array([[2]]), threshold=0.90),
]
)
cache.persist()
@@ -312,8 +315,10 @@ class TestPerceptionCachePersist:
with open(templates_path) as f:
data = json.load(f)
assert "image" not in data[0]
assert set(data[0].keys()) == {"name", "threshold"}
assert "image" not in data[0] # raw image array is NOT in JSON
# image_path is stored for .npy file reference
assert "name" in data[0]
assert "threshold" in data[0]
class TestPerceptionCacheLoad:
@@ -338,8 +343,8 @@ class TestPerceptionCacheLoad:
assert len(cache2.templates) == 1
assert cache2.templates[0].name == "loaded"
assert cache2.templates[0].threshold == 0.88
# Note: images are loaded as empty arrays per current implementation
assert cache2.templates[0].image.size == 0
# Images are now persisted as .npy files and loaded back
assert cache2.templates[0].image.size > 0
def test_load_empty_file(self, tmp_path):
"""Load handles empty template list in file."""

View File

@@ -0,0 +1,239 @@
"""Tests for the sovereignty loop orchestrator.
Refs: #953
"""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
@pytest.mark.unit
@pytest.mark.asyncio
class TestSovereignPerceive:
    """Checks on ``sovereign_perceive``: cache hit versus VLM fallback."""

    # Patch target for the event emitter used by the loop module.
    _EMIT_TARGET = "timmy.sovereignty.sovereignty_loop.emit_sovereignty_event"

    async def test_cache_hit_skips_vlm(self):
        """A 0.95-confidence cache match is returned as-is and the VLM stays idle."""
        from timmy.sovereignty.perception_cache import CacheResult
        from timmy.sovereignty.sovereignty_loop import sovereign_perceive

        cached_state = {"template_name": "health_bar"}
        cache = MagicMock()
        cache.match.return_value = CacheResult(confidence=0.95, state=cached_state)
        vlm = AsyncMock()
        screenshot = MagicMock()

        with patch(self._EMIT_TARGET, new_callable=AsyncMock) as mock_emit:
            result = await sovereign_perceive(screenshot, cache, vlm)

        assert result == {"template_name": "health_bar"}
        vlm.analyze.assert_not_called()
        mock_emit.assert_called_once_with("perception_cache_hit", session_id="")

    async def test_cache_miss_calls_vlm_and_crystallizes(self):
        """A stateless 0.3-confidence match triggers the VLM and the crystallize hook."""
        from timmy.sovereignty.perception_cache import CacheResult
        from timmy.sovereignty.sovereignty_loop import sovereign_perceive

        cache = MagicMock()
        cache.match.return_value = CacheResult(confidence=0.3, state=None)
        vlm = AsyncMock()
        vlm.analyze.return_value = {"items": []}
        screenshot = MagicMock()
        crystallize_hook = MagicMock(return_value=[])

        with patch(self._EMIT_TARGET, new_callable=AsyncMock):
            await sovereign_perceive(screenshot, cache, vlm, crystallize_fn=crystallize_hook)

        vlm.analyze.assert_called_once_with(screenshot)
        crystallize_hook.assert_called_once()
@pytest.mark.unit
@pytest.mark.asyncio
class TestSovereignDecide:
    """Checks on ``sovereign_decide``: stored rule versus LLM fallback."""

    # Patch target for the event emitter used by the loop module.
    _EMIT_TARGET = "timmy.sovereignty.sovereignty_loop.emit_sovereignty_event"

    async def test_rule_hit_skips_llm(self, tmp_path):
        """A matching high-confidence rule supplies the action; the LLM is not asked."""
        from timmy.sovereignty.auto_crystallizer import Rule, RuleStore
        from timmy.sovereignty.sovereignty_loop import sovereign_decide

        trusted = Rule(
            id="r1",
            condition="health low",
            action="heal",
            confidence=0.9,
            times_applied=5,
            times_succeeded=4,
        )
        store = RuleStore(path=tmp_path / "strategy.json")
        store.add(trusted)
        llm = AsyncMock()
        context = {"health": "low", "mana": 50}

        with patch(self._EMIT_TARGET, new_callable=AsyncMock):
            decision = await sovereign_decide(context, llm, rule_store=store)

        assert decision["action"] == "heal"
        assert decision["source"] == "crystallized_rule"
        llm.reason.assert_not_called()

    async def test_no_rule_calls_llm_and_crystallizes(self, tmp_path):
        """With an empty store the LLM decides and its reasoning lands in the store."""
        from timmy.sovereignty.auto_crystallizer import RuleStore
        from timmy.sovereignty.sovereignty_loop import sovereign_decide

        store = RuleStore(path=tmp_path / "strategy.json")
        llm_answer = {
            "action": "attack",
            "reasoning": "I chose attack because enemy_health was below 50%.",
        }
        llm = AsyncMock()
        llm.reason.return_value = llm_answer
        context = {"enemy_health": 45}

        with patch(self._EMIT_TARGET, new_callable=AsyncMock):
            decision = await sovereign_decide(context, llm, rule_store=store)

        assert decision["action"] == "attack"
        llm.reason.assert_called_once_with(context)
        # The store started empty, so any entry came from crystallizing the reasoning.
        assert len(store) > 0
@pytest.mark.unit
@pytest.mark.asyncio
class TestSovereignNarrate:
    """Checks on ``sovereign_narrate``: template, LLM and default paths."""

    # Patch target for the event emitter used by the loop module.
    _EMIT_TARGET = "timmy.sovereignty.sovereignty_loop.emit_sovereignty_event"

    async def test_template_hit_skips_llm(self):
        """An event type present in the template store is rendered without the LLM."""
        from timmy.sovereignty.sovereignty_loop import sovereign_narrate

        templates = {"combat_start": "Battle begins against {enemy}!"}
        event = {"type": "combat_start", "enemy": "Cliff Racer"}
        llm = AsyncMock()

        with patch(self._EMIT_TARGET, new_callable=AsyncMock) as mock_emit:
            narration = await sovereign_narrate(event, llm=llm, template_store=templates)

        assert narration == "Battle begins against Cliff Racer!"
        llm.narrate.assert_not_called()
        mock_emit.assert_called_once_with("narration_template", session_id="")

    async def test_unknown_event_calls_llm(self):
        """An event type with no template is narrated by the LLM and then crystallized."""
        from timmy.sovereignty.sovereignty_loop import sovereign_narrate

        llm = AsyncMock()
        llm.narrate.return_value = "You discovered a hidden cave in the mountains."
        event = {"type": "discovery", "location": "mountains"}

        # Both patchers are entered in the same order as the original nested form.
        emit_patch = patch(self._EMIT_TARGET, new_callable=AsyncMock)
        crystallize_patch = patch(
            "timmy.sovereignty.sovereignty_loop._crystallize_narration_template"
        )
        with emit_patch, crystallize_patch as mock_cryst:
            narration = await sovereign_narrate(event, llm=llm, template_store={})

        assert narration == "You discovered a hidden cave in the mountains."
        llm.narrate.assert_called_once()
        mock_cryst.assert_called_once()

    async def test_no_llm_returns_default(self):
        """With neither a template nor an LLM, the result embeds the bracketed type."""
        from timmy.sovereignty.sovereignty_loop import sovereign_narrate

        event = {"type": "unknown_event"}

        with patch(self._EMIT_TARGET, new_callable=AsyncMock):
            narration = await sovereign_narrate(event, llm=None, template_store={})

        assert "[unknown_event]" in narration
@pytest.mark.unit
@pytest.mark.asyncio
class TestSovereigntyEnforcedDecorator:
    """Checks on the ``@sovereignty_enforced`` decorator's cache short-circuit."""

    async def test_cache_hit_skips_function(self):
        """A non-None ``cache_check`` result is returned and the body never runs."""
        from timmy.sovereignty.sovereignty_loop import sovereignty_enforced

        # One entry is appended per real invocation of the wrapped coroutine.
        invocations = []

        @sovereignty_enforced(
            layer="decision",
            cache_check=lambda a, kw: "cached_result",
        )
        async def expensive_fn():
            invocations.append(None)
            return "expensive_result"

        with patch("timmy.sovereignty.sovereignty_loop.get_metrics_store") as mock_store:
            mock_store.return_value = MagicMock()
            outcome = await expensive_fn()

        assert outcome == "cached_result"
        assert len(invocations) == 0

    async def test_cache_miss_runs_function(self):
        """A ``cache_check`` returning None falls through to the wrapped coroutine."""
        from timmy.sovereignty.sovereignty_loop import sovereignty_enforced

        @sovereignty_enforced(
            layer="decision",
            cache_check=lambda a, kw: None,
        )
        async def expensive_fn():
            return "computed_result"

        with patch("timmy.sovereignty.sovereignty_loop.get_metrics_store") as mock_store:
            mock_store.return_value = MagicMock()
            outcome = await expensive_fn()

        assert outcome == "computed_result"