Complete second-pass refinement integrating all wizard house contributions:

**Three-House Architecture:**
- Ezra (Archivist): Read-before-write, evidence over vibes, citation discipline
- Bezalel (Artificer): Build-from-plans, proof over speculation, test discipline
- Timmy (Sovereign): Final judgment, telemetry, sovereignty preservation

**Core Components:**
- harness.py: House-aware execution with policy enforcement
- router.py: Intelligent task routing to appropriate house
- task_router_daemon.py: Full three-house Gitea workflow
- tests/test_v2.py: Comprehensive test suite

**Key Features:**
- Provenance tracking with content hashing
- House-specific policy enforcement
- Sovereignty telemetry logging
- Cross-house workflow orchestration
- Evidence-level tracking per execution

Honors canon from specs/timmy-ezra-bezalel-canon-sheet.md:
- Distinct house identities
- No authority blending
- Artifact-flow unidirectional
- Full provenance and telemetry
397 lines
12 KiB
Python
397 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test suite for Uni-Wizard v2 — Three-House Architecture

Tests:
- House policy enforcement
- Provenance tracking
- Routing decisions
- Cross-house workflows
- Telemetry logging
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import tempfile
|
|
import shutil
|
|
from pathlib import Path
|
|
from unittest.mock import Mock, patch
|
|
|
|
# Add parent to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from harness import (
|
|
UniWizardHarness, House, HousePolicy,
|
|
Provenance, ExecutionResult, SovereigntyTelemetry
|
|
)
|
|
from router import HouseRouter, TaskType, CrossHouseWorkflow
|
|
|
|
|
|
class TestHousePolicy:
    """Checks the policy mapping that ``HousePolicy.get`` returns for each house."""

    def test_timmy_policy(self):
        """Timmy's policy sets its three boolean flags and a 'Sovereignty' motto."""
        timmy = HousePolicy.get(House.TIMMY)
        for flag in ("requires_provenance", "can_override", "telemetry"):
            assert timmy[flag] is True
        assert "Sovereignty" in timmy["motto"]

    def test_ezra_policy(self):
        """Ezra's policy sets its flags, a 0.8 evidence threshold and a 'Read' motto."""
        ezra = HousePolicy.get(House.EZRA)
        for flag in (
            "requires_provenance",
            "must_read_before_write",
            "citation_required",
        ):
            assert ezra[flag] is True
        assert ezra["evidence_threshold"] == 0.8
        assert "Read" in ezra["motto"]

    def test_bezalel_policy(self):
        """Bezalel's policy sets its three boolean flags and a 'Build' motto."""
        bezalel = HousePolicy.get(House.BEZALEL)
        for flag in ("requires_provenance", "requires_proof", "test_before_ship"):
            assert bezalel[flag] is True
        assert "Build" in bezalel["motto"]
|
|
|
|
|
|
class TestProvenance:
    """Checks that a ``Provenance`` record round-trips through ``to_dict``."""

    def test_provenance_creation(self):
        """Fields given to the constructor are visible in the dict form."""
        fields = {
            "house": "ezra",
            "tool": "git_status",
            "started_at": "2026-03-30T20:00:00Z",
            "evidence_level": "full",
            "confidence": 0.95,
            "sources_read": ["repo:/path", "git:HEAD"],
        }

        as_dict = Provenance(**fields).to_dict()

        assert as_dict["house"] == "ezra"
        assert as_dict["evidence_level"] == "full"
        assert as_dict["confidence"] == 0.95
        assert len(as_dict["sources_read"]) == 2
|
|
|
|
|
|
class TestExecutionResult:
    """Checks JSON serialisation of an ``ExecutionResult`` carrying provenance."""

    def test_success_result(self):
        """A successful result serialises its data and provenance via ``to_json``."""
        ezra_provenance = Provenance(
            house="ezra",
            tool="git_status",
            started_at="2026-03-30T20:00:00Z",
            evidence_level="full",
            confidence=0.9,
        )
        outcome = ExecutionResult(
            success=True,
            data={"status": "clean"},
            provenance=ezra_provenance,
            execution_time_ms=150,
        )

        # Round-trip through JSON text so the assertions see plain dicts.
        decoded = json.loads(outcome.to_json())

        assert decoded["success"] is True
        assert decoded["data"]["status"] == "clean"
        assert decoded["provenance"]["house"] == "ezra"
|
|
|
|
|
|
class TestSovereigntyTelemetry:
    """Checks ``SovereigntyTelemetry`` logging against a throwaway directory."""

    def setup_method(self):
        """Create a temporary log directory and a telemetry object writing to it."""
        self.temp_dir = tempfile.mkdtemp()
        log_dir = Path(self.temp_dir)
        self.telemetry = SovereigntyTelemetry(log_dir=log_dir)

    def teardown_method(self):
        """Remove the temporary log directory created in ``setup_method``."""
        shutil.rmtree(self.temp_dir)

    def test_log_creation(self):
        """Logging one execution creates the log file with a matching first entry."""
        timmy_provenance = Provenance(
            house="timmy",
            tool="test",
            started_at="2026-03-30T20:00:00Z",
            evidence_level="full",
            confidence=0.9,
        )
        outcome = ExecutionResult(
            success=True,
            data={},
            provenance=timmy_provenance,
            execution_time_ms=100,
        )

        self.telemetry.log_execution("timmy", "test", outcome)

        # The log file must exist after the first write.
        assert self.telemetry.telemetry_log.exists()

        # The first line of the log is parsed as a single JSON document.
        with open(self.telemetry.telemetry_log) as log_file:
            first_line = log_file.readline()
        entry = json.loads(first_line)

        assert entry["house"] == "timmy"
        assert entry["tool"] == "test"
        assert entry["evidence_level"] == "full"

    def test_sovereignty_report(self):
        """Five logged executions across two houses show up in the report."""
        houses = ("ezra", "bezalel")

        for index in range(5):
            # Even indices are logged for ezra, odd ones for bezalel.
            provenance = Provenance(
                house=houses[index % 2],
                tool=f"tool_{index}",
                started_at="2026-03-30T20:00:00Z",
                evidence_level="full",
                confidence=0.8 + (index * 0.02),
            )
            outcome = ExecutionResult(
                success=True,
                data={},
                provenance=provenance,
                execution_time_ms=100 + index,
            )
            self.telemetry.log_execution(provenance.house, provenance.tool, outcome)

        report = self.telemetry.get_sovereignty_report()
        per_house = report["by_house"]

        assert report["total_executions"] == 5
        assert "ezra" in per_house
        assert "bezalel" in per_house
        assert report["avg_confidence"] > 0
|
|
|
|
|
|
class TestHarness:
    """Checks ``UniWizardHarness`` construction and its per-house helpers."""

    def test_harness_creation(self):
        """A harness built from the name "ezra" exposes Ezra's house and policy."""
        ezra_harness = UniWizardHarness("ezra")

        assert ezra_harness.house == House.EZRA
        assert ezra_harness.policy["must_read_before_write"] is True

    def test_ezra_read_before_write(self):
        """The evidence check for ``git_commit`` yields a known evidence level.

        Only the policy-check helper is exercised here; a real read-then-commit
        sequence would need an actual git tool.
        """
        ezra_harness = UniWizardHarness("ezra")
        commit_params = {"repo_path": "/tmp/test"}

        level, _confidence, _sources = ezra_harness._check_evidence(
            "git_commit", commit_params
        )

        # Any of the three levels is accepted; the exact one is not pinned.
        assert level in ["full", "partial", "none"]

    def test_bezalel_proof_verification(self):
        """Bezalel's harness verifies a successful result and requires proof."""
        bezalel_harness = UniWizardHarness("bezalel")

        verified = bezalel_harness._verify_proof("git_status", {"success": True})

        assert verified is True
        assert bezalel_harness.policy["requires_proof"] is True

    def test_timmy_review_generation(self):
        """Timmy's review summarises two successful results from other houses."""
        timmy_harness = UniWizardHarness("timmy")

        # (tool, house, confidence, execution_time_ms) for each mock result.
        specs = (
            ("tool1", "ezra", 0.9, 100),
            ("tool2", "bezalel", 0.85, 150),
        )
        mock_results = {
            tool: ExecutionResult(
                success=True,
                data={"result": "ok"},
                provenance=Provenance(
                    house=house,
                    tool=tool,
                    started_at="2026-03-30T20:00:00Z",
                    evidence_level="full",
                    confidence=confidence,
                ),
                execution_time_ms=elapsed_ms,
            )
            for tool, house, confidence, elapsed_ms in specs
        }

        review = timmy_harness.review_for_timmy(mock_results)
        summary = review["summary"]

        assert review["house"] == "timmy"
        assert summary["total"] == 2
        assert summary["successful"] == 2
        assert "recommendation" in review
|
|
|
|
|
|
class TestRouter:
    """Checks ``HouseRouter`` task classification, routing table and statistics."""

    def test_task_classification(self):
        """Known tool names are classified as read, build or test tasks."""
        router = HouseRouter()

        expectations = (
            # Read tasks
            ("git_status", TaskType.READ),
            ("system_info", TaskType.READ),
            # Build tasks
            ("git_commit", TaskType.BUILD),
            # Test tasks
            ("health_check", TaskType.TEST),
        )
        for tool_name, expected_type in expectations:
            assert router.classify_task(tool_name, {}) == expected_type

    def test_routing_decisions(self):
        """The routing table maps read, build and judge tasks to their houses."""
        router = HouseRouter()

        expected_houses = (
            (TaskType.READ, House.EZRA),      # Read → Ezra
            (TaskType.BUILD, House.BEZALEL),  # Build → Bezalel
            (TaskType.JUDGE, House.TIMMY),    # Judge → Timmy
        )
        for task_type, expected_house in expected_houses:
            route_entry = router.ROUTING_TABLE[task_type]
            assert route_entry["house"] == expected_house

    def test_routing_stats(self):
        """Three ``route`` calls are reported as a total of three."""
        router = HouseRouter()

        # Simulate some routing
        routed = 0
        while routed < 3:
            router.route("git_status", repo_path="/tmp")
            routed += 1

        assert router.get_routing_stats()["total"] == 3
|
|
|
|
|
|
class TestIntegration:
    """Integration-style check of the Ezra → Bezalel → Timmy hand-off."""

    def test_full_house_chain(self):
        """Timmy's review of Ezra's and Bezalel's results recommends APPROVE or REVIEW."""
        # One harness per house; only Timmy's is used for the review below.
        ezra = UniWizardHarness("ezra")
        bezalel = UniWizardHarness("bezalel")
        timmy = UniWizardHarness("timmy")

        # Ezra reads
        ezra_provenance = Provenance(
            house="ezra",
            tool="read_issue",
            started_at="2026-03-30T20:00:00Z",
            evidence_level="full",
            confidence=0.9,
            sources_read=["issue:42"],
        )
        ezra_result = ExecutionResult(
            success=True,
            data={"analysis": "issue understood"},
            provenance=ezra_provenance,
            execution_time_ms=200,
        )

        # Bezalel builds
        bezalel_provenance = Provenance(
            house="bezalel",
            tool="implement",
            started_at="2026-03-30T20:00:01Z",
            evidence_level="full",
            confidence=0.85,
        )
        bezalel_result = ExecutionResult(
            success=True,
            data={"proof": "tests pass"},
            provenance=bezalel_provenance,
            execution_time_ms=500,
        )

        # Timmy reviews
        chain_results = {
            "ezra_analysis": ezra_result,
            "bezalel_implementation": bezalel_result,
        }
        review = timmy.review_for_timmy(chain_results)

        recommendation = review["recommendation"]
        assert any(verdict in recommendation for verdict in ("APPROVE", "REVIEW"))
|
|
|
|
|
|
def run_tests(test_classes=None):
    """Run test classes with a minimal built-in runner and print a summary.

    Every function on a class whose name starts with ``test_`` is run on its
    own fresh instance. ``setup_method`` (if present) runs before the test and
    ``teardown_method`` (if present) runs afterwards even when the test raises,
    so per-test resources such as temporary directories are always released.

    Args:
        test_classes: Optional iterable of test classes to run. When omitted,
            all test classes defined in this module are run.

    Returns:
        True when no test failed, False otherwise.
    """
    import inspect

    if test_classes is None:
        test_classes = [
            TestHousePolicy,
            TestProvenance,
            TestExecutionResult,
            TestSovereigntyTelemetry,
            TestHarness,
            TestRouter,
            TestIntegration,
        ]

    passed = 0
    failed = 0

    print("=" * 60)
    print("UNI-WIZARD v2 TEST SUITE")
    print("=" * 60)

    for cls in test_classes:
        print(f"\n📦 {cls.__name__}")
        print("-" * 40)

        for name, method in inspect.getmembers(cls, predicate=inspect.isfunction):
            if not name.startswith("test_"):
                continue

            try:
                # Fresh instance per test so state never leaks between tests.
                test_instance = cls()
                if hasattr(test_instance, "setup_method"):
                    test_instance.setup_method()
                try:
                    method(test_instance)
                finally:
                    # Teardown must run even when the test body fails.
                    if hasattr(test_instance, "teardown_method"):
                        test_instance.teardown_method()
            except Exception as e:
                # Bare ``assert`` failures carry no message, so always show
                # the exception type to keep the report informative.
                detail = f"{type(e).__name__}: {e}" if str(e) else type(e).__name__
                print(f" ❌ {name}: {detail}")
                failed += 1
            else:
                # Counted only after setup, test and teardown all succeeded.
                print(f" ✅ {name}")
                passed += 1

    print("\n" + "=" * 60)
    print(f"Results: {passed} passed, {failed} failed")
    print("=" * 60)

    return failed == 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Exit status 0 when every test passed, 1 otherwise.
    exit_code = 0 if run_tests() else 1
    sys.exit(exit_code)
|