Files
timmy-home/uni-wizard/v2/tests/test_v2.py
Allegro fb9243153b [#76-v2] Uni-Wizard v2 — Three-House Architecture with Ezra, Bezalel, and Timmy Integration
Complete second-pass refinement integrating all wizard house contributions:

**Three-House Architecture:**
- Ezra (Archivist): Read-before-write, evidence over vibes, citation discipline
- Bezalel (Artificer): Build-from-plans, proof over speculation, test discipline
- Timmy (Sovereign): Final judgment, telemetry, sovereignty preservation

**Core Components:**
- harness.py: House-aware execution with policy enforcement
- router.py: Intelligent task routing to appropriate house
- task_router_daemon.py: Full three-house Gitea workflow
- tests/test_v2.py: Comprehensive test suite

**Key Features:**
- Provenance tracking with content hashing
- House-specific policy enforcement
- Sovereignty telemetry logging
- Cross-house workflow orchestration
- Evidence-level tracking per execution

Honors canon from specs/timmy-ezra-bezalel-canon-sheet.md:
- Distinct house identities
- No authority blending
- Unidirectional artifact flow
- Full provenance and telemetry
2026-03-30 15:59:47 +00:00

397 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Test suite for Uni-Wizard v2 — Three-House Architecture
Tests:
- House policy enforcement
- Provenance tracking
- Routing decisions
- Cross-house workflows
- Telemetry logging
"""
import sys
import json
import tempfile
import shutil
from pathlib import Path
from unittest.mock import Mock, patch
# Add parent to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from harness import (
UniWizardHarness, House, HousePolicy,
Provenance, ExecutionResult, SovereigntyTelemetry
)
from router import HouseRouter, TaskType, CrossHouseWorkflow
class TestHousePolicy:
    """Checks the policy mapping that HousePolicy.get returns for each house."""

    def test_timmy_policy(self):
        """Timmy's policy enables provenance, override and telemetry."""
        timmy_rules = HousePolicy.get(House.TIMMY)
        for flag in ("requires_provenance", "can_override", "telemetry"):
            assert timmy_rules[flag] is True
        assert "Sovereignty" in timmy_rules["motto"]

    def test_ezra_policy(self):
        """Ezra's policy enables read-before-write and citation, threshold 0.8."""
        ezra_rules = HousePolicy.get(House.EZRA)
        for flag in (
            "requires_provenance",
            "must_read_before_write",
            "citation_required",
        ):
            assert ezra_rules[flag] is True
        assert ezra_rules["evidence_threshold"] == 0.8
        assert "Read" in ezra_rules["motto"]

    def test_bezalel_policy(self):
        """Bezalel's policy enables proof and test-before-ship."""
        bezalel_rules = HousePolicy.get(House.BEZALEL)
        for flag in ("requires_provenance", "requires_proof", "test_before_ship"):
            assert bezalel_rules[flag] is True
        assert "Build" in bezalel_rules["motto"]
class TestProvenance:
    """Checks that a Provenance record converts to a dict with its fields intact."""

    def test_provenance_creation(self):
        """Build a Provenance from keyword fields and inspect to_dict()."""
        fields = {
            "house": "ezra",
            "tool": "git_status",
            "started_at": "2026-03-30T20:00:00Z",
            "evidence_level": "full",
            "confidence": 0.95,
            "sources_read": ["repo:/path", "git:HEAD"],
        }
        as_dict = Provenance(**fields).to_dict()

        assert as_dict["house"] == "ezra"
        assert as_dict["evidence_level"] == "full"
        assert as_dict["confidence"] == 0.95
        assert len(as_dict["sources_read"]) == 2
class TestExecutionResult:
    """Checks JSON serialisation of an ExecutionResult that carries provenance."""

    def test_success_result(self):
        """A successful result round-trips through to_json() and json.loads()."""
        origin = Provenance(
            house="ezra",
            tool="git_status",
            started_at="2026-03-30T20:00:00Z",
            evidence_level="full",
            confidence=0.9,
        )
        outcome = ExecutionResult(
            success=True,
            data={"status": "clean"},
            provenance=origin,
            execution_time_ms=150,
        )

        decoded = json.loads(outcome.to_json())

        assert decoded["success"] is True
        assert decoded["data"]["status"] == "clean"
        assert decoded["provenance"]["house"] == "ezra"
class TestSovereigntyTelemetry:
    """Checks SovereigntyTelemetry logging against a throwaway log directory."""

    def setup_method(self):
        # Every test logs into its own temporary directory.
        self.temp_dir = tempfile.mkdtemp()
        self.telemetry = SovereigntyTelemetry(log_dir=Path(self.temp_dir))

    def teardown_method(self):
        # Remove the temporary directory created in setup_method.
        shutil.rmtree(self.temp_dir)

    def test_log_creation(self):
        """Logging one execution creates the log file with a matching first entry."""
        origin = Provenance(
            house="timmy",
            tool="test",
            started_at="2026-03-30T20:00:00Z",
            evidence_level="full",
            confidence=0.9,
        )
        outcome = ExecutionResult(
            success=True,
            data={},
            provenance=origin,
            execution_time_ms=100,
        )
        self.telemetry.log_execution("timmy", "test", outcome)

        # The log file must exist once an execution has been recorded.
        assert self.telemetry.telemetry_log.exists()

        # The first line of the log is parsed as one JSON entry.
        with open(self.telemetry.telemetry_log) as handle:
            first_entry = json.loads(handle.readline())
        assert first_entry["house"] == "timmy"
        assert first_entry["tool"] == "test"
        assert first_entry["evidence_level"] == "full"

    def test_sovereignty_report(self):
        """Five logged executions show up in the sovereignty report."""
        # Even indices are logged for ezra, odd indices for bezalel.
        for index in range(5):
            origin = Provenance(
                house="bezalel" if index % 2 else "ezra",
                tool=f"tool_{index}",
                started_at="2026-03-30T20:00:00Z",
                evidence_level="full",
                confidence=0.8 + (index * 0.02),
            )
            outcome = ExecutionResult(
                success=True,
                data={},
                provenance=origin,
                execution_time_ms=100 + index,
            )
            self.telemetry.log_execution(origin.house, origin.tool, outcome)

        summary = self.telemetry.get_sovereignty_report()

        assert summary["total_executions"] == 5
        assert "ezra" in summary["by_house"]
        assert "bezalel" in summary["by_house"]
        assert summary["avg_confidence"] > 0
class TestHarness:
    """Checks UniWizardHarness construction, evidence/proof checks and reviews."""

    def test_harness_creation(self):
        """A harness built for "ezra" carries the Ezra house and its policy."""
        ezra_harness = UniWizardHarness("ezra")
        assert ezra_harness.house == House.EZRA
        assert ezra_harness.policy["must_read_before_write"] is True

    def test_ezra_read_before_write(self):
        """Ezra must read git_status before git_commit"""
        ezra_harness = UniWizardHarness("ezra")

        # A commit is attempted with no prior read. Exercising this end to end
        # would need a real git tool, so only the evidence check is called here.
        commit_params = {"repo_path": "/tmp/test"}
        level, _confidence, _sources = ezra_harness._check_evidence(
            "git_commit", commit_params
        )

        # Whatever the params yield, the level must be one of the known values.
        assert level in ["full", "partial", "none"]

    def test_bezalel_proof_verification(self):
        """Bezalel requires proof verification"""
        bezalel_harness = UniWizardHarness("bezalel")

        proof_ok = bezalel_harness._verify_proof("git_status", {"success": True})
        assert proof_ok is True
        assert bezalel_harness.policy["requires_proof"] is True

    def test_timmy_review_generation(self):
        """Timmy can generate reviews"""
        timmy_harness = UniWizardHarness("timmy")

        # (tool, house, confidence, execution time) for each mock result.
        specs = [
            ("tool1", "ezra", 0.9, 100),
            ("tool2", "bezalel", 0.85, 150),
        ]
        mock_results = {
            tool: ExecutionResult(
                success=True,
                data={"result": "ok"},
                provenance=Provenance(
                    house=house,
                    tool=tool,
                    started_at="2026-03-30T20:00:00Z",
                    evidence_level="full",
                    confidence=confidence,
                ),
                execution_time_ms=elapsed_ms,
            )
            for tool, house, confidence, elapsed_ms in specs
        }

        verdict = timmy_harness.review_for_timmy(mock_results)

        assert verdict["house"] == "timmy"
        assert verdict["summary"]["total"] == 2
        assert verdict["summary"]["successful"] == 2
        assert "recommendation" in verdict
class TestRouter:
    """Checks HouseRouter task classification, routing table and statistics."""

    def test_task_classification(self):
        """Known tool names classify as read, build or test tasks."""
        dispatcher = HouseRouter()
        expectations = [
            # Read tasks
            ("git_status", TaskType.READ),
            ("system_info", TaskType.READ),
            # Build tasks
            ("git_commit", TaskType.BUILD),
            # Test tasks
            ("health_check", TaskType.TEST),
        ]
        for tool_name, expected_type in expectations:
            assert dispatcher.classify_task(tool_name, {}) == expected_type

    def test_routing_decisions(self):
        """The routing table sends each task type to its house."""
        dispatcher = HouseRouter()
        # Read -> Ezra, Build -> Bezalel, Judge -> Timmy
        expected_houses = [
            (TaskType.READ, House.EZRA),
            (TaskType.BUILD, House.BEZALEL),
            (TaskType.JUDGE, House.TIMMY),
        ]
        for task_type, expected_house in expected_houses:
            route_entry = dispatcher.ROUTING_TABLE[task_type]
            assert route_entry["house"] == expected_house

    def test_routing_stats(self):
        """Three routed calls are reported as a total of three."""
        dispatcher = HouseRouter()
        call_count = 3
        for _ in range(call_count):
            dispatcher.route("git_status", repo_path="/tmp")

        assert dispatcher.get_routing_stats()["total"] == 3
class TestIntegration:
    """Integration tests"""

    def test_full_house_chain(self):
        """Test Ezra → Bezalel → Timmy chain"""
        # All three harnesses are constructed; only Timmy's is called below.
        ezra = UniWizardHarness("ezra")
        bezalel = UniWizardHarness("bezalel")
        timmy = UniWizardHarness("timmy")

        # Ezra's step: a read with one cited source.
        ezra_provenance = Provenance(
            house="ezra",
            tool="read_issue",
            started_at="2026-03-30T20:00:00Z",
            evidence_level="full",
            confidence=0.9,
            sources_read=["issue:42"],
        )
        ezra_result = ExecutionResult(
            success=True,
            data={"analysis": "issue understood"},
            provenance=ezra_provenance,
            execution_time_ms=200,
        )

        # Bezalel's step: a build started one second later.
        bezalel_provenance = Provenance(
            house="bezalel",
            tool="implement",
            started_at="2026-03-30T20:00:01Z",
            evidence_level="full",
            confidence=0.85,
        )
        bezalel_result = ExecutionResult(
            success=True,
            data={"proof": "tests pass"},
            provenance=bezalel_provenance,
            execution_time_ms=500,
        )

        # Timmy's step: review both results together.
        chain_results = {
            "ezra_analysis": ezra_result,
            "bezalel_implementation": bezalel_result,
        }
        review = timmy.review_for_timmy(chain_results)

        assert any(
            keyword in review["recommendation"] for keyword in ("APPROVE", "REVIEW")
        )
def run_tests():
    """Run every test class in this module and print a pass/fail summary.

    Each ``test_*`` function found on a class runs against its own fresh
    instance. When the class defines them, ``setup_method`` is called before
    the test and ``teardown_method`` after it. Teardown runs even when the
    test body raises, so per-test resources are always released. Teardown is
    not attempted when construction or setup itself fails.

    A test counts as passed only if construction, setup, the test body and
    teardown all complete without raising; otherwise it counts as failed once.

    Returns:
        bool: True when no test failed, False otherwise.
    """
    import inspect

    test_classes = [
        TestHousePolicy,
        TestProvenance,
        TestExecutionResult,
        TestSovereigntyTelemetry,
        TestHarness,
        TestRouter,
        TestIntegration,
    ]
    passed = 0
    failed = 0

    print("=" * 60)
    print("UNI-WIZARD v2 TEST SUITE")
    print("=" * 60)

    for cls in test_classes:
        print(f"\n📦 {cls.__name__}")
        print("-" * 40)

        for name, method in inspect.getmembers(cls, predicate=inspect.isfunction):
            if not name.startswith('test_'):
                continue
            try:
                # Fresh instance per test so no state leaks between tests.
                test_instance = cls()
                if hasattr(test_instance, 'setup_method'):
                    test_instance.setup_method()
                try:
                    method(test_instance)
                finally:
                    # Release per-test resources whether or not the test passed.
                    if hasattr(test_instance, 'teardown_method'):
                        test_instance.teardown_method()
            except Exception as e:
                # Top-level runner boundary: report the failure and keep going.
                print(f"{name}: {e}")
                failed += 1
            else:
                print(f"{name}")
                passed += 1

    print("\n" + "=" * 60)
    print(f"Results: {passed} passed, {failed} failed")
    print("=" * 60)
    return failed == 0
if __name__ == "__main__":
    # Exit status 0 when every test passed, 1 otherwise.
    sys.exit(0 if run_tests() else 1)