Files
timmy-home/uni-wizard/v3/tests/test_v3.py
Allegro 31026ddcc1 [#76-v4] Final Uni-Wizard Architecture — Production Integration
Complete four-pass evolution to production-ready architecture:

**Pass 1 → Foundation:**
- Tool registry, basic harness, 19 tools
- VPS provisioning, Syncthing mesh
- Health daemon, systemd services

**Pass 2 → Three-House Canon:**
- Timmy (Sovereign), Ezra (Archivist), Bezalel (Artificer)
- Provenance tracking, artifact-flow discipline
- House-aware policy enforcement

**Pass 3 → Self-Improvement:**
- Pattern database with SQLite backend
- Adaptive policies (auto-adjust thresholds)
- Predictive execution (success prediction)
- Hermes bridge for shortest-loop telemetry
- Learning velocity tracking

**Pass 4 → Production Integration:**
- Unified API: `from uni_wizard import Harness, House, Mode`
- Three modes: SIMPLE / INTELLIGENT / SOVEREIGN
- Circuit breaker pattern for fault tolerance
- Async/concurrent execution support
- Production hardening (timeouts, retries)

**Allegro Lane Definition:**
- Narrowed to: Gitea integration, Hermes bridge, redundancy/failover
- Provides: Cloud connectivity, telemetry streaming, issue routing
- Does NOT: Make sovereign decisions, authenticate as Timmy

**Files:**
- v3/: Intelligence engine, adaptive harness, Hermes bridge
- v4/: Unified API, production harness, final architecture

Total: ~25KB architecture documentation + production code
2026-03-30 16:39:42 +00:00

494 lines
16 KiB
Python

#!/usr/bin/env python3
"""
Test Suite for Uni-Wizard v3 — Self-Improving Intelligence
Tests:
- Pattern database operations
- Intelligence engine learning
- Adaptive policy changes
- Prediction accuracy
- Hermes bridge integration
- End-to-end self-improvement
"""
import sys
import json
import tempfile
import shutil
import time
import threading
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
# Add parent to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from intelligence_engine import (
PatternDatabase, IntelligenceEngine,
ExecutionPattern, AdaptationEvent
)
from harness import (
UniWizardHarness, AdaptivePolicy,
House, Provenance, ExecutionResult
)
from hermes_bridge import (
HermesStateReader, HermesSessionEvent,
TelemetryStreamProcessor, ShortestLoopIntegrator
)
class TestPatternDatabase:
    """Pattern storage and retrieval behavior of PatternDatabase."""

    def setup_method(self):
        # Fresh database in an isolated temp directory per test.
        self.temp_dir = tempfile.mkdtemp()
        self.db = PatternDatabase(db_path=Path(self.temp_dir) / "test.db")

    def teardown_method(self):
        shutil.rmtree(self.temp_dir)

    def test_record_execution(self):
        """A single recorded execution produces a pattern with one sample."""
        record = {
            "tool": "git_status",
            "house": "ezra",
            "model": "hermes3:8b",
            "success": True,
            "latency_ms": 150,
            "confidence": 0.9,
        }
        self.db.record_execution(record)
        stored = self.db.get_pattern("git_status", "ezra")
        assert stored is not None
        assert stored.success_rate == 1.0
        assert stored.sample_count == 1

    def test_pattern_aggregation(self):
        """Ten executions (eight successful) aggregate into one pattern."""
        latencies = [200 + step * 10 for step in range(10)]
        for idx, latency in enumerate(latencies):
            self.db.record_execution({
                "tool": "deploy",
                "house": "bezalel",
                "success": idx < 8,
                "latency_ms": latency,
                "confidence": 0.8,
            })
        stored = self.db.get_pattern("deploy", "bezalel")
        assert stored.success_rate == 0.8
        assert stored.sample_count == 10
        assert stored.avg_latency_ms == 245  # mean of 200..290 step 10

    def test_best_model_selection(self):
        """The model with the higher success rate wins selection."""
        # model_a: 8/10 = 80%; model_b: 9/10 = 90%
        outcomes = {"model_a": (8, 100), "model_b": (9, 120)}
        for model, (successes, latency) in outcomes.items():
            for idx in range(10):
                self.db.record_execution({
                    "tool": "read",
                    "house": "ezra",
                    "model": model,
                    "task_type": "read",
                    "success": idx < successes,
                    "latency_ms": latency,
                })
        assert self.db.get_best_model("read", min_samples=5) == "model_b"

    def test_house_performance(self):
        """House metrics reflect recorded success rate and volume."""
        for idx in range(5):
            self.db.record_execution({
                "tool": "test",
                "house": "ezra",
                "success": idx < 4,  # 80% success
                "latency_ms": 100,
            })
        perf = self.db.get_house_performance("ezra", days=7)
        assert perf["house"] == "ezra"
        assert perf["success_rate"] == 0.8
        assert perf["total_executions"] == 5

    def test_adaptation_tracking(self):
        """Recorded adaptations come back through get_adaptations."""
        event = AdaptationEvent(
            timestamp="2026-03-30T20:00:00Z",
            trigger="low_success_rate",
            change_type="policy.threshold",
            old_value=0.8,
            new_value=0.7,
            reason="Performance below threshold",
            expected_improvement=0.1,
        )
        self.db.record_adaptation(event)
        history = self.db.get_adaptations(limit=10)
        assert len(history) == 1
        assert history[0].change_type == "policy.threshold"
class TestIntelligenceEngine:
    """Prediction and learning-velocity behavior of IntelligenceEngine."""

    def setup_method(self):
        # Isolated database per test so patterns never leak between tests.
        self.temp_dir = tempfile.mkdtemp()
        self.db = PatternDatabase(db_path=Path(self.temp_dir) / "test.db")
        self.engine = IntelligenceEngine(db=self.db)

    def teardown_method(self):
        shutil.rmtree(self.temp_dir)

    def test_predict_success_with_data(self):
        """A perfect 10/10 history predicts success with probability 1.0."""
        for _ in range(10):
            self.db.record_execution({
                "tool": "git_status",
                "house": "ezra",
                "success": True,
                "latency_ms": 100,
                "confidence": 0.9
            })
        prob, reason = self.engine.predict_success("git_status", "ezra")
        assert prob == 1.0
        assert "excellent track record" in reason

    def test_predict_success_without_data(self):
        """With no history the engine falls back to a neutral 0.5 estimate."""
        prob, reason = self.engine.predict_success("unknown_tool", "timmy")
        assert prob == 0.5
        assert "Insufficient data" in reason

    def test_optimal_house_selection(self):
        """The house with the better success rate on a tool is preferred."""
        # Ezra: 90% success on git_status
        for i in range(10):
            self.db.record_execution({
                "tool": "git_status",
                "house": "ezra",
                "success": i < 9,
                "latency_ms": 100
            })
        # Bezalel: 50% success on git_status
        for i in range(10):
            self.db.record_execution({
                "tool": "git_status",
                "house": "bezalel",
                "success": i < 5,
                "latency_ms": 100
            })
        house, confidence = self.engine.get_optimal_house("git_status")
        assert house == "ezra"
        assert confidence == 0.9

    def test_learning_velocity(self):
        """Velocity report exposes the expected keys.

        NOTE(review): the executions below are recorded "now" — nothing
        backdates their timestamps, so this verifies only the report shape,
        not the trend computation over time. (Removed the unused `now` and
        `conn` locals the original left behind for that unfinished idea.)
        """
        for i in range(10):
            self.db.record_execution({
                "tool": "test",
                "house": "timmy",
                "success": i < 5,  # 50% success
                "latency_ms": 100
            })
        velocity = self.engine._calculate_learning_velocity()
        assert "velocity" in velocity
        assert "improvement" in velocity
class TestAdaptivePolicy:
    """Adaptive policy threshold adjustments."""

    def setup_method(self):
        self.temp_dir = tempfile.mkdtemp()
        self.db = PatternDatabase(db_path=Path(self.temp_dir) / "test.db")
        self.engine = IntelligenceEngine(db=self.db)

    def teardown_method(self):
        shutil.rmtree(self.temp_dir)

    def _record_runs(self, successes, total=10):
        """Record `total` executions for ezra; the first `successes` succeed."""
        for idx in range(total):
            self.db.record_execution({
                "tool": "test",
                "house": "ezra",
                "success": idx < successes,
                "latency_ms": 100
            })

    def test_policy_loads_defaults(self):
        """A fresh policy exposes the default values."""
        policy = AdaptivePolicy(House.EZRA, self.engine)
        assert policy.get("evidence_threshold") == 0.8
        assert policy.get("must_read_before_write") is True

    def test_policy_adapts_on_low_performance(self):
        """Poor recent performance lowers the evidence threshold."""
        policy = AdaptivePolicy(House.EZRA, self.engine)
        self._record_runs(successes=4)  # 40% success rate
        adaptation = policy.adapt("low_performance", "Testing adaptation")
        assert policy.get("evidence_threshold") < 0.8
        assert adaptation is not None

    def test_policy_adapts_on_high_performance(self):
        """Excellent performance raises a lowered evidence threshold."""
        policy = AdaptivePolicy(House.EZRA, self.engine)
        policy.policy["evidence_threshold"] = 0.7  # start below the default
        self._record_runs(successes=10)  # 100% success rate
        policy.adapt("high_performance", "Testing adaptation")
        assert policy.get("evidence_threshold") > 0.7
class TestHarness:
    """v3 harness behavior with the intelligence engine attached."""

    def setup_method(self):
        self.temp_dir = tempfile.mkdtemp()
        self.db = PatternDatabase(db_path=Path(self.temp_dir) / "test.db")
        self.engine = IntelligenceEngine(db=self.db)

    def teardown_method(self):
        shutil.rmtree(self.temp_dir)

    def _timmy_execution_count(self):
        """Total executions currently recorded for the timmy house."""
        return self.engine.db.get_house_performance("timmy")["total_executions"]

    def test_harness_creates_provenance(self):
        """Every execution carries provenance naming house, tool, prediction."""
        harness = UniWizardHarness("ezra", intelligence=self.engine)
        outcome = harness.execute("system_info")
        assert outcome.provenance.house == "ezra"
        assert outcome.provenance.tool == "system_info"
        assert outcome.provenance.prediction >= 0

    def test_harness_records_for_learning(self):
        """With learning enabled, each execution adds one recorded sample."""
        harness = UniWizardHarness("timmy", intelligence=self.engine, enable_learning=True)
        before = self._timmy_execution_count()
        harness.execute("test_tool")
        assert self._timmy_execution_count() == before + 1

    def test_harness_does_not_record_when_learning_disabled(self):
        """With learning disabled, executions leave the database untouched."""
        harness = UniWizardHarness("timmy", intelligence=self.engine, enable_learning=False)
        before = self._timmy_execution_count()
        harness.execute("test_tool")
        assert self._timmy_execution_count() == before

    def test_learn_from_batch_triggers_adaptation(self):
        """Enough executions let batch learning report an adaptation."""
        harness = UniWizardHarness("ezra", intelligence=self.engine)
        for _ in range(15):
            harness.execute("test_tool")
        outcome = harness.learn_from_batch(min_executions=10)
        assert outcome["status"] == "adapted"
class TestHermesBridge:
    """Hermes telemetry integration."""

    def setup_method(self):
        self.temp_dir = tempfile.mkdtemp()
        self.db = PatternDatabase(db_path=Path(self.temp_dir) / "test.db")
        self.engine = IntelligenceEngine(db=self.db)

    def teardown_method(self):
        shutil.rmtree(self.temp_dir)

    def test_event_conversion(self):
        """A Hermes session event converts into an intelligence record."""
        processor = TelemetryStreamProcessor(self.engine)
        event = HermesSessionEvent(
            session_id="test_session",
            timestamp=time.time(),
            event_type="tool_call",
            tool_name="terminal",
            success=True,
            latency_ms=150,
            model="hermes3:8b",
            provider="local",
            token_count=100,
            error=None
        )
        converted = processor._convert_event(event)
        # "terminal" maps onto the internal system_shell tool name.
        assert converted["tool"] == "system_shell"
        assert converted["house"] == "timmy"
        assert converted["success"] is True

    def test_task_type_inference(self):
        """Task types are inferred from the tool name."""
        processor = TelemetryStreamProcessor(self.engine)
        expectations = {
            "git_status": "read",
            "file_write": "build",
            "run_tests": "test",
        }
        for tool, expected in expectations.items():
            assert processor._infer_task_type(tool) == expected
class TestEndToEnd:
    """Full learning-cycle integration across harness and engine."""

    def setup_method(self):
        self.temp_dir = tempfile.mkdtemp()
        self.db = PatternDatabase(db_path=Path(self.temp_dir) / "test.db")
        self.engine = IntelligenceEngine(db=self.db)

    def teardown_method(self):
        shutil.rmtree(self.temp_dir)

    def test_full_learning_cycle(self):
        """Execute, aggregate, predict, adapt, and report in one pass."""
        harness = UniWizardHarness("ezra", intelligence=self.engine)

        # Build history: twenty executions of the same tool.
        for _ in range(20):
            harness.execute("git_status", repo_path="/tmp")

        # History is aggregated into a single pattern.
        stored = self.engine.db.get_pattern("git_status", "ezra")
        assert stored.sample_count == 20

        # The pattern informs a prediction for the next execution.
        prob, reason = harness.predict_execution("git_status", {})
        assert prob > 0
        assert len(reason) > 0

        # Batch learning reaches an adaptation...
        assert harness.learn_from_batch()["status"] == "adapted"

        # ...and the intelligence report exposes the aggregate views.
        report = self.engine.get_intelligence_report()
        assert "house_performance" in report
        assert "learning_velocity" in report
def run_tests():
    """Run every test method of every test class; return True if all passed.

    Fixes over the original runner:
    - teardown_method now runs in a ``finally`` for every test, so failing
      tests no longer leak their temp directories (the original only tore
      down on the success path);
    - the class-level probe instance is torn down immediately instead of
      holding its resources for the whole class loop;
    - bare ``except:`` replaced with ``except Exception``;
    - pass/fail lines carry distinct markers so output is unambiguous.
    """
    import inspect

    test_classes = [
        TestPatternDatabase,
        TestIntelligenceEngine,
        TestAdaptivePolicy,
        TestHarness,
        TestHermesBridge,
        TestEndToEnd,
    ]
    passed = 0
    failed = 0
    print("=" * 60)
    print("UNI-WIZARD v3 TEST SUITE")
    print("=" * 60)
    for cls in test_classes:
        print(f"\n📦 {cls.__name__}")
        print("-" * 40)

        # Probe that setup works once; skip the whole class if it cannot.
        probe = cls()
        if hasattr(probe, 'setup_method'):
            try:
                probe.setup_method()
            except Exception as e:
                print(f" ⚠️ Setup failed: {e}")
                continue
            # Release the probe's resources right away; each test below
            # gets its own fresh instance with its own setup/teardown.
            if hasattr(probe, 'teardown_method'):
                try:
                    probe.teardown_method()
                except Exception:
                    pass  # best-effort cleanup of the probe only

        for name, method in inspect.getmembers(cls, predicate=inspect.isfunction):
            if not name.startswith('test_'):
                continue
            test_instance = cls()
            try:
                if hasattr(test_instance, 'setup_method'):
                    test_instance.setup_method()
                method(test_instance)
                print(f" ✅ {name}")
                passed += 1
            except Exception as e:
                print(f" ❌ {name}: {e}")
                failed += 1
            finally:
                # Always tear down, even when the test raised.
                if hasattr(test_instance, 'teardown_method'):
                    try:
                        test_instance.teardown_method()
                    except Exception:
                        pass
    print("\n" + "=" * 60)
    print(f"Results: {passed} passed, {failed} failed")
    print("=" * 60)
    return failed == 0
if __name__ == "__main__":
    # Exit non-zero when any test failed so callers/CI can gate on it.
    sys.exit(0 if run_tests() else 1)