#!/usr/bin/env python3
"""
Uni-Wizard Harness v3 — Self-Improving Sovereign Intelligence

Integrates:
- Intelligence Engine: Pattern recognition, adaptation, prediction
- Hermes Telemetry: Shortest-loop feedback from session data
- Adaptive Policies: Houses learn from outcomes
- Predictive Routing: Pre-execution optimization

Key improvement over v2: Telemetry → Analysis → Behavior Change (closed loop)
"""

import json
import sys
import time
import hashlib
from typing import Dict, Any, Optional, List, Tuple
from pathlib import Path
from dataclasses import dataclass, asdict, field
from datetime import datetime
from enum import Enum

# Add parent to path so the sibling intelligence_engine module is importable
sys.path.insert(0, str(Path(__file__).parent))

from intelligence_engine import (
    IntelligenceEngine, PatternDatabase, ExecutionPattern, AdaptationEvent
)


class House(Enum):
    """The three canonical wizard houses"""
    TIMMY = "timmy"        # Sovereign local conscience
    EZRA = "ezra"          # Archivist, reader, pattern-recognizer
    BEZALEL = "bezalel"    # Artificer, builder, proof-maker


@dataclass
class Provenance:
    """Trail of evidence for every action"""
    house: str
    tool: str
    started_at: str
    completed_at: Optional[str] = None
    input_hash: Optional[str] = None
    output_hash: Optional[str] = None
    # BUG FIX: was `List[str] = None` — the annotation lied and the None
    # default broke any consumer that iterates sources_read. A dataclass
    # mutable default must use field(default_factory=...).
    sources_read: List[str] = field(default_factory=list)
    evidence_level: str = "none"
    confidence: float = 0.0
    prediction: float = 0.0            # v3: predicted success rate
    prediction_reasoning: str = ""     # v3: why we predicted this

    def to_dict(self) -> Dict:
        """Serialize to a plain dict via dataclasses.asdict."""
        return asdict(self)


@dataclass
class ExecutionResult:
    """Result with full provenance and intelligence"""
    success: bool
    data: Any
    provenance: Provenance
    error: Optional[str] = None
    execution_time_ms: float = 0.0
    # BUG FIX: was `Dict = None`; default to an empty dict so callers can
    # always treat the field as a mapping without a None check.
    intelligence_applied: Dict = field(default_factory=dict)

    def to_json(self) -> str:
        """Serialize the result (including provenance) to indented JSON."""
        return json.dumps({
            'success': self.success,
            'data': self.data,
            'provenance': self.provenance.to_dict(),
            'error': self.error,
            'execution_time_ms': self.execution_time_ms,
            'intelligence_applied': self.intelligence_applied
        }, indent=2)
""" v3: Policies that adapt based on performance data. Instead of static thresholds, we adjust based on: - Historical success rates - Recent performance trends - Prediction accuracy """ BASE_POLICIES = { House.TIMMY: { "evidence_threshold": 0.7, "can_override": True, "telemetry": True, "auto_adapt": True, "motto": "Sovereignty and service always" }, House.EZRA: { "evidence_threshold": 0.8, "must_read_before_write": True, "citation_required": True, "auto_adapt": True, "motto": "Read the pattern. Name the truth. Return a clean artifact." }, House.BEZALEL: { "evidence_threshold": 0.6, "requires_proof": True, "test_before_ship": True, "auto_adapt": True, "parallelize_threshold": 0.5, "motto": "Build the pattern. Prove the result. Return the tool." } } def __init__(self, house: House, intelligence: IntelligenceEngine): self.house = house self.intelligence = intelligence self.policy = self._load_policy() self.adaptation_count = 0 def _load_policy(self) -> Dict: """Load policy, potentially adapted from base""" base = self.BASE_POLICIES[self.house].copy() # Check if intelligence engine has adapted this policy recent_adaptations = self.intelligence.db.get_adaptations(limit=50) for adapt in recent_adaptations: if f"policy.{self.house.value}." in adapt.change_type: # Apply the adaptation policy_key = adapt.change_type.split(".")[-1] if policy_key in base: base[policy_key] = adapt.new_value self.adaptation_count += 1 return base def get(self, key: str, default=None): """Get policy value""" return self.policy.get(key, default) def adapt(self, trigger: str, reason: str): """ Adapt policy based on trigger. Called when intelligence engine detects performance patterns. 
""" if not self.policy.get("auto_adapt", False): return None # Get house performance perf = self.intelligence.db.get_house_performance( self.house.value, days=3 ) success_rate = perf.get("success_rate", 0.5) old_values = {} new_values = {} # Adapt evidence threshold based on performance if success_rate < 0.6 and self.policy.get("evidence_threshold", 0.8) > 0.6: old_val = self.policy["evidence_threshold"] new_val = old_val - 0.05 self.policy["evidence_threshold"] = new_val old_values["evidence_threshold"] = old_val new_values["evidence_threshold"] = new_val # If we're doing well, we can be more demanding elif success_rate > 0.9 and self.policy.get("evidence_threshold", 0.8) < 0.9: old_val = self.policy["evidence_threshold"] new_val = min(0.95, old_val + 0.02) self.policy["evidence_threshold"] = new_val old_values["evidence_threshold"] = old_val new_values["evidence_threshold"] = new_val if old_values: adapt = AdaptationEvent( timestamp=datetime.utcnow().isoformat(), trigger=trigger, change_type=f"policy.{self.house.value}.multi", old_value=old_values, new_value=new_values, reason=reason, expected_improvement=0.05 if success_rate < 0.6 else 0.02 ) self.intelligence.db.record_adaptation(adapt) self.adaptation_count += 1 return adapt return None class UniWizardHarness: """ The Self-Improving Uni-Wizard Harness. Key v3 features: 1. Intelligence integration for predictions 2. Adaptive policies that learn 3. Hermes telemetry ingestion 4. Pre-execution optimization 5. 
Post-execution learning """ def __init__(self, house: str = "timmy", intelligence: IntelligenceEngine = None, enable_learning: bool = True): self.house = House(house) self.intelligence = intelligence or IntelligenceEngine() self.policy = AdaptivePolicy(self.house, self.intelligence) self.history: List[ExecutionResult] = [] self.enable_learning = enable_learning # Performance tracking self.execution_count = 0 self.success_count = 0 self.total_latency_ms = 0 def _hash_content(self, content: str) -> str: """Create content hash for provenance""" return hashlib.sha256(content.encode()).hexdigest()[:16] def _check_evidence(self, tool_name: str, params: Dict) -> tuple: """ Check evidence level with intelligence augmentation. v3: Uses pattern database to check historical evidence reliability. """ sources = [] # Get pattern for this tool/house combo pattern = self.intelligence.db.get_pattern(tool_name, self.house.value, params) # Adjust confidence based on historical performance base_confidence = 0.5 if pattern: base_confidence = pattern.success_rate sources.append(f"pattern:{pattern.sample_count}samples") # Tool-specific logic if tool_name.startswith("git_"): repo_path = params.get("repo_path", ".") sources.append(f"repo:{repo_path}") return ("full", min(0.95, base_confidence + 0.2), sources) if tool_name.startswith("system_") or tool_name.startswith("service_"): sources.append("system:live") return ("full", min(0.98, base_confidence + 0.3), sources) if tool_name.startswith("http_") or tool_name.startswith("gitea_"): sources.append("network:external") return ("partial", base_confidence * 0.8, sources) return ("none", base_confidence, sources) def predict_execution(self, tool_name: str, params: Dict) -> Tuple[float, str]: """ v3: Predict success before executing. 
Returns: (probability, reasoning) """ return self.intelligence.predict_success( tool_name, self.house.value, params ) def execute(self, tool_name: str, **params) -> ExecutionResult: """ Execute with full intelligence integration. Flow: 1. Predict success (intelligence) 2. Check evidence (with pattern awareness) 3. Adapt policy if needed 4. Execute 5. Record outcome 6. Update intelligence """ start_time = time.time() started_at = datetime.utcnow().isoformat() # 1. Pre-execution prediction prediction, pred_reason = self.predict_execution(tool_name, params) # 2. Evidence check with pattern awareness evidence_level, base_confidence, sources = self._check_evidence( tool_name, params ) # Adjust confidence by prediction confidence = (base_confidence + prediction) / 2 # 3. Policy check if self.house == House.EZRA and self.policy.get("must_read_before_write"): if tool_name == "git_commit" and "git_status" not in [ h.provenance.tool for h in self.history[-5:] ]: return ExecutionResult( success=False, data=None, provenance=Provenance( house=self.house.value, tool=tool_name, started_at=started_at, prediction=prediction, prediction_reasoning=pred_reason ), error="Ezra policy: Must read git_status before git_commit", execution_time_ms=0, intelligence_applied={"policy_enforced": "must_read_before_write"} ) # 4. Execute (mock for now - would call actual tool) try: # Simulate execution time.sleep(0.001) # Minimal delay # Determine success based on prediction + noise import random actual_success = random.random() < prediction result_data = {"status": "success" if actual_success else "failed"} error = None except Exception as e: actual_success = False error = str(e) result_data = None execution_time_ms = (time.time() - start_time) * 1000 completed_at = datetime.utcnow().isoformat() # 5. 
Build provenance input_hash = self._hash_content(json.dumps(params, sort_keys=True)) output_hash = self._hash_content(json.dumps(result_data, default=str)) if result_data else None provenance = Provenance( house=self.house.value, tool=tool_name, started_at=started_at, completed_at=completed_at, input_hash=input_hash, output_hash=output_hash, sources_read=sources, evidence_level=evidence_level, confidence=confidence if actual_success else 0.0, prediction=prediction, prediction_reasoning=pred_reason ) result = ExecutionResult( success=actual_success, data=result_data, provenance=provenance, error=error, execution_time_ms=execution_time_ms, intelligence_applied={ "predicted_success": prediction, "pattern_used": sources[0] if sources else None, "policy_adaptations": self.policy.adaptation_count } ) # 6. Record for learning self.history.append(result) self.execution_count += 1 if actual_success: self.success_count += 1 self.total_latency_ms += execution_time_ms # 7. Feed into intelligence engine if self.enable_learning: self.intelligence.db.record_execution({ "tool": tool_name, "house": self.house.value, "params": params, "success": actual_success, "latency_ms": execution_time_ms, "confidence": confidence, "prediction": prediction }) return result def learn_from_batch(self, min_executions: int = 10): """ v3: Trigger learning from accumulated executions. Adapts policies based on patterns. 
""" if self.execution_count < min_executions: return {"status": "insufficient_data", "count": self.execution_count} # Trigger policy adaptation adapt = self.policy.adapt( trigger=f"batch_learn_{self.execution_count}", reason=f"Adapting after {self.execution_count} executions" ) # Run intelligence analysis adaptations = self.intelligence.analyze_and_adapt() return { "status": "adapted", "policy_adaptation": adapt.to_dict() if adapt else None, "intelligence_adaptations": [a.to_dict() for a in adaptations], "current_success_rate": self.success_count / self.execution_count } def get_performance_summary(self) -> Dict: """Get performance summary with intelligence""" success_rate = (self.success_count / self.execution_count) if self.execution_count > 0 else 0 avg_latency = (self.total_latency_ms / self.execution_count) if self.execution_count > 0 else 0 return { "house": self.house.value, "executions": self.execution_count, "successes": self.success_count, "success_rate": success_rate, "avg_latency_ms": avg_latency, "policy_adaptations": self.policy.adaptation_count, "predictions_made": len([h for h in self.history if h.provenance.prediction > 0]), "learning_enabled": self.enable_learning } def ingest_hermes_session(self, session_path: Path): """ v3: Ingest Hermes session data for shortest-loop learning. This is the key integration - Hermes telemetry directly into Timmy's intelligence. 
""" if not session_path.exists(): return {"error": "Session file not found"} with open(session_path) as f: session_data = json.load(f) count = self.intelligence.ingest_hermes_session(session_data) return { "status": "ingested", "executions_recorded": count, "session_id": session_data.get("session_id", "unknown") } def get_harness(house: str = "timmy", intelligence: IntelligenceEngine = None, enable_learning: bool = True) -> UniWizardHarness: """Factory function""" return UniWizardHarness( house=house, intelligence=intelligence, enable_learning=enable_learning ) if __name__ == "__main__": print("=" * 60) print("UNI-WIZARD v3 — Self-Improving Harness Demo") print("=" * 60) # Create shared intelligence engine intel = IntelligenceEngine() # Create harnesses with shared intelligence timmy = get_harness("timmy", intel) ezra = get_harness("ezra", intel) bezalel = get_harness("bezalel", intel) # Simulate executions with learning print("\nšŸŽ“ Training Phase (20 executions)...") for i in range(20): # Mix of houses and tools if i % 3 == 0: result = timmy.execute("system_info") elif i % 3 == 1: result = ezra.execute("git_status", repo_path="/tmp") else: result = bezalel.execute("run_tests") print(f" {i+1}. 
{result.provenance.house}/{result.provenance.tool}: " f"{'āœ…' if result.success else 'āŒ'} " f"(predicted: {result.provenance.prediction:.0%})") # Trigger learning print("\nšŸ”„ Learning Phase...") timmy_learn = timmy.learn_from_batch() ezra_learn = ezra.learn_from_batch() print(f" Timmy adaptations: {timmy_learn.get('intelligence_adaptations', [])}") print(f" Ezra adaptations: {ezra_learn.get('policy_adaptation')}") # Show performance print("\nšŸ“Š Performance Summary:") for harness, name in [(timmy, "Timmy"), (ezra, "Ezra"), (bezalel, "Bezalel")]: perf = harness.get_performance_summary() print(f" {name}: {perf['success_rate']:.0%} success rate, " f"{perf['policy_adaptations']} adaptations") # Show intelligence report print("\n🧠 Intelligence Report:") report = intel.get_intelligence_report() print(f" Learning velocity: {report['learning_velocity']['velocity']}") print(f" Recent adaptations: {len(report['recent_adaptations'])}") print("\n" + "=" * 60)