# timmy-home/uni-wizard/v3/harness.py

#!/usr/bin/env python3
"""
Uni-Wizard Harness v3 — Self-Improving Sovereign Intelligence

Integrates:
- Intelligence Engine: Pattern recognition, adaptation, prediction
- Hermes Telemetry: Shortest-loop feedback from session data
- Adaptive Policies: Houses learn from outcomes
- Predictive Routing: Pre-execution optimization

Key improvement over v2:
Telemetry → Analysis → Behavior Change (closed loop)
"""

import json
import sys
import time
import hashlib
from typing import Dict, Any, Optional, List, Tuple
from pathlib import Path
from dataclasses import dataclass, asdict
from datetime import datetime
from enum import Enum

# Add this file's directory to sys.path so the sibling intelligence_engine module can be imported
sys.path.insert(0, str(Path(__file__).parent))

from intelligence_engine import (
    IntelligenceEngine, PatternDatabase,
    ExecutionPattern, AdaptationEvent
)


class House(Enum):
    """The three canonical wizard houses.

    Each member's value is its lowercase string identifier, so a member can
    be looked up from plain text, e.g. ``House("ezra") is House.EZRA``.
    """

    # Sovereign local conscience
    TIMMY = "timmy"

    # Archivist, reader, pattern-recognizer
    EZRA = "ezra"

    # Artificer, builder, proof-maker
    BEZALEL = "bezalel"


@dataclass
class Provenance:
    """Trail of evidence for every action.

    Only ``house``, ``tool`` and ``started_at`` are required; every other
    field has a default so a record can be created before (or without) the
    action completing.
    """
    house: str
    tool: str
    started_at: str
    completed_at: Optional[str] = None
    input_hash: Optional[str] = None
    output_hash: Optional[str] = None
    # Defaults to None (not an empty list) when no sources were recorded, so
    # the annotation must be Optional to match the actual default.
    sources_read: Optional[List[str]] = None
    evidence_level: str = "none"
    confidence: float = 0.0
    prediction: float = 0.0  # v3: predicted success rate
    prediction_reasoning: str = ""  # v3: why we predicted this

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dict keyed by field name."""
        return asdict(self)


@dataclass
class ExecutionResult:
    """Result with full provenance and intelligence.

    ``data`` is whatever the tool produced (``None`` when nothing was
    produced) and ``error`` carries the failure message, if any.
    """
    success: bool
    data: Any
    provenance: Provenance
    error: Optional[str] = None
    execution_time_ms: float = 0.0
    # v3: what intelligence was used; defaults to None, hence Optional
    intelligence_applied: Optional[Dict] = None

    def to_json(self) -> str:
        """Serialize the result to an indented JSON string.

        ``data`` is typed ``Any``, so it may contain values ``json`` cannot
        encode natively. Those are rendered with ``str()`` instead of raising
        ``TypeError``, so a result can always be reported.
        """
        payload = {
            'success': self.success,
            'data': self.data,
            'provenance': self.provenance.to_dict(),
            'error': self.error,
            'execution_time_ms': self.execution_time_ms,
            'intelligence_applied': self.intelligence_applied,
        }
        return json.dumps(payload, indent=2, default=str)


class AdaptivePolicy:
    """
    v3: Policies that adapt based on performance data.

    Instead of static thresholds, we adjust based on:
    - Historical success rates
    - Recent performance trends
    - Prediction accuracy

    Each instance holds its own mutable copy of the house's base policy in
    ``self.policy`` and counts applied adaptations in ``self.adaptation_count``.
    """

    BASE_POLICIES = {
        House.TIMMY: {
            "evidence_threshold": 0.7,
            "can_override": True,
            "telemetry": True,
            "auto_adapt": True,
            "motto": "Sovereignty and service always"
        },
        House.EZRA: {
            "evidence_threshold": 0.8,
            "must_read_before_write": True,
            "citation_required": True,
            "auto_adapt": True,
            "motto": "Read the pattern. Name the truth. Return a clean artifact."
        },
        House.BEZALEL: {
            "evidence_threshold": 0.6,
            "requires_proof": True,
            "test_before_ship": True,
            "auto_adapt": True,
            "parallelize_threshold": 0.5,
            "motto": "Build the pattern. Prove the result. Return the tool."
        }
    }

    def __init__(self, house: House, intelligence: IntelligenceEngine):
        """Bind the policy to a house and load any recorded adaptations."""
        self.house = house
        self.intelligence = intelligence
        # Must be initialised BEFORE _load_policy(): loading increments the
        # counter once per replayed adaptation. Setting it afterwards raised
        # AttributeError on the first match and would have reset the count.
        self.adaptation_count = 0
        self.policy = self._load_policy()

    def _load_policy(self) -> Dict:
        """Load policy, potentially adapted from base.

        Starts from a copy of the house's base policy and replays the
        adaptations returned by the intelligence database whose
        ``change_type`` contains ``policy.<house>.``. Two shapes are
        understood:

        - ``policy.<house>.<key>``: ``new_value`` replaces that key.
        - ``policy.<house>.multi``: ``new_value`` is a ``{key: value}`` dict.
          This is the shape ``adapt()`` records, so it must be unpacked here
          or this policy's own adaptations are never re-applied.

        Keys not present in the base policy are ignored.
        """
        base = self.BASE_POLICIES[self.house].copy()
        marker = f"policy.{self.house.value}."

        # NOTE(review): events are applied in the order the database returns
        # them, so the last one wins — confirm get_adaptations() ordering.
        for event in self.intelligence.db.get_adaptations(limit=50):
            if marker not in event.change_type:
                continue

            policy_key = event.change_type.split(".")[-1]
            if policy_key == "multi" and isinstance(event.new_value, dict):
                known = {k: v for k, v in event.new_value.items() if k in base}
                if known:
                    base.update(known)
                    self.adaptation_count += 1
            elif policy_key in base:
                base[policy_key] = event.new_value
                self.adaptation_count += 1

        return base

    def get(self, key: str, default=None):
        """Get policy value, or ``default`` when the key is not set."""
        return self.policy.get(key, default)

    def adapt(self, trigger: str, reason: str) -> Optional[AdaptationEvent]:
        """
        Adapt policy based on trigger.

        Called when intelligence engine detects performance patterns.

        Reads the house's success rate over the last 3 days and nudges
        ``evidence_threshold``:

        - success rate below 0.6 and threshold above 0.6: lower it by 0.05
        - success rate above 0.9 and threshold below 0.9: raise it by 0.02
          (capped at 0.95)

        Returns the recorded ``AdaptationEvent``, or ``None`` when
        ``auto_adapt`` is off or no change was warranted.
        """
        if not self.policy.get("auto_adapt", False):
            return None

        # Get house performance
        perf = self.intelligence.db.get_house_performance(
            self.house.value, days=3
        )
        success_rate = perf.get("success_rate", 0.5)
        threshold = self.policy.get("evidence_threshold", 0.8)

        if success_rate < 0.6 and threshold > 0.6:
            # Struggling: relax the evidence requirement
            new_threshold = threshold - 0.05
        elif success_rate > 0.9 and threshold < 0.9:
            # If we're doing well, we can be more demanding
            new_threshold = min(0.95, threshold + 0.02)
        else:
            return None

        self.policy["evidence_threshold"] = new_threshold

        event = AdaptationEvent(
            timestamp=datetime.utcnow().isoformat(),
            trigger=trigger,
            change_type=f"policy.{self.house.value}.multi",
            old_value={"evidence_threshold": threshold},
            new_value={"evidence_threshold": new_threshold},
            reason=reason,
            expected_improvement=0.05 if success_rate < 0.6 else 0.02
        )
        self.intelligence.db.record_adaptation(event)
        self.adaptation_count += 1
        return event


class UniWizardHarness:
    """
    The Self-Improving Uni-Wizard Harness.

    Key v3 features:
    1. Intelligence integration for predictions
    2. Adaptive policies that learn
    3. Hermes telemetry ingestion
    4. Pre-execution optimization
    5. Post-execution learning
    """

    def __init__(self, house: str = "timmy",
                 intelligence: Optional[IntelligenceEngine] = None,
                 enable_learning: bool = True):
        """
        Args:
            house: House identifier; must be a valid ``House`` value
                (``House(house)`` raises ``ValueError`` otherwise).
            intelligence: Engine to share between harnesses; a fresh
                ``IntelligenceEngine`` is created when omitted.
            enable_learning: When True, every execution is recorded into the
                intelligence database.
        """
        self.house = House(house)
        # Explicit None check so a supplied engine is never replaced merely
        # because it happens to evaluate as falsy.
        self.intelligence = (
            intelligence if intelligence is not None else IntelligenceEngine()
        )
        self.policy = AdaptivePolicy(self.house, self.intelligence)
        self.history: List[ExecutionResult] = []
        self.enable_learning = enable_learning

        # Performance tracking
        self.execution_count = 0
        self.success_count = 0
        self.total_latency_ms = 0.0

    def _hash_content(self, content: str) -> str:
        """Create content hash for provenance (first 16 hex chars of SHA-256)."""
        return hashlib.sha256(content.encode()).hexdigest()[:16]

    def _check_evidence(self, tool_name: str,
                        params: Dict) -> Tuple[str, float, List[str]]:
        """
        Check evidence level with intelligence augmentation.

        v3: Uses pattern database to check historical evidence reliability.

        Returns: (evidence_level, confidence, sources) where evidence_level
        is "full", "partial" or "none" depending on the tool-name prefix.
        """
        sources: List[str] = []

        # Get pattern for this tool/house combo
        pattern = self.intelligence.db.get_pattern(tool_name, self.house.value, params)

        # Adjust confidence based on historical performance
        base_confidence = 0.5
        if pattern:
            base_confidence = pattern.success_rate
            sources.append(f"pattern:{pattern.sample_count}samples")

        # Tool-specific logic
        if tool_name.startswith("git_"):
            repo_path = params.get("repo_path", ".")
            sources.append(f"repo:{repo_path}")
            return ("full", min(0.95, base_confidence + 0.2), sources)

        if tool_name.startswith(("system_", "service_")):
            sources.append("system:live")
            return ("full", min(0.98, base_confidence + 0.3), sources)

        if tool_name.startswith(("http_", "gitea_")):
            sources.append("network:external")
            return ("partial", base_confidence * 0.8, sources)

        return ("none", base_confidence, sources)

    def predict_execution(self, tool_name: str, params: Dict) -> Tuple[float, str]:
        """
        v3: Predict success before executing.

        Returns: (probability, reasoning)
        """
        return self.intelligence.predict_success(
            tool_name, self.house.value, params
        )

    def execute(self, tool_name: str, **params) -> ExecutionResult:
        """
        Execute with full intelligence integration.

        Flow:
        1. Predict success (intelligence)
        2. Check evidence (with pattern awareness)
        3. Enforce house policy (may reject before executing)
        4. Execute
        5. Build provenance
        6. Record outcome in local history/counters
        7. Feed the outcome into the intelligence engine

        A policy rejection (step 3) returns a failed result immediately; it
        is not added to history, counters, or the intelligence database.
        """
        # Monotonic clock: elapsed time must not be affected by wall-clock
        # adjustments during the run.
        start_time = time.perf_counter()
        started_at = datetime.utcnow().isoformat()

        # 1. Pre-execution prediction
        prediction, pred_reason = self.predict_execution(tool_name, params)

        # 2. Evidence check with pattern awareness
        evidence_level, base_confidence, sources = self._check_evidence(
            tool_name, params
        )

        # Adjust confidence by prediction
        confidence = (base_confidence + prediction) / 2

        # 3. Policy check
        if self.house == House.EZRA and self.policy.get("must_read_before_write"):
            # Ezra may only commit if one of the last five recorded
            # executions was a git_status.
            recent_tools = [h.provenance.tool for h in self.history[-5:]]
            if tool_name == "git_commit" and "git_status" not in recent_tools:
                return ExecutionResult(
                    success=False,
                    data=None,
                    provenance=Provenance(
                        house=self.house.value,
                        tool=tool_name,
                        started_at=started_at,
                        prediction=prediction,
                        prediction_reasoning=pred_reason
                    ),
                    error="Ezra policy: Must read git_status before git_commit",
                    execution_time_ms=0,
                    intelligence_applied={"policy_enforced": "must_read_before_write"}
                )

        # 4. Execute (mock for now - would call actual tool)
        try:
            # Simulate execution
            time.sleep(0.001)  # Minimal delay

            # Determine success based on prediction + noise
            import random
            actual_success = random.random() < prediction

            result_data = {"status": "success" if actual_success else "failed"}
            error = None

        except Exception as e:
            actual_success = False
            error = str(e)
            result_data = None

        execution_time_ms = (time.perf_counter() - start_time) * 1000
        completed_at = datetime.utcnow().isoformat()

        # 5. Build provenance
        # default=str on both hashes: params/results may hold values json
        # cannot encode natively, and hashing must never fail a finished run.
        input_hash = self._hash_content(
            json.dumps(params, sort_keys=True, default=str)
        )
        # Compare against None so valid-but-falsy outputs ({}, [], 0, "")
        # still get a hash.
        output_hash = (
            self._hash_content(json.dumps(result_data, default=str))
            if result_data is not None else None
        )

        provenance = Provenance(
            house=self.house.value,
            tool=tool_name,
            started_at=started_at,
            completed_at=completed_at,
            input_hash=input_hash,
            output_hash=output_hash,
            sources_read=sources,
            evidence_level=evidence_level,
            confidence=confidence if actual_success else 0.0,
            prediction=prediction,
            prediction_reasoning=pred_reason
        )

        result = ExecutionResult(
            success=actual_success,
            data=result_data,
            provenance=provenance,
            error=error,
            execution_time_ms=execution_time_ms,
            intelligence_applied={
                "predicted_success": prediction,
                "pattern_used": sources[0] if sources else None,
                "policy_adaptations": self.policy.adaptation_count
            }
        )

        # 6. Record for learning
        self.history.append(result)
        self.execution_count += 1
        if actual_success:
            self.success_count += 1
        self.total_latency_ms += execution_time_ms

        # 7. Feed into intelligence engine
        if self.enable_learning:
            self.intelligence.db.record_execution({
                "tool": tool_name,
                "house": self.house.value,
                "params": params,
                "success": actual_success,
                "latency_ms": execution_time_ms,
                "confidence": confidence,
                "prediction": prediction
            })

        return result

    def learn_from_batch(self, min_executions: int = 10):
        """
        v3: Trigger learning from accumulated executions.

        Adapts policies based on patterns.

        Returns a dict with ``status`` set to ``"insufficient_data"`` when
        fewer than ``min_executions`` executions have been recorded by this
        harness, otherwise ``"adapted"`` together with the adaptations made.
        """
        if self.execution_count < min_executions:
            return {"status": "insufficient_data", "count": self.execution_count}

        # Trigger policy adaptation
        adapt = self.policy.adapt(
            trigger=f"batch_learn_{self.execution_count}",
            reason=f"Adapting after {self.execution_count} executions"
        )

        # Run intelligence analysis
        adaptations = self.intelligence.analyze_and_adapt()

        return {
            "status": "adapted",
            "policy_adaptation": adapt.to_dict() if adapt else None,
            "intelligence_adaptations": [a.to_dict() for a in adaptations],
            "current_success_rate": self.success_count / self.execution_count
        }

    def get_performance_summary(self) -> Dict:
        """Get performance summary with intelligence.

        Rates and averages are 0 when nothing has been executed yet.
        """
        if self.execution_count > 0:
            success_rate = self.success_count / self.execution_count
            avg_latency = self.total_latency_ms / self.execution_count
        else:
            success_rate = 0
            avg_latency = 0

        return {
            "house": self.house.value,
            "executions": self.execution_count,
            "successes": self.success_count,
            "success_rate": success_rate,
            "avg_latency_ms": avg_latency,
            "policy_adaptations": self.policy.adaptation_count,
            "predictions_made": sum(
                1 for h in self.history if h.provenance.prediction > 0
            ),
            "learning_enabled": self.enable_learning
        }

    def ingest_hermes_session(self, session_path: Path):
        """
        v3: Ingest Hermes session data for shortest-loop learning.

        This is the key integration - Hermes telemetry directly into
        Timmy's intelligence.

        Returns ``{"error": ...}`` when the file does not exist; otherwise a
        status dict with the number of executions the engine recorded.
        Malformed JSON propagates as ``json.JSONDecodeError``.
        """
        if not session_path.exists():
            return {"error": "Session file not found"}

        # JSON is UTF-8 by specification; don't depend on the locale default.
        with open(session_path, encoding="utf-8") as f:
            session_data = json.load(f)

        count = self.intelligence.ingest_hermes_session(session_data)

        return {
            "status": "ingested",
            "executions_recorded": count,
            "session_id": session_data.get("session_id", "unknown")
        }


def get_harness(house: str = "timmy",
                intelligence: IntelligenceEngine = None,
                enable_learning: bool = True) -> UniWizardHarness:
    """Build a ``UniWizardHarness``, forwarding every argument unchanged."""
    options = {
        "house": house,
        "intelligence": intelligence,
        "enable_learning": enable_learning,
    }
    return UniWizardHarness(**options)


if __name__ == "__main__":
    # Demo: three harnesses share one intelligence engine, run a round-robin
    # of mock executions, trigger learning, then print summaries.
    print("=" * 60)
    print("UNI-WIZARD v3 — Self-Improving Harness Demo")
    print("=" * 60)

    # Create shared intelligence engine
    intel = IntelligenceEngine()

    # Create harnesses with shared intelligence
    timmy = get_harness("timmy", intel)
    ezra = get_harness("ezra", intel)
    bezalel = get_harness("bezalel", intel)

    # Round-robin schedule: (harness, tool, keyword params)
    rotation = [
        (timmy, "system_info", {}),
        (ezra, "git_status", {"repo_path": "/tmp"}),
        (bezalel, "run_tests", {}),
    ]

    # Simulate executions with learning
    print("\n🎓 Training Phase (20 executions)...")
    for i in range(20):
        # Mix of houses and tools
        harness, tool, tool_params = rotation[i % 3]
        result = harness.execute(tool, **tool_params)

        print(f"  {i+1}. {result.provenance.house}/{result.provenance.tool}: "
              f"{'✅' if result.success else '❌'} "
              f"(predicted: {result.provenance.prediction:.0%})")

    # Trigger learning
    print("\n🔄 Learning Phase...")
    # 20 executions split three ways gives each house only 6-7, below
    # learn_from_batch's default min_executions=10. With the default, the
    # call always returns "insufficient_data" and nothing is learned, so the
    # demo lowers the threshold.
    timmy_learn = timmy.learn_from_batch(min_executions=5)
    ezra_learn = ezra.learn_from_batch(min_executions=5)

    print(f"  Timmy adaptations: {timmy_learn.get('intelligence_adaptations', [])}")
    print(f"  Ezra adaptations: {ezra_learn.get('policy_adaptation')}")

    # Show performance
    print("\n📊 Performance Summary:")
    for harness, name in [(timmy, "Timmy"), (ezra, "Ezra"), (bezalel, "Bezalel")]:
        perf = harness.get_performance_summary()
        print(f"  {name}: {perf['success_rate']:.0%} success rate, "
              f"{perf['policy_adaptations']} adaptations")

    # Show intelligence report
    print("\n🧠 Intelligence Report:")
    report = intel.get_intelligence_report()
    print(f"  Learning velocity: {report['learning_velocity']['velocity']}")
    print(f"  Recent adaptations: {len(report['recent_adaptations'])}")

    print("\n" + "=" * 60)