# timmy-home/uni-wizard/v2/harness.py

#!/usr/bin/env python3
"""
Uni-Wizard Harness v2 — The Three-House Architecture

Integrates:
- Timmy: Sovereign local conscience, final judgment, telemetry
- Ezra: Archivist pattern — read before write, evidence over vibes
- Bezalel: Artificer pattern — build from plans, proof over speculation

Usage:
    harness = UniWizardHarness(house="ezra")  # Archivist mode
    harness = UniWizardHarness(house="bezalel")  # Artificer mode
    harness = UniWizardHarness(house="timmy")  # Sovereign mode
"""

import hashlib
import json
import sys
import time
from dataclasses import asdict, dataclass
from datetime import datetime, timedelta, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional

# Add tools to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from tools import registry


class House(Enum):
    """Enumeration of the three wizard houses a harness can run as."""

    # Sovereign local conscience
    TIMMY = "timmy"
    # Archivist, reader, pattern-recognizer
    EZRA = "ezra"
    # Artificer, builder, proof-maker
    BEZALEL = "bezalel"


@dataclass
class Provenance:
    """Trail of evidence for every action.

    Only ``house``, ``tool`` and ``started_at`` are required; the remaining
    fields default to "not recorded" values so a record can be created
    before (or without) the tool actually running.
    """
    # Value of the house that ran the tool
    house: str
    # Name of the tool that was invoked
    tool: str
    # ISO-formatted start timestamp
    started_at: str
    # ISO-formatted completion timestamp, None if the tool never completed
    completed_at: Optional[str] = None
    # Truncated content hashes of the call parameters / returned data
    input_hash: Optional[str] = None
    output_hash: Optional[str] = None
    # Labels of the sources consulted; None means nothing was recorded
    sources_read: Optional[List[str]] = None
    evidence_level: str = "none"  # none, partial, full
    confidence: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict (a deep copy of the fields)."""
        return asdict(self)


@dataclass
class ExecutionResult:
    """Result with full provenance"""
    # Whether the tool ran and passed every house policy check
    success: bool
    # Whatever the tool returned; None when the call raised or was refused
    data: Any
    # Evidence trail for this call
    provenance: Provenance
    # Human-readable failure reason, None on success
    error: Optional[str] = None
    # Wall-clock duration of the call in milliseconds
    execution_time_ms: float = 0.0

    def to_json(self) -> str:
        """Serialize the result to an indented JSON string.

        Values in ``data`` that JSON cannot represent natively (paths,
        datetimes, arbitrary objects) are rendered with ``str()``. This
        matches how the harness serializes tool output when hashing it, so
        a result that could be hashed can also be dumped.
        """
        payload = {
            'success': self.success,
            'data': self.data,
            'provenance': self.provenance.to_dict(),
            'error': self.error,
            'execution_time_ms': self.execution_time_ms
        }
        return json.dumps(payload, indent=2, default=str)


class HousePolicy:
    """Lookup table of the rules each house operates under."""

    POLICIES = {
        House.TIMMY: dict(
            requires_provenance=True,
            evidence_threshold=0.7,
            can_override=True,
            telemetry=True,
            motto="Sovereignty and service always",
        ),
        House.EZRA: dict(
            requires_provenance=True,
            evidence_threshold=0.8,
            must_read_before_write=True,
            citation_required=True,
            motto="Read the pattern. Name the truth. Return a clean artifact.",
        ),
        House.BEZALEL: dict(
            requires_provenance=True,
            evidence_threshold=0.6,
            requires_proof=True,
            test_before_ship=True,
            motto="Build the pattern. Prove the result. Return the tool.",
        ),
    }

    @classmethod
    def get(cls, house: House) -> Dict:
        """Return the policy dict for *house*, or Timmy's policy if unknown."""
        policies = cls.POLICIES
        fallback = policies[House.TIMMY]
        try:
            return policies[house]
        except KeyError:
            return fallback


class SovereigntyTelemetry:
    """Timmy's sovereignty tracking — what you measure, you manage.

    Appends one JSON object per tool execution to a JSONL file and
    aggregates that file into summary metrics on request.
    """

    def __init__(self, log_dir: Optional[Path] = None):
        """Prepare the log directory and derive a session identifier.

        Args:
            log_dir: Directory that holds the telemetry log. Defaults to
                ``~/timmy/logs``. It is created (with parents) if missing.
        """
        self.log_dir = Path(log_dir) if log_dir else Path.home() / "timmy" / "logs"
        self.log_dir.mkdir(parents=True, exist_ok=True)
        self.telemetry_log = self.log_dir / "uni_wizard_telemetry.jsonl"
        self.session_id = hashlib.sha256(
            f"{time.time()}{id(self)}".encode()
        ).hexdigest()[:16]

    @staticmethod
    def _utc_now() -> datetime:
        """Return the current UTC time as a naive datetime.

        Replaces the deprecated ``datetime.utcnow()``. The timezone is
        stripped so written timestamps keep the format already in the log.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None)

    @staticmethod
    def _parse_entry(line: str) -> Optional[Dict]:
        """Decode one log line; return None for blank or malformed lines."""
        line = line.strip()
        if not line:
            return None
        try:
            entry = json.loads(line)
        except ValueError:  # json.JSONDecodeError is a ValueError
            return None
        # A line such as `42` is valid JSON but not a telemetry record.
        return entry if isinstance(entry, dict) else None

    @staticmethod
    def _entry_time(entry: Dict) -> Optional[datetime]:
        """Return the entry's timestamp as naive UTC, or None if unusable."""
        raw = entry.get("timestamp")
        if not isinstance(raw, str):
            return None
        try:
            stamp = datetime.fromisoformat(raw)
        except ValueError:
            return None
        if stamp.tzinfo is not None:
            # Normalize aware timestamps so they compare with naive ones.
            stamp = stamp.astimezone(timezone.utc).replace(tzinfo=None)
        return stamp

    def log_execution(self, house: str, tool: str, result: "ExecutionResult"):
        """Log every execution with full provenance.

        Args:
            house: Value of the house that ran the tool.
            tool: Name of the tool that was executed.
            result: Outcome of the execution; its success flag, timing and
                provenance fields are copied into the log entry.
        """
        entry = {
            "session_id": self.session_id,
            "timestamp": self._utc_now().isoformat(),
            "house": house,
            "tool": tool,
            "success": result.success,
            "execution_time_ms": result.execution_time_ms,
            "evidence_level": result.provenance.evidence_level,
            "confidence": result.provenance.confidence,
            "sources_count": len(result.provenance.sources_read or []),
        }

        with open(self.telemetry_log, 'a', encoding="utf-8") as f:
            f.write(json.dumps(entry) + '\n')

    def get_sovereignty_report(self, days: int = 7) -> Dict:
        """Generate sovereignty metrics report.

        Args:
            days: Only entries whose timestamp falls within the last
                ``days`` days are counted. Pass ``None`` to count every
                readable entry regardless of age.

        Returns:
            Dict with ``total_executions``, ``by_house`` (counts per house),
            ``top_tools`` (up to ten ``(tool, count)`` pairs, most used
            first), ``avg_confidence`` (rounded to two decimals) and the
            current ``session_id``.

        Lines that are not JSON objects are skipped. When a ``days`` window
        is in effect, entries without a parseable timestamp are skipped
        too, because their age cannot be established.
        """
        cutoff = None
        if days is not None:
            cutoff = self._utc_now() - timedelta(days=days)

        # Read telemetry log
        entries = []
        if self.telemetry_log.exists():
            # errors="replace" keeps one corrupt byte from aborting the
            # report; the damaged line then fails JSON parsing and is skipped.
            with open(self.telemetry_log, encoding="utf-8", errors="replace") as f:
                for line in f:
                    entry = self._parse_entry(line)
                    if entry is None:
                        continue
                    if cutoff is not None:
                        stamp = self._entry_time(entry)
                        if stamp is None or stamp < cutoff:
                            continue
                    entries.append(entry)

        # Calculate metrics
        total = len(entries)
        by_house = {}
        by_tool = {}
        confidence_sum = 0.0

        for e in entries:
            house = e.get('house', 'unknown')
            by_house[house] = by_house.get(house, 0) + 1

            tool = e.get('tool', 'unknown')
            by_tool[tool] = by_tool.get(tool, 0) + 1

            confidence = e.get('confidence', 0)
            # A non-numeric confidence contributes 0 instead of raising.
            if isinstance(confidence, (int, float)) and not isinstance(confidence, bool):
                confidence_sum += confidence

        avg_confidence = confidence_sum / total if total > 0 else 0.0

        return {
            "total_executions": total,
            "by_house": by_house,
            "top_tools": sorted(by_tool.items(), key=lambda x: -x[1])[:10],
            "avg_confidence": round(avg_confidence, 2),
            "session_id": self.session_id
        }


class UniWizardHarness:
    """
    The Uni-Wizard Harness v2 — Three houses, one consciousness.

    House-aware execution with provenance tracking:
    - Timmy: Sovereign judgment, telemetry, final review
    - Ezra: Archivist — reads before writing, cites sources
    - Bezalel: Artificer — builds with proof, tests before shipping
    """

    # Write tools mapped to the read tool that must have succeeded first
    # (for the same repo_path) when the house policy sets
    # "must_read_before_write".
    _READ_BEFORE_WRITE = {"git_commit": "git_status"}

    def __init__(self, house: str = "timmy", telemetry: bool = True):
        """Create a harness bound to one house.

        Args:
            house: House name ("timmy", "ezra" or "bezalel", any letter
                case) or a ``House`` member.
            telemetry: When True, every execution is appended to the
                sovereignty telemetry log.

        Raises:
            ValueError: If ``house`` does not name a known house.
        """
        if isinstance(house, str):
            house = house.strip().lower()
        self.house = House(house)
        self.registry = registry
        # Copy the policy so nothing done to this instance (or to a review
        # package built from it) can mutate the shared class-level table.
        self.policy = dict(HousePolicy.get(self.house))
        self.history: List[ExecutionResult] = []

        # Telemetry (Timmy's sovereignty tracking)
        self.telemetry = SovereigntyTelemetry() if telemetry else None

        # Evidence store (Ezra's reading cache): one entry per successful
        # read tool and repo, keyed "<tool>:<repo_path>"
        self.evidence_cache: Dict[str, Any] = {}

        # Proof store (Bezalel's test results)
        self.proof_cache: Dict[str, Any] = {}

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        Yields the same format as the deprecated ``datetime.utcnow()``.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _hash_content(self, content: str) -> str:
        """Create content hash for provenance"""
        return hashlib.sha256(content.encode()).hexdigest()[:16]

    def _hash_json(self, payload: Any, sort_keys: bool = False) -> str:
        """Hash the JSON form of *payload*, falling back to its ``repr``.

        Hashing happens after the tool has already run, so a payload that
        JSON cannot encode must not raise and discard the tool's result.
        """
        try:
            text = json.dumps(payload, sort_keys=sort_keys, default=str)
        except (TypeError, ValueError):
            text = repr(payload)
        return self._hash_content(text)

    @staticmethod
    def _evidence_key(tool_name: str, params: Dict) -> str:
        """Build the evidence-cache key for a tool call on a repository."""
        return f"{tool_name}:{params.get('repo_path', '.')}"

    def _read_before_write_violation(self, tool_name: str, params: Dict) -> Optional[str]:
        """Return an error message if a required prior read is missing.

        Applies only when the house policy sets "must_read_before_write"
        and *tool_name* is listed in ``_READ_BEFORE_WRITE``. The read
        counts only if it succeeded through this harness for the same
        ``repo_path`` (default ".").
        """
        if not self.policy.get("must_read_before_write", False):
            return None
        required_read = self._READ_BEFORE_WRITE.get(tool_name)
        if required_read is None:
            return None
        if self._evidence_key(required_read, params) in self.evidence_cache:
            return None
        return f"Ezra policy: Must read {required_read} before {tool_name}"

    def _record(self, result: ExecutionResult) -> ExecutionResult:
        """Append *result* to history, log telemetry if enabled, return it."""
        self.history.append(result)
        if self.telemetry:
            self.telemetry.log_execution(
                self.house.value, result.provenance.tool, result
            )
        return result

    def _check_evidence(self, tool_name: str, params: Dict) -> tuple:
        """
        Ezra's pattern: Check evidence level before execution.
        Returns (evidence_level, confidence, sources)

        The level is derived from the tool name prefix alone; no external
        state is inspected here.
        """
        sources = []

        # For git operations, check repo state
        if tool_name.startswith("git_"):
            repo_path = params.get("repo_path", ".")
            sources.append(f"repo:{repo_path}")
            # Would check git status here
            return ("full", 0.9, sources)

        # For system operations, check current state
        if tool_name.startswith(("system_", "service_")):
            sources.append("system:live")
            return ("full", 0.95, sources)

        # For network operations, depends on external state
        if tool_name.startswith(("http_", "gitea_")):
            sources.append("network:external")
            return ("partial", 0.6, sources)

        return ("none", 0.5, sources)

    def _verify_proof(self, tool_name: str, result: Any) -> bool:
        """
        Bezalel's pattern: Verify proof for build artifacts.

        Returns True when the house does not require proof, or when the
        tool is not a git operation. For git operations a dict result must
        carry a truthy "success" entry and a string result must not
        contain "error" (case-insensitive).
        """
        if not self.policy.get("requires_proof", False):
            return True

        # For git operations, verify the operation succeeded
        if tool_name.startswith("git_"):
            # Check if result contains success indicator
            if isinstance(result, dict):
                return bool(result.get("success", False))
            if isinstance(result, str):
                return "error" not in result.lower()

        return True

    def execute(self, tool_name: str, **params) -> ExecutionResult:
        """
        Execute a tool with full house policy enforcement.

        Flow:
        1. Check evidence (Ezra pattern)
        2. Execute tool
        3. Verify proof (Bezalel pattern)
        4. Record provenance
        5. Log telemetry (Timmy pattern)

        A call refused by the read-before-write policy never reaches the
        registry; it is still recorded in history and telemetry as a
        failed result. Exceptions raised by the tool are captured in the
        result's ``error`` field rather than propagated.
        """
        start_time = time.time()
        started_at = self._utc_timestamp()

        # 1. Evidence check (Ezra's archivist discipline)
        evidence_level, confidence, sources = self._check_evidence(tool_name, params)

        violation = self._read_before_write_violation(tool_name, params)
        if violation is not None:
            refused = ExecutionResult(
                success=False,
                data=None,
                provenance=Provenance(
                    house=self.house.value,
                    tool=tool_name,
                    started_at=started_at,
                    evidence_level="none"
                ),
                error=violation,
                execution_time_ms=0
            )
            return self._record(refused)

        # 2. Execute tool
        try:
            data = self.registry.execute(tool_name, **params)
            success = True
            error = None
        except Exception as e:
            success = False
            error = f"{type(e).__name__}: {str(e)}"
            data = None

        execution_time_ms = (time.time() - start_time) * 1000
        completed_at = self._utc_timestamp()

        # 3. Proof verification (Bezalel's artificer discipline);
        # _verify_proof passes everything when the house needs no proof.
        if success and not self._verify_proof(tool_name, data):
            success = False
            error = "Bezalel policy: Proof verification failed"

        # 4. Build provenance record
        input_hash = self._hash_json(params, sort_keys=True)
        # Falsy outputs (0, "", [], {}) are real outputs; only None means
        # "nothing returned".
        output_hash = self._hash_json(data) if data is not None else None

        provenance = Provenance(
            house=self.house.value,
            tool=tool_name,
            started_at=started_at,
            completed_at=completed_at,
            input_hash=input_hash,
            output_hash=output_hash,
            sources_read=sources,
            evidence_level=evidence_level,
            confidence=confidence if success else 0.0
        )

        # Remember successful reads so a later write on the same repo can
        # pass the read-before-write check.
        if success and tool_name in self._READ_BEFORE_WRITE.values():
            self.evidence_cache[self._evidence_key(tool_name, params)] = {
                "read_at": completed_at,
                "output_hash": output_hash,
            }

        result = ExecutionResult(
            success=success,
            data=data,
            provenance=provenance,
            error=error,
            execution_time_ms=execution_time_ms
        )

        # 5. Record history and log telemetry (Timmy's sovereignty tracking)
        return self._record(result)

    def execute_plan(self, plan: List[Dict]) -> Dict[str, ExecutionResult]:
        """
        Execute a sequence with house policy applied at each step.

        Plan format:
            [
                {"tool": "git_status", "params": {"repo_path": "/path"}},
                {"tool": "git_commit", "params": {"repo_path": "/path",
                                                  "message": "Update"}}
            ]

        Returns:
            Results keyed by tool name in execution order. When a tool
            appears more than once, later occurrences are keyed
            "<tool>#2", "<tool>#3", ... so no result is overwritten.

        Raises:
            ValueError: If a step has no string "tool" entry. The whole
                plan is validated before any step runs.
        """
        steps = []
        for index, step in enumerate(plan):
            tool_name = step.get("tool")
            if not isinstance(tool_name, str) or not tool_name:
                raise ValueError(f"Plan step {index} has no 'tool' name")
            # An explicit `"params": None` is treated as "no parameters".
            steps.append((tool_name, step.get("params") or {}))

        results = {}
        fail_fast = self.policy.get("test_before_ship", False)

        for tool_name, params in steps:
            result = self.execute(tool_name, **params)

            key = tool_name
            occurrence = 1
            while key in results:
                occurrence += 1
                key = f"{tool_name}#{occurrence}"
            results[key] = result

            # Stop on failure (Bezalel: fail fast)
            if fail_fast and not result.success:
                break

        return results

    def review_for_timmy(self, results: Dict[str, ExecutionResult]) -> Dict:
        """
        Generate a review package for Timmy's sovereign judgment.
        Returns structured review data with full provenance.

        The package holds the house, a copy of its policy, one summary
        dict per execution, aggregate counts and a recommendation:
        "APPROVE" when nothing failed and average confidence is at least
        0.8, "CONDITIONAL" when nothing failed below that, else "REJECT".
        """
        successful = sum(1 for r in results.values() if r.success)

        review = {
            "house": self.house.value,
            # A copy: consumers editing the review must not alter the policy.
            "policy": dict(self.policy),
            "executions": [],
            "summary": {
                "total": len(results),
                "successful": successful,
                "failed": len(results) - successful,
                "avg_confidence": 0.0,
                "evidence_levels": {}
            },
            "recommendation": ""
        }

        evidence_levels = review["summary"]["evidence_levels"]
        total_confidence = 0
        for tool, result in results.items():
            review["executions"].append({
                "tool": tool,
                "success": result.success,
                "error": result.error,
                "evidence_level": result.provenance.evidence_level,
                "confidence": result.provenance.confidence,
                "sources": result.provenance.sources_read,
                "execution_time_ms": result.execution_time_ms
            })
            total_confidence += result.provenance.confidence

            level = result.provenance.evidence_level
            evidence_levels[level] = evidence_levels.get(level, 0) + 1

        if results:
            review["summary"]["avg_confidence"] = round(
                total_confidence / len(results), 2
            )

        # Generate recommendation
        if review["summary"]["failed"] == 0:
            if review["summary"]["avg_confidence"] >= 0.8:
                review["recommendation"] = "APPROVE: High confidence, all passed"
            else:
                review["recommendation"] = "CONDITIONAL: Passed but low confidence"
        else:
            review["recommendation"] = "REJECT: Failures detected"

        return review

    def get_capabilities(self) -> str:
        """List all capabilities with house annotations"""
        lines = [f"\n🏛️  {self.house.value.upper()} HOUSE CAPABILITIES"]
        lines.append(f"   Motto: {self.policy.get('motto', '')}")
        lines.append(f"   Evidence threshold: {self.policy.get('evidence_threshold', 0)}")
        lines.append("")

        for category in self.registry.get_categories():
            cat_tools = self.registry.get_tools_by_category(category)
            lines.append(f"\n📁 {category.upper()}")
            for tool in cat_tools:
                lines.append(f"   • {tool['name']}: {tool['description']}")

        return "\n".join(lines)

    def get_telemetry_report(self) -> str:
        """Get sovereignty telemetry report"""
        if not self.telemetry:
            return "Telemetry disabled"

        report = self.telemetry.get_sovereignty_report()

        lines = ["\n📊 SOVEREIGNTY TELEMETRY REPORT"]
        lines.append(f"   Session: {report['session_id']}")
        lines.append(f"   Total executions: {report['total_executions']}")
        lines.append(f"   Average confidence: {report['avg_confidence']}")
        lines.append("\n   By House:")
        for house, count in report.get('by_house', {}).items():
            lines.append(f"      {house}: {count}")
        lines.append("\n   Top Tools:")
        for tool, count in report.get('top_tools', []):
            lines.append(f"      {tool}: {count}")

        return "\n".join(lines)


def get_harness(house: str = "timmy") -> UniWizardHarness:
    """Build and return a harness configured for the named house."""
    harness = UniWizardHarness(house=house)
    return harness


if __name__ == "__main__":
    # Walk through the three houses and show what each can do
    banner = "=" * 60
    print(banner)
    print("UNI-WIZARD HARNESS v2 — Three House Demo")
    print(banner)

    demo_harness = None
    for house_name in ("ezra", "bezalel", "timmy"):
        print("\n" + banner)
        demo_harness = get_harness(house_name)
        print(demo_harness.get_capabilities())

    # The last harness built is Timmy's; it also prints the telemetry report
    print(demo_harness.get_telemetry_report())