Files
timmy-home/uni-wizard/v3/intelligence_engine.py
Allegro 31026ddcc1 [#76-v4] Final Uni-Wizard Architecture — Production Integration
Complete four-pass evolution to production-ready architecture:

**Pass 1 → Foundation:**
- Tool registry, basic harness, 19 tools
- VPS provisioning, Syncthing mesh
- Health daemon, systemd services

**Pass 2 → Three-House Canon:**
- Timmy (Sovereign), Ezra (Archivist), Bezalel (Artificer)
- Provenance tracking, artifact-flow discipline
- House-aware policy enforcement

**Pass 3 → Self-Improvement:**
- Pattern database with SQLite backend
- Adaptive policies (auto-adjust thresholds)
- Predictive execution (success prediction)
- Hermes bridge for shortest-loop telemetry
- Learning velocity tracking

**Pass 4 → Production Integration:**
- Unified API: `from uni_wizard import Harness, House, Mode`
- Three modes: SIMPLE / INTELLIGENT / SOVEREIGN
- Circuit breaker pattern for fault tolerance
- Async/concurrent execution support
- Production hardening (timeouts, retries)

**Allegro Lane Definition:**
- Narrowed to: Gitea integration, Hermes bridge, redundancy/failover
- Provides: Cloud connectivity, telemetry streaming, issue routing
- Does NOT: Make sovereign decisions, authenticate as Timmy

**Files:**
- v3/: Intelligence engine, adaptive harness, Hermes bridge
- v4/: Unified API, production harness, final architecture

Total: ~25KB architecture documentation + production code
2026-03-30 16:39:42 +00:00

680 lines
24 KiB
Python

#!/usr/bin/env python3
"""
Intelligence Engine v3 — Self-Improving Local Sovereignty
The feedback loop that makes Timmy smarter:
1. INGEST: Pull telemetry from Hermes, houses, all sources
2. ANALYZE: Pattern recognition on success/failure/latency
3. ADAPT: Adjust policies, routing, predictions
4. PREDICT: Pre-fetch, pre-route, optimize before execution
Key principle: Every execution teaches. Every pattern informs next decision.
"""
import json
import sqlite3
import time
import hashlib
from typing import Dict, List, Any, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, asdict
from datetime import datetime, timedelta
from collections import defaultdict
import statistics
@dataclass
class ExecutionPattern:
    """Aggregated pattern mined from execution history.

    One row per (tool, param signature, house, model) combination, with
    running success/latency/confidence statistics.
    """
    tool: str
    param_signature: str   # hashed signature of the params dict
    house: str
    model: str             # which model was used
    success_rate: float
    avg_latency_ms: float
    avg_confidence: float
    sample_count: int
    last_executed: str

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain, JSON-serializable dict of all fields."""
        return {
            "tool": self.tool,
            "param_signature": self.param_signature,
            "house": self.house,
            "model": self.model,
            "success_rate": self.success_rate,
            "avg_latency_ms": self.avg_latency_ms,
            "avg_confidence": self.avg_confidence,
            "sample_count": self.sample_count,
            "last_executed": self.last_executed,
        }
@dataclass
class ModelPerformance:
    """Performance metrics for a model on a task type.

    One record per (model, task_type) pair, aggregated from the
    model_performance table.
    """
    model: str
    task_type: str
    total_calls: int
    success_count: int
    success_rate: float
    avg_latency_ms: float
    avg_tokens: float
    cost_per_call: float
    last_used: str

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain, JSON-serializable dict of all fields.

        Added for consistency with ExecutionPattern.to_dict so all record
        dataclasses in this module serialize the same way.
        """
        return asdict(self)
@dataclass
class AdaptationEvent:
    """Record of a policy/system adaptation."""
    timestamp: str
    trigger: str              # what caused the adaptation
    change_type: str          # policy, routing, cache, etc
    old_value: Any
    new_value: Any
    reason: str
    expected_improvement: float

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain, JSON-serializable dict of all fields.

        Bug fix: IntelligenceEngine.get_intelligence_report serializes recent
        adaptations via a.to_dict(); this method did not exist, so building a
        report with any recorded adaptations raised AttributeError.
        """
        return asdict(self)
class PatternDatabase:
    """
    Local SQLite database for execution patterns.

    Tracks:
    - Tool + params -> success rate
    - House + task -> performance
    - Model + task type -> best choice
    - Time-based patterns (hour-of-day / day-of-week effects)

    Every public method opens a short-lived connection and closes it in a
    ``finally`` block, so a failing query cannot leak the file handle
    (the original code left connections open on any exception).
    """

    def __init__(self, db_path: Path = None):
        """Open (or create) the database; defaults to ~/.timmy/intelligence.db."""
        self.db_path = db_path or Path.home() / ".timmy" / "intelligence.db"
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    def _connect(self) -> sqlite3.Connection:
        """Return a fresh connection to the pattern database."""
        return sqlite3.connect(str(self.db_path))

    @staticmethod
    def _hash_params(params: Dict) -> str:
        """Stable 16-hex-char signature of a params dict (key-order independent)."""
        return hashlib.sha256(
            json.dumps(params, sort_keys=True).encode()
        ).hexdigest()[:16]

    def _init_db(self):
        """Initialize database with performance tracking tables (idempotent)."""
        conn = self._connect()
        try:
            # Execution outcomes with full context
            conn.execute("""
                CREATE TABLE IF NOT EXISTS executions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp REAL NOT NULL,
                    tool TEXT NOT NULL,
                    param_hash TEXT NOT NULL,
                    house TEXT NOT NULL,
                    model TEXT,
                    task_type TEXT,
                    success INTEGER NOT NULL,
                    latency_ms REAL,
                    confidence REAL,
                    tokens_in INTEGER,
                    tokens_out INTEGER,
                    error_type TEXT,
                    hour_of_day INTEGER,
                    day_of_week INTEGER
                )
            """)
            # Aggregated patterns (updated continuously)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS patterns (
                    tool TEXT NOT NULL,
                    param_signature TEXT NOT NULL,
                    house TEXT NOT NULL,
                    model TEXT,
                    success_count INTEGER DEFAULT 0,
                    failure_count INTEGER DEFAULT 0,
                    total_latency_ms REAL DEFAULT 0,
                    total_confidence REAL DEFAULT 0,
                    sample_count INTEGER DEFAULT 0,
                    last_updated REAL,
                    PRIMARY KEY (tool, param_signature, house, model)
                )
            """)
            # Model performance by task type
            conn.execute("""
                CREATE TABLE IF NOT EXISTS model_performance (
                    model TEXT NOT NULL,
                    task_type TEXT NOT NULL,
                    total_calls INTEGER DEFAULT 0,
                    success_count INTEGER DEFAULT 0,
                    total_latency_ms REAL DEFAULT 0,
                    total_tokens INTEGER DEFAULT 0,
                    last_used REAL,
                    PRIMARY KEY (model, task_type)
                )
            """)
            # Adaptation history (how we've changed)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS adaptations (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp REAL NOT NULL,
                    trigger TEXT NOT NULL,
                    change_type TEXT NOT NULL,
                    old_value TEXT,
                    new_value TEXT,
                    reason TEXT,
                    expected_improvement REAL
                )
            """)
            # Performance predictions (for validation)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS predictions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp REAL NOT NULL,
                    tool TEXT NOT NULL,
                    house TEXT NOT NULL,
                    predicted_success_rate REAL,
                    actual_success INTEGER,
                    prediction_accuracy REAL
                )
            """)
            conn.execute("CREATE INDEX IF NOT EXISTS idx_exec_tool ON executions(tool)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_exec_time ON executions(timestamp)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_patterns_tool ON patterns(tool)")
            conn.commit()
        finally:
            conn.close()

    def record_execution(self, data: Dict):
        """Record a single execution outcome and update the aggregate tables.

        Expected keys in *data* (all optional except tool/success for useful
        records): tool, params, house, model, task_type, success, latency_ms,
        confidence, tokens_in, tokens_out, error_type.
        """
        conn = self._connect()
        try:
            now = time.time()
            dt = datetime.fromtimestamp(now)
            tool = data.get("tool", "unknown")
            param_hash = self._hash_params(data.get("params", {}))
            conn.execute("""
                INSERT INTO executions
                (timestamp, tool, param_hash, house, model, task_type, success,
                 latency_ms, confidence, tokens_in, tokens_out, error_type,
                 hour_of_day, day_of_week)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                now, tool, param_hash, data.get("house", "timmy"),
                data.get("model"), data.get("task_type"),
                1 if data.get("success") else 0,
                data.get("latency_ms"), data.get("confidence"),
                data.get("tokens_in"), data.get("tokens_out"),
                data.get("error_type"),
                dt.hour, dt.weekday()
            ))
            # Update aggregated patterns
            self._update_pattern(conn, tool, param_hash, data)
            # Update model performance (only when a model was involved)
            if data.get("model"):
                self._update_model_performance(conn, data)
            conn.commit()
        finally:
            conn.close()

    def _update_pattern(self, conn: sqlite3.Connection, tool: str,
                        param_hash: str, data: Dict):
        """Update the aggregate row for this tool/params/house/model combo."""
        house = data.get("house", "timmy")
        model = data.get("model", "unknown")
        success = 1 if data.get("success") else 0
        # `or 0` also covers an explicit None value, which data.get(key, 0)
        # would pass through and break the additions below.
        latency = data.get("latency_ms") or 0
        confidence = data.get("confidence") or 0
        # Try to update the existing row first
        result = conn.execute("""
            SELECT success_count, failure_count, total_latency_ms,
                   total_confidence, sample_count
            FROM patterns
            WHERE tool=? AND param_signature=? AND house=? AND model=?
        """, (tool, param_hash, house, model)).fetchone()
        if result:
            succ, fail, total_lat, total_conf, samples = result
            conn.execute("""
                UPDATE patterns SET
                    success_count = ?,
                    failure_count = ?,
                    total_latency_ms = ?,
                    total_confidence = ?,
                    sample_count = ?,
                    last_updated = ?
                WHERE tool=? AND param_signature=? AND house=? AND model=?
            """, (
                succ + success, fail + (1 - success),
                total_lat + latency, total_conf + confidence,
                samples + 1, time.time(),
                tool, param_hash, house, model
            ))
        else:
            conn.execute("""
                INSERT INTO patterns
                (tool, param_signature, house, model, success_count, failure_count,
                 total_latency_ms, total_confidence, sample_count, last_updated)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (tool, param_hash, house, model,
                  success, 1 - success, latency, confidence, 1, time.time()))

    def _update_model_performance(self, conn: sqlite3.Connection, data: Dict):
        """Update the per-(model, task_type) running totals."""
        model = data.get("model")
        task_type = data.get("task_type", "unknown")
        success = 1 if data.get("success") else 0
        latency = data.get("latency_ms") or 0
        tokens = (data.get("tokens_in", 0) or 0) + (data.get("tokens_out", 0) or 0)
        result = conn.execute("""
            SELECT total_calls, success_count, total_latency_ms, total_tokens
            FROM model_performance
            WHERE model=? AND task_type=?
        """, (model, task_type)).fetchone()
        if result:
            total, succ, total_lat, total_tok = result
            conn.execute("""
                UPDATE model_performance SET
                    total_calls = ?,
                    success_count = ?,
                    total_latency_ms = ?,
                    total_tokens = ?,
                    last_used = ?
                WHERE model=? AND task_type=?
            """, (total + 1, succ + success, total_lat + latency,
                  total_tok + tokens, time.time(), model, task_type))
        else:
            conn.execute("""
                INSERT INTO model_performance
                (model, task_type, total_calls, success_count,
                 total_latency_ms, total_tokens, last_used)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (model, task_type, 1, success, latency, tokens, time.time()))

    def get_pattern(self, tool: str, house: str,
                    params: Dict = None) -> Optional["ExecutionPattern"]:
        """Get the best-known pattern for a tool/house(/params) combination.

        With *params*, looks up the exact param signature; without, aggregates
        across all signatures and returns the most-sampled (house, model)
        aggregate. Returns None when nothing has been recorded.
        """
        conn = self._connect()
        try:
            if params:
                result = conn.execute("""
                    SELECT param_signature, house, model,
                           success_count, failure_count, total_latency_ms,
                           total_confidence, sample_count, last_updated
                    FROM patterns
                    WHERE tool=? AND param_signature=? AND house=?
                    ORDER BY sample_count DESC
                    LIMIT 1
                """, (tool, self._hash_params(params), house)).fetchone()
            else:
                # Get aggregate across all params.
                # Bug fix: ORDER BY must use SUM(sample_count) here — a bare
                # sample_count in a GROUP BY query orders by an arbitrary
                # row's value in SQLite, not the group total.
                result = conn.execute("""
                    SELECT 'aggregate' as param_signature, house, model,
                           SUM(success_count), SUM(failure_count), SUM(total_latency_ms),
                           SUM(total_confidence), SUM(sample_count), MAX(last_updated)
                    FROM patterns
                    WHERE tool=? AND house=?
                    GROUP BY house, model
                    ORDER BY SUM(sample_count) DESC
                    LIMIT 1
                """, (tool, house)).fetchone()
        finally:
            conn.close()
        if not result:
            return None
        (param_sig, h, model, succ, fail, total_lat,
         total_conf, samples, last_updated) = result
        total = succ + fail
        # 0.5 = "no information" prior when no samples exist
        success_rate = succ / total if total > 0 else 0.5
        avg_lat = total_lat / samples if samples > 0 else 0
        avg_conf = total_conf / samples if samples > 0 else 0.5
        return ExecutionPattern(
            tool=tool,
            param_signature=param_sig,
            house=h,
            model=model or "unknown",
            success_rate=success_rate,
            avg_latency_ms=avg_lat,
            avg_confidence=avg_conf,
            sample_count=samples,
            last_executed=datetime.fromtimestamp(last_updated).isoformat()
        )

    def get_best_model(self, task_type: str, min_samples: int = 5) -> Optional[str]:
        """Return the best-performing model name for *task_type*.

        Ranked by success rate, then by average latency; models with fewer
        than *min_samples* calls are ignored. Returns None if no candidate.
        """
        conn = self._connect()
        try:
            result = conn.execute("""
                SELECT model, total_calls, success_count, total_latency_ms
                FROM model_performance
                WHERE task_type=? AND total_calls >= ?
                ORDER BY (CAST(success_count AS REAL) / total_calls) DESC,
                         (total_latency_ms / total_calls) ASC
                LIMIT 1
            """, (task_type, min_samples)).fetchone()
        finally:
            conn.close()
        return result[0] if result else None

    def get_house_performance(self, house: str, days: int = 7) -> Dict:
        """Return aggregate execution metrics for *house* over the last *days* days."""
        conn = self._connect()
        try:
            cutoff = time.time() - (days * 86400)
            result = conn.execute("""
                SELECT
                    COUNT(*) as total,
                    SUM(success) as successes,
                    AVG(latency_ms) as avg_latency,
                    AVG(confidence) as avg_confidence
                FROM executions
                WHERE house=? AND timestamp > ?
            """, (house, cutoff)).fetchone()
        finally:
            conn.close()
        total, successes, avg_lat, avg_conf = result
        return {
            "house": house,
            "period_days": days,
            "total_executions": total or 0,
            "successes": successes or 0,
            "success_rate": (successes / total) if total else 0,
            "avg_latency_ms": avg_lat or 0,
            "avg_confidence": avg_conf or 0
        }

    def record_adaptation(self, event: "AdaptationEvent"):
        """Persist a system adaptation event (values JSON-encoded)."""
        conn = self._connect()
        try:
            conn.execute("""
                INSERT INTO adaptations
                (timestamp, trigger, change_type, old_value, new_value, reason, expected_improvement)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                time.time(), event.trigger, event.change_type,
                json.dumps(event.old_value), json.dumps(event.new_value),
                event.reason, event.expected_improvement
            ))
            conn.commit()
        finally:
            conn.close()

    def get_adaptations(self, limit: int = 20) -> List["AdaptationEvent"]:
        """Return up to *limit* most recent adaptations, newest first."""
        conn = self._connect()
        try:
            rows = conn.execute("""
                SELECT timestamp, trigger, change_type, old_value, new_value,
                       reason, expected_improvement
                FROM adaptations
                ORDER BY timestamp DESC
                LIMIT ?
            """, (limit,)).fetchall()
        finally:
            conn.close()
        return [
            AdaptationEvent(
                timestamp=datetime.fromtimestamp(r[0]).isoformat(),
                trigger=r[1], change_type=r[2],
                old_value=json.loads(r[3]) if r[3] else None,
                new_value=json.loads(r[4]) if r[4] else None,
                reason=r[5], expected_improvement=r[6]
            )
            for r in rows
        ]
class IntelligenceEngine:
    """
    The brain that makes Timmy smarter.

    Continuously:
    - Analyzes execution patterns
    - Identifies improvement opportunities
    - Adapts policies and routing
    - Predicts optimal configurations
    """

    def __init__(self, db: "PatternDatabase" = None):
        """Create the engine around *db* (a default PatternDatabase if omitted)."""
        self.db = db or PatternDatabase()
        # In-process log of adaptations made this run; the durable history
        # lives in the database (see PatternDatabase.record_adaptation).
        self.adaptation_history: List["AdaptationEvent"] = []
        self.current_policies = self._load_default_policies()

    def _load_default_policies(self) -> Dict:
        """Return the default policy set (mutated in place by analyze_and_adapt)."""
        return {
            "ezra": {
                "evidence_threshold": 0.8,
                "confidence_boost_for_read_ops": 0.1
            },
            "bezalel": {
                "evidence_threshold": 0.6,
                "parallel_test_threshold": 0.5
            },
            "routing": {
                "min_confidence_for_auto_route": 0.7,
                "fallback_to_timmy_threshold": 0.3
            }
        }

    def ingest_hermes_session(self, session_data: Dict):
        """
        Ingest telemetry from a Hermes harness session.

        This is the SHORTEST LOOP - Hermes data flows directly into the
        pattern database. Returns the number of tool executions recorded.
        """
        model = session_data.get("model")
        started_at = session_data.get("started_at")
        count = 0
        for msg in session_data.get("messages", []):
            if msg.get("role") != "tool":
                continue
            self.db.record_execution({
                "tool": msg.get("name", "unknown"),
                "success": not msg.get("error"),
                "latency_ms": msg.get("execution_time_ms", 0),
                "model": model,
                "timestamp": started_at
            })
            count += 1
        return count

    def analyze_and_adapt(self) -> List["AdaptationEvent"]:
        """
        Analyze patterns and adapt policies.

        Called periodically to improve system performance. Returns the
        adaptations made (each is also persisted to the database).
        """
        adaptations = []
        # Analysis 1: House performance gaps over the last 3 days
        house_perf = {
            "ezra": self.db.get_house_performance("ezra", days=3),
            "bezalel": self.db.get_house_performance("bezalel", days=3),
            "timmy": self.db.get_house_performance("timmy", days=3)
        }
        # If Ezra's success rate is low, relax the evidence threshold.
        # Robustness fix: require at least one recorded execution — with no
        # data the success rate reads as 0 and the old code adapted on noise.
        # The > 0.6 guard keeps a floor on how far the threshold can drop.
        ezra_rate = house_perf["ezra"].get("success_rate", 0.5)
        has_ezra_data = house_perf["ezra"].get("total_executions", 0) > 0
        if (has_ezra_data and ezra_rate < 0.6
                and self.current_policies["ezra"]["evidence_threshold"] > 0.6):
            old_val = self.current_policies["ezra"]["evidence_threshold"]
            new_val = old_val - 0.1
            self.current_policies["ezra"]["evidence_threshold"] = new_val
            # NOTE(review): datetime.utcnow() is deprecated in 3.12+; kept to
            # preserve the existing naive-ISO timestamp format.
            adapt = AdaptationEvent(
                timestamp=datetime.utcnow().isoformat(),
                trigger="low_ezra_success_rate",
                change_type="policy.ezra.evidence_threshold",
                old_value=old_val,
                new_value=new_val,
                reason=f"Ezra success rate {ezra_rate:.1%} below threshold, relaxing evidence requirement",
                expected_improvement=0.1
            )
            adaptations.append(adapt)
            self.db.record_adaptation(adapt)
        # Analysis 2: Model selection optimization
        for task_type in ["read", "build", "test", "judge"]:
            best_model = self.db.get_best_model(task_type, min_samples=10)
            if best_model:
                # TODO: wire the winner into a model-selection policy;
                # the lookup is currently informational only.
                pass
        self.adaptation_history.extend(adaptations)
        return adaptations

    def predict_success(self, tool: str, house: str,
                        params: Dict = None) -> Tuple[float, str]:
        """
        Predict success probability for a planned execution.

        Returns (probability, reasoning). Falls back to a 0.5 prior when
        fewer than 3 comparable executions have been recorded.
        """
        pattern = self.db.get_pattern(tool, house, params)
        if not pattern or pattern.sample_count < 3:
            return (0.5, "Insufficient data for prediction")
        reasoning = f"Based on {pattern.sample_count} similar executions: "
        if pattern.success_rate > 0.9:
            reasoning += "excellent track record"
        elif pattern.success_rate > 0.7:
            reasoning += "good track record"
        elif pattern.success_rate > 0.5:
            reasoning += "mixed results"
        else:
            reasoning += "poor track record, consider alternatives"
        return (pattern.success_rate, reasoning)

    def get_optimal_house(self, tool: str, params: Dict = None) -> Tuple[str, float]:
        """
        Determine the optimal house for a task from historical performance.

        Returns (house, confidence); defaults to ("timmy", 0.5) when no
        house has any recorded history for this tool.
        """
        houses = ["ezra", "bezalel", "timmy"]
        best_house = "timmy"
        best_rate = 0.0
        for house in houses:
            pattern = self.db.get_pattern(tool, house, params)
            if pattern and pattern.success_rate > best_rate:
                best_rate = pattern.success_rate
                best_house = house
        confidence = best_rate if best_rate > 0 else 0.5
        return (best_house, confidence)

    def get_intelligence_report(self) -> Dict:
        """Generate a comprehensive, JSON-serializable intelligence report."""
        return {
            "timestamp": datetime.utcnow().isoformat(),
            "house_performance": {
                "ezra": self.db.get_house_performance("ezra", days=7),
                "bezalel": self.db.get_house_performance("bezalel", days=7),
                "timmy": self.db.get_house_performance("timmy", days=7)
            },
            "current_policies": self.current_policies,
            "recent_adaptations": [
                # Bug fix: use asdict() — the original called a.to_dict(),
                # which AdaptationEvent did not define, so any report with
                # recorded adaptations raised AttributeError.
                asdict(a) for a in self.db.get_adaptations(limit=10)
            ],
            "learning_velocity": self._calculate_learning_velocity(),
            "prediction_accuracy": self._calculate_prediction_accuracy()
        }

    def _calculate_learning_velocity(self) -> Dict:
        """Compare success rate of the last 3 days vs the 3 days before that."""
        conn = sqlite3.connect(str(self.db.db_path))
        try:
            now = time.time()
            recent_start = now - (3 * 86400)
            previous_start = now - (6 * 86400)
            recent = conn.execute("""
                SELECT AVG(success) FROM executions WHERE timestamp > ?
            """, (recent_start,)).fetchone()[0] or 0
            previous = conn.execute("""
                SELECT AVG(success) FROM executions
                WHERE timestamp > ? AND timestamp <= ?
            """, (previous_start, recent_start)).fetchone()[0] or 0
        finally:
            conn.close()
        improvement = recent - previous
        return {
            "recent_success_rate": recent,
            "previous_success_rate": previous,
            "improvement": improvement,
            "velocity": "accelerating" if improvement > 0.05 else
                        "stable" if improvement > -0.05 else "declining"
        }

    def _calculate_prediction_accuracy(self) -> float:
        """Return average prediction accuracy over the last 7 days (0.5 if unknown)."""
        conn = sqlite3.connect(str(self.db.db_path))
        try:
            result = conn.execute("""
                SELECT AVG(prediction_accuracy) FROM predictions
                WHERE timestamp > ?
            """, (time.time() - (7 * 86400),)).fetchone()
        finally:
            conn.close()
        # Bug fix: `is not None` — the original truthiness test turned a
        # legitimate average accuracy of 0.0 into the 0.5 "unknown" default.
        return result[0] if result[0] is not None else 0.5
if __name__ == "__main__":
    # Demo the intelligence engine.
    # NOTE(review): uses the default database at ~/.timmy/intelligence.db,
    # so repeated demo runs accumulate in the same store — pass a temp path
    # to PatternDatabase to keep demo data isolated.
    engine = IntelligenceEngine()
    # Simulate some executions: alternate houses, fixed model/task type.
    for i in range(20):
        engine.db.record_execution({
            "tool": "git_status",
            "house": "ezra" if i % 2 == 0 else "bezalel",
            "model": "hermes3:8b",
            "task_type": "read",
            "success": i < 15,  # 75% success rate
            "latency_ms": 100 + i * 5,
            "confidence": 0.8
        })
    print("=" * 60)
    print("INTELLIGENCE ENGINE v3 — Self-Improvement Demo")
    print("=" * 60)
    # Get predictions (based on the executions recorded just above)
    pred, reason = engine.predict_success("git_status", "ezra")
    print(f"\n🔮 Prediction for ezra/git_status: {pred:.1%}")
    print(f"   Reasoning: {reason}")
    # Analyze and adapt (may adjust policies if a house underperforms)
    adaptations = engine.analyze_and_adapt()
    print(f"\n🔄 Adaptations made: {len(adaptations)}")
    for a in adaptations:
        print(f"   - {a.change_type}: {a.old_value} → {a.new_value}")
        print(f"     Reason: {a.reason}")
    # Get the full report (house performance, policies, learning velocity)
    report = engine.get_intelligence_report()
    print(f"\n📊 Learning Velocity: {report['learning_velocity']['velocity']}")
    print(f"   Improvement: {report['learning_velocity']['improvement']:+.1%}")
    print("\n" + "=" * 60)