forked from Rockachopa/Timmy-time-dashboard
Implements the foundational infrastructure for Timmy's self-modification capability:
## New Services
1. **GitSafety** (src/self_coding/git_safety.py)
- Atomic git operations with rollback capability
- Snapshot/restore for safe experimentation
- Feature branch management (timmy/self-edit/{timestamp})
- Merge to main only after tests pass
2. **CodebaseIndexer** (src/self_coding/codebase_indexer.py)
- AST-based parsing of Python source files
- Extracts classes, functions, imports, docstrings
- Builds dependency graph for blast radius analysis
- SQLite storage with hash-based incremental indexing
- get_summary() for LLM context (<4000 tokens)
- get_relevant_files() for task-based file discovery
3. **ModificationJournal** (src/self_coding/modification_journal.py)
- Persistent log of all self-modification attempts
- Tracks outcomes: success, failure, rollback
- find_similar() for learning from past attempts
- Success rate metrics and recent failure tracking
- Supports vector embeddings (Phase 2)
4. **ReflectionService** (src/self_coding/reflection.py)
- LLM-powered analysis of modification attempts
- Generates lessons learned from successes and failures
- Fallback templates when LLM unavailable
- Supports context from similar past attempts
## Test Coverage
- 104 new tests across 7 test files
- 95% code coverage on self_coding module
- Green path tests: full workflow integration
- Red path tests: errors, rollbacks, edge cases
- Safety constraint tests: test coverage requirements, protected files
## Usage
from self_coding import GitSafety, CodebaseIndexer, ModificationJournal
git = GitSafety(repo_path="/path/to/repo")
indexer = CodebaseIndexer(repo_path="/path/to/repo")
journal = ModificationJournal()
Phase 2 will build the Self-Edit MCP Tool that orchestrates these services.
426 lines
14 KiB
Python
426 lines
14 KiB
Python
"""Modification Journal — Persistent log of self-modification attempts.

Tracks successes and failures so Timmy can learn from experience.
Supports semantic search for similar past attempts.
"""
|
|
|
|
from __future__ import annotations

import json
import logging
import sqlite3
from contextlib import closing
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Optional
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Default database location
|
|
DEFAULT_DB_PATH = Path("data/self_coding.db")
|
|
|
|
|
|
class Outcome(str, Enum):
    """Possible outcomes of a modification attempt.

    Subclasses ``str`` so members compare equal to their raw string values
    (e.g. ``Outcome.SUCCESS == "success"``) and bind cleanly as SQLite
    parameters; the table's CHECK constraint allows exactly these values.
    """

    SUCCESS = "success"    # modification worked
    FAILURE = "failure"    # modification did not succeed
    ROLLBACK = "rollback"  # modification was reverted
|
|
|
|
|
|
@dataclass
class ModificationAttempt:
    """A single self-modification attempt.

    Field order matters: ``task_description`` is the only required
    positional argument; everything else defaults to an "empty failure"
    so partially-known attempts can be recorded early.

    Attributes:
        id: Unique identifier (auto-generated by database)
        timestamp: When the attempt was made
        task_description: What was Timmy trying to do
        approach: Strategy/approach planned
        files_modified: List of file paths that were modified
        diff: The actual git diff of changes
        test_results: Pytest output
        outcome: success, failure, or rollback
        failure_analysis: LLM-generated analysis of why it failed
        reflection: LLM-generated lessons learned
        retry_count: Number of retry attempts
        embedding: Vector embedding of task_description (for semantic search)
    """

    task_description: str
    approach: str = ""
    # default_factory avoids the shared-mutable-default pitfall
    files_modified: list[str] = field(default_factory=list)
    diff: str = ""
    test_results: str = ""
    # Defaults to FAILURE: an attempt must be explicitly marked successful.
    outcome: Outcome = Outcome.FAILURE
    failure_analysis: str = ""
    reflection: str = ""
    retry_count: int = 0
    # id/timestamp are None until the row is persisted; the database
    # assigns both (AUTOINCREMENT id, CURRENT_TIMESTAMP).
    id: Optional[int] = None
    timestamp: Optional[datetime] = None
    # Raw embedding bytes for Phase 2 semantic search; unused today.
    embedding: Optional[bytes] = None
|
|
|
|
|
|
class ModificationJournal:
    """Persistent log of self-modification attempts.

    Before any self-modification, Timmy should query the journal for
    similar past attempts and include relevant ones in the LLM context.

    Every method opens a short-lived SQLite connection and closes it before
    returning.  Note: ``sqlite3.Connection`` used directly as a context
    manager only scopes a *transaction* — it does NOT close the connection —
    so each connection here is wrapped in ``contextlib.closing`` to avoid
    leaking one OS-level database handle per call.

    Usage:
        journal = ModificationJournal()

        # Log an attempt
        attempt = ModificationAttempt(
            task_description="Add error handling",
            files_modified=["src/app.py"],
            outcome=Outcome.SUCCESS,
        )
        await journal.log_attempt(attempt)

        # Find similar past attempts
        similar = await journal.find_similar("Add error handling to endpoints")

        # Get success metrics
        metrics = await journal.get_success_rate()
    """

    def __init__(
        self,
        db_path: Optional[str | Path] = None,
    ) -> None:
        """Initialize ModificationJournal.

        Args:
            db_path: SQLite database path. Defaults to data/self_coding.db
        """
        self.db_path = Path(db_path) if db_path else DEFAULT_DB_PATH
        self._ensure_schema()
        logger.info("ModificationJournal initialized at %s", self.db_path)

    def _get_conn(self) -> sqlite3.Connection:
        """Open a new database connection.

        Creates the parent directory on demand so a fresh checkout works.
        Callers are responsible for closing the returned connection
        (wrap it in ``closing(...)``).
        """
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(self.db_path))
        # Row factory enables name-based column access in _row_to_attempt.
        conn.row_factory = sqlite3.Row
        return conn

    def _ensure_schema(self) -> None:
        """Create database tables and indexes if they don't exist."""
        with closing(self._get_conn()) as conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS modification_journal (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    task_description TEXT NOT NULL,
                    approach TEXT,
                    files_modified JSON,
                    diff TEXT,
                    test_results TEXT,
                    outcome TEXT CHECK(outcome IN ('success', 'failure', 'rollback')),
                    failure_analysis TEXT,
                    reflection TEXT,
                    retry_count INTEGER DEFAULT 0,
                    embedding BLOB
                )
                """
            )

            # Create indexes for common queries
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_outcome ON modification_journal(outcome)"
            )
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_timestamp ON modification_journal(timestamp)"
            )
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_task ON modification_journal(task_description)"
            )

            conn.commit()

    async def log_attempt(self, attempt: ModificationAttempt) -> int:
        """Log a modification attempt to the journal.

        Args:
            attempt: The modification attempt to log

        Returns:
            ID of the logged entry
        """
        with closing(self._get_conn()) as conn:
            cursor = conn.execute(
                """
                INSERT INTO modification_journal
                (task_description, approach, files_modified, diff, test_results,
                 outcome, failure_analysis, reflection, retry_count, embedding)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    attempt.task_description,
                    attempt.approach,
                    # files_modified is stored as a JSON array string.
                    json.dumps(attempt.files_modified),
                    attempt.diff,
                    attempt.test_results,
                    attempt.outcome.value,
                    attempt.failure_analysis,
                    attempt.reflection,
                    attempt.retry_count,
                    attempt.embedding,
                ),
            )
            conn.commit()

            attempt_id = cursor.lastrowid
            logger.info(
                "Logged modification attempt %d: %s (%s)",
                attempt_id,
                attempt.task_description[:50],
                attempt.outcome.value,
            )
            return attempt_id

    async def find_similar(
        self,
        task_description: str,
        limit: int = 5,
        include_outcomes: Optional[list[Outcome]] = None,
    ) -> list[ModificationAttempt]:
        """Find similar past modification attempts.

        Uses keyword matching for now. In Phase 2, will use vector embeddings
        for semantic search.

        Scoring: +3 per keyword found in the task description, +1 per keyword
        found in the approach; matched rows get a recency boost (+2 if under
        7 days old, +1 if under 30). Rows with score 0 are dropped.

        Args:
            task_description: Task to find similar attempts for
            limit: Maximum number of results
            include_outcomes: Filter by outcomes (None = all)

        Returns:
            List of similar modification attempts
        """
        # Extract keywords from task description, minus common stopwords.
        keywords = set(task_description.lower().split())
        keywords -= {"the", "a", "an", "to", "in", "on", "at", "for", "with", "and", "or", "of", "is", "are"}

        with closing(self._get_conn()) as conn:
            # Build an optional outcome filter with one placeholder per value.
            if include_outcomes:
                outcome_filter = "AND outcome IN ({})".format(
                    ",".join("?" * len(include_outcomes))
                )
                outcome_values = [o.value for o in include_outcomes]
            else:
                outcome_filter = ""
                outcome_values = []

            rows = conn.execute(
                f"""
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE 1=1 {outcome_filter}
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                outcome_values + [limit * 3],  # Over-fetch so scoring has candidates
            ).fetchall()

        # Score by keyword match (no DB access needed past this point).
        scored = []
        for row in rows:
            score = 0
            task = row["task_description"].lower()
            approach = (row["approach"] or "").lower()

            for kw in keywords:
                if kw in task:
                    score += 3
                if kw in approach:
                    score += 1

            # Boost recent attempts (only if already matched)
            if score > 0:
                timestamp = datetime.fromisoformat(row["timestamp"])
                if timestamp.tzinfo is None:
                    # SQLite CURRENT_TIMESTAMP is UTC but stored naive.
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
                age_days = (datetime.now(timezone.utc) - timestamp).days
                if age_days < 7:
                    score += 2
                elif age_days < 30:
                    score += 1

            if score > 0:
                scored.append((score, row))

        # Sort by score descending, take top N (stable sort preserves
        # the timestamp-DESC order among equal scores).
        scored.sort(reverse=True, key=lambda x: x[0])
        top_rows = scored[:limit]

        # Convert to ModificationAttempt objects
        return [self._row_to_attempt(row) for _, row in top_rows]

    async def get_success_rate(self) -> dict[str, float]:
        """Get success rate metrics.

        Returns:
            Dict with overall rate (float, 0.0 to 1.0 — 0.0 when the journal
            is empty) and per-outcome counts (ints):
                {
                    "overall": float,
                    "success": int,
                    "failure": int,
                    "rollback": int,
                    "total": int,
                }
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                """
                SELECT outcome, COUNT(*) as count
                FROM modification_journal
                GROUP BY outcome
                """
            ).fetchall()

        counts = {row["outcome"]: row["count"] for row in rows}

        success = counts.get("success", 0)
        failure = counts.get("failure", 0)
        rollback = counts.get("rollback", 0)
        total = success + failure + rollback

        # Guard against division by zero on an empty journal.
        overall = success / total if total > 0 else 0.0

        return {
            "overall": overall,
            "success": success,
            "failure": failure,
            "rollback": rollback,
            "total": total,
        }

    async def get_recent_failures(self, limit: int = 10) -> list[ModificationAttempt]:
        """Get recent failed attempts with their analyses.

        Rollbacks count as failures for this purpose.

        Args:
            limit: Maximum number of failures to return

        Returns:
            List of failed modification attempts, most recent first
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                """
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE outcome IN ('failure', 'rollback')
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                (limit,),
            ).fetchall()

        return [self._row_to_attempt(row) for row in rows]

    async def get_by_id(self, attempt_id: int) -> Optional[ModificationAttempt]:
        """Get a specific modification attempt by ID.

        Args:
            attempt_id: ID of the attempt

        Returns:
            ModificationAttempt or None if not found
        """
        with closing(self._get_conn()) as conn:
            row = conn.execute(
                """
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE id = ?
                """,
                (attempt_id,),
            ).fetchone()

        if not row:
            return None

        return self._row_to_attempt(row)

    async def update_reflection(self, attempt_id: int, reflection: str) -> bool:
        """Update the reflection for a modification attempt.

        Args:
            attempt_id: ID of the attempt
            reflection: New reflection text

        Returns:
            True if updated, False if not found
        """
        with closing(self._get_conn()) as conn:
            cursor = conn.execute(
                """
                UPDATE modification_journal
                SET reflection = ?
                WHERE id = ?
                """,
                (reflection, attempt_id),
            )
            conn.commit()

            # rowcount == 0 means no row matched the given id.
            if cursor.rowcount > 0:
                logger.info("Updated reflection for attempt %d", attempt_id)
                return True
            return False

    async def get_attempts_for_file(
        self,
        file_path: str,
        limit: int = 10,
    ) -> list[ModificationAttempt]:
        """Get all attempts that modified a specific file.

        Matches against the JSON-encoded files_modified column: first as an
        exact quoted entry, then as a substring (so partial paths match too).

        Args:
            file_path: Path to file (relative to repo root)
            limit: Maximum number of attempts

        Returns:
            List of modification attempts affecting this file
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                """
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE files_modified LIKE ? OR files_modified LIKE ?
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                (f'%"{file_path}"%', f'%{file_path}%', limit),
            ).fetchall()

        return [self._row_to_attempt(row) for row in rows]

    def _row_to_attempt(self, row: sqlite3.Row) -> ModificationAttempt:
        """Convert a database row to a ModificationAttempt.

        NULL text columns become empty strings; the embedding column is not
        selected by any query and is left as None.
        """
        return ModificationAttempt(
            id=row["id"],
            timestamp=datetime.fromisoformat(row["timestamp"]),
            task_description=row["task_description"],
            approach=row["approach"] or "",
            files_modified=json.loads(row["files_modified"] or "[]"),
            diff=row["diff"] or "",
            test_results=row["test_results"] or "",
            outcome=Outcome(row["outcome"]),
            failure_analysis=row["failure_analysis"] or "",
            reflection=row["reflection"] or "",
            retry_count=row["retry_count"] or 0,
        )