"""Modification Journal — Persistent log of self-modification attempts. Tracks successes and failures so Timmy can learn from experience. Supports semantic search for similar past attempts. """ from __future__ import annotations import json import logging import sqlite3 from dataclasses import dataclass, field from datetime import datetime, timezone from enum import Enum from pathlib import Path from typing import Optional logger = logging.getLogger(__name__) # Default database location DEFAULT_DB_PATH = Path("data/self_coding.db") class Outcome(str, Enum): """Possible outcomes of a modification attempt.""" SUCCESS = "success" FAILURE = "failure" ROLLBACK = "rollback" @dataclass class ModificationAttempt: """A single self-modification attempt. Attributes: id: Unique identifier (auto-generated by database) timestamp: When the attempt was made task_description: What was Timmy trying to do approach: Strategy/approach planned files_modified: List of file paths that were modified diff: The actual git diff of changes test_results: Pytest output outcome: success, failure, or rollback failure_analysis: LLM-generated analysis of why it failed reflection: LLM-generated lessons learned retry_count: Number of retry attempts embedding: Vector embedding of task_description (for semantic search) """ task_description: str approach: str = "" files_modified: list[str] = field(default_factory=list) diff: str = "" test_results: str = "" outcome: Outcome = Outcome.FAILURE failure_analysis: str = "" reflection: str = "" retry_count: int = 0 id: Optional[int] = None timestamp: Optional[datetime] = None embedding: Optional[bytes] = None class ModificationJournal: """Persistent log of self-modification attempts. Before any self-modification, Timmy should query the journal for similar past attempts and include relevant ones in the LLM context. Usage: journal = ModificationJournal() # Log an attempt attempt = ModificationAttempt( task_description="Add error handling", files_modified=["src/app.py"], outcome=Outcome.SUCCESS, ) await journal.log_attempt(attempt) # Find similar past attempts similar = await journal.find_similar("Add error handling to endpoints") # Get success metrics metrics = await journal.get_success_rate() """ def __init__( self, db_path: Optional[str | Path] = None, ) -> None: """Initialize ModificationJournal. Args: db_path: SQLite database path. Defaults to data/self_coding.db """ self.db_path = Path(db_path) if db_path else DEFAULT_DB_PATH self._ensure_schema() logger.info("ModificationJournal initialized at %s", self.db_path) def _get_conn(self) -> sqlite3.Connection: """Get database connection with schema ensured.""" self.db_path.parent.mkdir(parents=True, exist_ok=True) conn = sqlite3.connect(str(self.db_path)) conn.row_factory = sqlite3.Row return conn def _ensure_schema(self) -> None: """Create database tables if they don't exist.""" with self._get_conn() as conn: conn.execute( """ CREATE TABLE IF NOT EXISTS modification_journal ( id INTEGER PRIMARY KEY AUTOINCREMENT, timestamp DATETIME DEFAULT CURRENT_TIMESTAMP, task_description TEXT NOT NULL, approach TEXT, files_modified JSON, diff TEXT, test_results TEXT, outcome TEXT CHECK(outcome IN ('success', 'failure', 'rollback')), failure_analysis TEXT, reflection TEXT, retry_count INTEGER DEFAULT 0, embedding BLOB ) """ ) # Create indexes for common queries conn.execute( "CREATE INDEX IF NOT EXISTS idx_journal_outcome ON modification_journal(outcome)" ) conn.execute( "CREATE INDEX IF NOT EXISTS idx_journal_timestamp ON modification_journal(timestamp)" ) conn.execute( "CREATE INDEX IF NOT EXISTS idx_journal_task ON modification_journal(task_description)" ) conn.commit() async def log_attempt(self, attempt: ModificationAttempt) -> int: """Log a modification attempt to the journal. Args: attempt: The modification attempt to log Returns: ID of the logged entry """ with self._get_conn() as conn: cursor = conn.execute( """ INSERT INTO modification_journal (task_description, approach, files_modified, diff, test_results, outcome, failure_analysis, reflection, retry_count, embedding) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( attempt.task_description, attempt.approach, json.dumps(attempt.files_modified), attempt.diff, attempt.test_results, attempt.outcome.value, attempt.failure_analysis, attempt.reflection, attempt.retry_count, attempt.embedding, ), ) conn.commit() attempt_id = cursor.lastrowid logger.info( "Logged modification attempt %d: %s (%s)", attempt_id, attempt.task_description[:50], attempt.outcome.value, ) return attempt_id async def find_similar( self, task_description: str, limit: int = 5, include_outcomes: Optional[list[Outcome]] = None, ) -> list[ModificationAttempt]: """Find similar past modification attempts. Uses keyword matching for now. In Phase 2, will use vector embeddings for semantic search. Args: task_description: Task to find similar attempts for limit: Maximum number of results include_outcomes: Filter by outcomes (None = all) Returns: List of similar modification attempts """ # Extract keywords from task description keywords = set(task_description.lower().split()) keywords -= {"the", "a", "an", "to", "in", "on", "at", "for", "with", "and", "or", "of", "is", "are"} with self._get_conn() as conn: # Build query if include_outcomes: outcome_filter = "AND outcome IN ({})".format( ",".join("?" * len(include_outcomes)) ) outcome_values = [o.value for o in include_outcomes] else: outcome_filter = "" outcome_values = [] rows = conn.execute( f""" SELECT id, timestamp, task_description, approach, files_modified, diff, test_results, outcome, failure_analysis, reflection, retry_count FROM modification_journal WHERE 1=1 {outcome_filter} ORDER BY timestamp DESC LIMIT ? """, outcome_values + [limit * 3], # Get more for scoring ).fetchall() # Score by keyword match scored = [] for row in rows: score = 0 task = row["task_description"].lower() approach = (row["approach"] or "").lower() for kw in keywords: if kw in task: score += 3 if kw in approach: score += 1 # Boost recent attempts (only if already matched) if score > 0: timestamp = datetime.fromisoformat(row["timestamp"]) if timestamp.tzinfo is None: timestamp = timestamp.replace(tzinfo=timezone.utc) age_days = (datetime.now(timezone.utc) - timestamp).days if age_days < 7: score += 2 elif age_days < 30: score += 1 if score > 0: scored.append((score, row)) # Sort by score, take top N scored.sort(reverse=True, key=lambda x: x[0]) top_rows = scored[:limit] # Convert to ModificationAttempt objects return [self._row_to_attempt(row) for _, row in top_rows] async def get_success_rate(self) -> dict[str, float]: """Get success rate metrics. Returns: Dict with overall and per-category success rates: { "overall": float, # 0.0 to 1.0 "success": int, # count "failure": int, # count "rollback": int, # count "total": int, # total attempts } """ with self._get_conn() as conn: rows = conn.execute( """ SELECT outcome, COUNT(*) as count FROM modification_journal GROUP BY outcome """ ).fetchall() counts = {row["outcome"]: row["count"] for row in rows} success = counts.get("success", 0) failure = counts.get("failure", 0) rollback = counts.get("rollback", 0) total = success + failure + rollback overall = success / total if total > 0 else 0.0 return { "overall": overall, "success": success, "failure": failure, "rollback": rollback, "total": total, } async def get_recent_failures(self, limit: int = 10) -> list[ModificationAttempt]: """Get recent failed attempts with their analyses. Args: limit: Maximum number of failures to return Returns: List of failed modification attempts """ with self._get_conn() as conn: rows = conn.execute( """ SELECT id, timestamp, task_description, approach, files_modified, diff, test_results, outcome, failure_analysis, reflection, retry_count FROM modification_journal WHERE outcome IN ('failure', 'rollback') ORDER BY timestamp DESC LIMIT ? """, (limit,), ).fetchall() return [self._row_to_attempt(row) for row in rows] async def get_by_id(self, attempt_id: int) -> Optional[ModificationAttempt]: """Get a specific modification attempt by ID. Args: attempt_id: ID of the attempt Returns: ModificationAttempt or None if not found """ with self._get_conn() as conn: row = conn.execute( """ SELECT id, timestamp, task_description, approach, files_modified, diff, test_results, outcome, failure_analysis, reflection, retry_count FROM modification_journal WHERE id = ? """, (attempt_id,), ).fetchone() if not row: return None return self._row_to_attempt(row) async def update_reflection(self, attempt_id: int, reflection: str) -> bool: """Update the reflection for a modification attempt. Args: attempt_id: ID of the attempt reflection: New reflection text Returns: True if updated, False if not found """ with self._get_conn() as conn: cursor = conn.execute( """ UPDATE modification_journal SET reflection = ? WHERE id = ? """, (reflection, attempt_id), ) conn.commit() if cursor.rowcount > 0: logger.info("Updated reflection for attempt %d", attempt_id) return True return False async def get_attempts_for_file( self, file_path: str, limit: int = 10, ) -> list[ModificationAttempt]: """Get all attempts that modified a specific file. Args: file_path: Path to file (relative to repo root) limit: Maximum number of attempts Returns: List of modification attempts affecting this file """ with self._get_conn() as conn: # Try exact match first, then partial match rows = conn.execute( """ SELECT id, timestamp, task_description, approach, files_modified, diff, test_results, outcome, failure_analysis, reflection, retry_count FROM modification_journal WHERE files_modified LIKE ? OR files_modified LIKE ? ORDER BY timestamp DESC LIMIT ? """, (f'%"{file_path}"%', f'%{file_path}%', limit), ).fetchall() return [self._row_to_attempt(row) for row in rows] def _row_to_attempt(self, row: sqlite3.Row) -> ModificationAttempt: """Convert a database row to ModificationAttempt.""" return ModificationAttempt( id=row["id"], timestamp=datetime.fromisoformat(row["timestamp"]), task_description=row["task_description"], approach=row["approach"] or "", files_modified=json.loads(row["files_modified"] or "[]"), diff=row["diff"] or "", test_results=row["test_results"] or "", outcome=Outcome(row["outcome"]), failure_analysis=row["failure_analysis"] or "", reflection=row["reflection"] or "", retry_count=row["retry_count"] or 0, )