This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/src/self_coding/modification_journal.py

426 lines
14 KiB
Python
Raw Normal View History

feat: Self-Coding Foundation (Phase 1) Implements the foundational infrastructure for Timmy's self-modification capability: ## New Services 1. **GitSafety** (src/self_coding/git_safety.py) - Atomic git operations with rollback capability - Snapshot/restore for safe experimentation - Feature branch management (timmy/self-edit/{timestamp}) - Merge to main only after tests pass 2. **CodebaseIndexer** (src/self_coding/codebase_indexer.py) - AST-based parsing of Python source files - Extracts classes, functions, imports, docstrings - Builds dependency graph for blast radius analysis - SQLite storage with hash-based incremental indexing - get_summary() for LLM context (<4000 tokens) - get_relevant_files() for task-based file discovery 3. **ModificationJournal** (src/self_coding/modification_journal.py) - Persistent log of all self-modification attempts - Tracks outcomes: success, failure, rollback - find_similar() for learning from past attempts - Success rate metrics and recent failure tracking - Supports vector embeddings (Phase 2) 4. **ReflectionService** (src/self_coding/reflection.py) - LLM-powered analysis of modification attempts - Generates lessons learned from successes and failures - Fallback templates when LLM unavailable - Supports context from similar past attempts ## Test Coverage - 104 new tests across 7 test files - 95% code coverage on self_coding module - Green path tests: full workflow integration - Red path tests: errors, rollbacks, edge cases - Safety constraint tests: test coverage requirements, protected files ## Usage from self_coding import GitSafety, CodebaseIndexer, ModificationJournal git = GitSafety(repo_path=/path/to/repo) indexer = CodebaseIndexer(repo_path=/path/to/repo) journal = ModificationJournal() Phase 2 will build the Self-Edit MCP Tool that orchestrates these services.
2026-02-26 11:08:05 -05:00
"""Modification Journal — Persistent log of self-modification attempts.
Tracks successes and failures so Timmy can learn from experience.
Supports semantic search for similar past attempts.
"""
from __future__ import annotations

import json
import logging
import sqlite3
from contextlib import closing
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
# Default database location
DEFAULT_DB_PATH = Path("data/self_coding.db")
class Outcome(str, Enum):
    """Possible outcomes of a modification attempt.

    Inherits from ``str`` so members compare equal to their plain-string
    values ("success", "failure", "rollback") and serialize transparently
    into the SQLite ``outcome`` column.
    """

    SUCCESS = "success"
    FAILURE = "failure"
    ROLLBACK = "rollback"
@dataclass
class ModificationAttempt:
    """A single self-modification attempt.

    Field order matters: ``task_description`` is the only required
    positional field; everything else has a default.

    Attributes:
        id: Unique identifier (auto-generated by database)
        timestamp: When the attempt was made
        task_description: What was Timmy trying to do
        approach: Strategy/approach planned
        files_modified: List of file paths that were modified
        diff: The actual git diff of changes
        test_results: Pytest output
        outcome: success, failure, or rollback
        failure_analysis: LLM-generated analysis of why it failed
        reflection: LLM-generated lessons learned
        retry_count: Number of retry attempts
        embedding: Vector embedding of task_description (for semantic search)
    """

    task_description: str
    approach: str = ""
    files_modified: list[str] = field(default_factory=list)
    diff: str = ""
    test_results: str = ""
    # Default outcome is FAILURE until a caller explicitly records otherwise.
    outcome: Outcome = Outcome.FAILURE
    failure_analysis: str = ""
    reflection: str = ""
    retry_count: int = 0
    # id/timestamp are populated when a record is read back from the database.
    id: Optional[int] = None
    timestamp: Optional[datetime] = None
    # Raw embedding bytes; reserved for Phase 2 semantic search.
    embedding: Optional[bytes] = None
class ModificationJournal:
    """Persistent log of self-modification attempts.

    Before any self-modification, Timmy should query the journal for
    similar past attempts and include relevant ones in the LLM context.

    Methods are ``async`` for interface consistency with the rest of the
    service layer, although the SQLite access itself is synchronous.

    Usage:
        journal = ModificationJournal()

        # Log an attempt
        attempt = ModificationAttempt(
            task_description="Add error handling",
            files_modified=["src/app.py"],
            outcome=Outcome.SUCCESS,
        )
        await journal.log_attempt(attempt)

        # Find similar past attempts
        similar = await journal.find_similar("Add error handling to endpoints")

        # Get success metrics
        metrics = await journal.get_success_rate()
    """

    # Column list shared by every SELECT that is hydrated into a
    # ModificationAttempt. ``embedding`` is intentionally excluded to keep
    # result rows small; it is only read in Phase 2 semantic search.
    _SELECT_COLUMNS = (
        "id, timestamp, task_description, approach, files_modified, "
        "diff, test_results, outcome, failure_analysis, reflection, retry_count"
    )

    def __init__(
        self,
        db_path: Optional[str | Path] = None,
    ) -> None:
        """Initialize ModificationJournal.

        Args:
            db_path: SQLite database path. Defaults to data/self_coding.db
        """
        self.db_path = Path(db_path) if db_path else DEFAULT_DB_PATH
        self._ensure_schema()
        logger.info("ModificationJournal initialized at %s", self.db_path)

    def _get_conn(self) -> sqlite3.Connection:
        """Open a new connection to the journal database.

        Creates the parent directory on demand so a fresh checkout works
        without setup. Callers are responsible for closing the connection;
        every method in this class wraps it in ``closing(...)``.
        """
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(self.db_path))
        conn.row_factory = sqlite3.Row  # enables access by column name
        return conn

    def _ensure_schema(self) -> None:
        """Create database tables and indexes if they don't exist."""
        # BUGFIX: previously every method used ``with self._get_conn() as
        # conn`` — sqlite3's connection context manager only manages the
        # transaction and does NOT close the connection, so each call
        # leaked a file handle. ``closing(...)`` guarantees the close.
        with closing(self._get_conn()) as conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS modification_journal (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    task_description TEXT NOT NULL,
                    approach TEXT,
                    files_modified JSON,
                    diff TEXT,
                    test_results TEXT,
                    outcome TEXT CHECK(outcome IN ('success', 'failure', 'rollback')),
                    failure_analysis TEXT,
                    reflection TEXT,
                    retry_count INTEGER DEFAULT 0,
                    embedding BLOB
                )
                """
            )
            # Indexes for the common query patterns used below.
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_outcome ON modification_journal(outcome)"
            )
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_timestamp ON modification_journal(timestamp)"
            )
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_task ON modification_journal(task_description)"
            )
            conn.commit()

    async def log_attempt(self, attempt: ModificationAttempt) -> int:
        """Log a modification attempt to the journal.

        Args:
            attempt: The modification attempt to log

        Returns:
            ID of the logged entry
        """
        with closing(self._get_conn()) as conn:
            cursor = conn.execute(
                """
                INSERT INTO modification_journal
                (task_description, approach, files_modified, diff, test_results,
                 outcome, failure_analysis, reflection, retry_count, embedding)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    attempt.task_description,
                    attempt.approach,
                    json.dumps(attempt.files_modified),
                    attempt.diff,
                    attempt.test_results,
                    attempt.outcome.value,
                    attempt.failure_analysis,
                    attempt.reflection,
                    attempt.retry_count,
                    attempt.embedding,
                ),
            )
            conn.commit()
            # lastrowid is always set after a successful INSERT; int() keeps
            # the declared return type honest for type checkers.
            attempt_id = int(cursor.lastrowid)
        logger.info(
            "Logged modification attempt %d: %s (%s)",
            attempt_id,
            attempt.task_description[:50],
            attempt.outcome.value,
        )
        return attempt_id

    async def find_similar(
        self,
        task_description: str,
        limit: int = 5,
        include_outcomes: Optional[list[Outcome]] = None,
    ) -> list[ModificationAttempt]:
        """Find similar past modification attempts.

        Uses keyword matching for now. In Phase 2, will use vector embeddings
        for semantic search.

        Scoring: +3 per keyword found in the task description, +1 per keyword
        found in the approach, plus a recency boost (+2 if under 7 days old,
        +1 if under 30) applied only to rows that already matched a keyword.

        Args:
            task_description: Task to find similar attempts for
            limit: Maximum number of results
            include_outcomes: Filter by outcomes (None = all)

        Returns:
            List of similar modification attempts, best match first
        """
        # Extract keywords, dropping common stopwords.
        stopwords = {
            "the", "a", "an", "to", "in", "on", "at", "for",
            "with", "and", "or", "of", "is", "are",
        }
        keywords = set(task_description.lower().split()) - stopwords

        if include_outcomes:
            placeholders = ",".join("?" * len(include_outcomes))
            outcome_filter = f"AND outcome IN ({placeholders})"
            params: list = [o.value for o in include_outcomes]
        else:
            outcome_filter = ""
            params = []

        with closing(self._get_conn()) as conn:
            # Over-fetch (3x limit) so the keyword scoring below has a
            # candidate pool; only the most recent rows are considered.
            rows = conn.execute(
                f"""
                SELECT {self._SELECT_COLUMNS}
                FROM modification_journal
                WHERE 1=1 {outcome_filter}
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                params + [limit * 3],
            ).fetchall()

        now = datetime.now(timezone.utc)
        scored: list[tuple[int, sqlite3.Row]] = []
        for row in rows:
            task = row["task_description"].lower()
            approach = (row["approach"] or "").lower()
            score = sum(
                (3 if kw in task else 0) + (1 if kw in approach else 0)
                for kw in keywords
            )
            if score == 0:
                continue  # recency alone never qualifies a row
            timestamp = datetime.fromisoformat(row["timestamp"])
            if timestamp.tzinfo is None:
                # SQLite CURRENT_TIMESTAMP stores naive UTC.
                timestamp = timestamp.replace(tzinfo=timezone.utc)
            age_days = (now - timestamp).days
            if age_days < 7:
                score += 2
            elif age_days < 30:
                score += 1
            scored.append((score, row))

        # Sort by score descending and take the top N.
        scored.sort(reverse=True, key=lambda item: item[0])
        return [self._row_to_attempt(row) for _, row in scored[:limit]]

    async def get_success_rate(self) -> dict[str, float]:
        """Get success rate metrics.

        Returns:
            Dict with overall rate and raw counts:
            {
                "overall": float, # 0.0 to 1.0
                "success": int,   # count
                "failure": int,   # count
                "rollback": int,  # count
                "total": int,     # total attempts
            }
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                """
                SELECT outcome, COUNT(*) as count
                FROM modification_journal
                GROUP BY outcome
                """
            ).fetchall()
        counts = {row["outcome"]: row["count"] for row in rows}
        success = counts.get("success", 0)
        failure = counts.get("failure", 0)
        rollback = counts.get("rollback", 0)
        total = success + failure + rollback
        return {
            "overall": success / total if total > 0 else 0.0,
            "success": success,
            "failure": failure,
            "rollback": rollback,
            "total": total,
        }

    async def get_recent_failures(self, limit: int = 10) -> list[ModificationAttempt]:
        """Get recent failed attempts with their analyses.

        Rollbacks count as failures here.

        Args:
            limit: Maximum number of failures to return

        Returns:
            List of failed modification attempts, newest first
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                f"""
                SELECT {self._SELECT_COLUMNS}
                FROM modification_journal
                WHERE outcome IN ('failure', 'rollback')
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                (limit,),
            ).fetchall()
        return [self._row_to_attempt(row) for row in rows]

    async def get_by_id(self, attempt_id: int) -> Optional[ModificationAttempt]:
        """Get a specific modification attempt by ID.

        Args:
            attempt_id: ID of the attempt

        Returns:
            ModificationAttempt or None if not found
        """
        with closing(self._get_conn()) as conn:
            row = conn.execute(
                f"""
                SELECT {self._SELECT_COLUMNS}
                FROM modification_journal
                WHERE id = ?
                """,
                (attempt_id,),
            ).fetchone()
        return self._row_to_attempt(row) if row else None

    async def update_reflection(self, attempt_id: int, reflection: str) -> bool:
        """Update the reflection for a modification attempt.

        Args:
            attempt_id: ID of the attempt
            reflection: New reflection text

        Returns:
            True if updated, False if not found
        """
        with closing(self._get_conn()) as conn:
            cursor = conn.execute(
                """
                UPDATE modification_journal
                SET reflection = ?
                WHERE id = ?
                """,
                (reflection, attempt_id),
            )
            conn.commit()
            updated = cursor.rowcount > 0
        if updated:
            logger.info("Updated reflection for attempt %d", attempt_id)
        return updated

    async def get_attempts_for_file(
        self,
        file_path: str,
        limit: int = 10,
    ) -> list[ModificationAttempt]:
        """Get all attempts that modified a specific file.

        Matches against the JSON-encoded ``files_modified`` column with a
        quoted-exact LIKE pattern plus a looser substring pattern.

        NOTE(review): LIKE wildcards (``%``/``_``) in *file_path* are not
        escaped, so an underscore in a filename matches any character —
        acceptable for this heuristic lookup, but worth confirming.

        Args:
            file_path: Path to file (relative to repo root)
            limit: Maximum number of attempts

        Returns:
            List of modification attempts affecting this file
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                f"""
                SELECT {self._SELECT_COLUMNS}
                FROM modification_journal
                WHERE files_modified LIKE ? OR files_modified LIKE ?
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                (f'%"{file_path}"%', f'%{file_path}%', limit),
            ).fetchall()
        return [self._row_to_attempt(row) for row in rows]

    def _row_to_attempt(self, row: sqlite3.Row) -> ModificationAttempt:
        """Convert a database row to ModificationAttempt.

        NOTE(review): the parsed timestamp is naive (SQLite
        CURRENT_TIMESTAMP); callers comparing it with aware datetimes must
        attach UTC themselves, as find_similar does.
        """
        return ModificationAttempt(
            id=row["id"],
            timestamp=datetime.fromisoformat(row["timestamp"]),
            task_description=row["task_description"],
            approach=row["approach"] or "",
            files_modified=json.loads(row["files_modified"] or "[]"),
            diff=row["diff"] or "",
            test_results=row["test_results"] or "",
            outcome=Outcome(row["outcome"]),
            failure_analysis=row["failure_analysis"] or "",
            reflection=row["reflection"] or "",
            retry_count=row["retry_count"] or 0,
        )