forked from Rockachopa/Timmy-time-dashboard
Implements the foundational infrastructure for Timmy's self-modification capability:
## New Services
1. **GitSafety** (src/self_coding/git_safety.py)
- Atomic git operations with rollback capability
- Snapshot/restore for safe experimentation
- Feature branch management (timmy/self-edit/{timestamp})
- Merge to main only after tests pass
2. **CodebaseIndexer** (src/self_coding/codebase_indexer.py)
- AST-based parsing of Python source files
- Extracts classes, functions, imports, docstrings
- Builds dependency graph for blast radius analysis
- SQLite storage with hash-based incremental indexing
- get_summary() for LLM context (<4000 tokens)
- get_relevant_files() for task-based file discovery
3. **ModificationJournal** (src/self_coding/modification_journal.py)
- Persistent log of all self-modification attempts
- Tracks outcomes: success, failure, rollback
- find_similar() for learning from past attempts
- Success rate metrics and recent failure tracking
- Supports vector embeddings (Phase 2)
4. **ReflectionService** (src/self_coding/reflection.py)
- LLM-powered analysis of modification attempts
- Generates lessons learned from successes and failures
- Fallback templates when LLM unavailable
- Supports context from similar past attempts
## Test Coverage
- 104 new tests across 7 test files
- 95% code coverage on self_coding module
- Green path tests: full workflow integration
- Red path tests: errors, rollbacks, edge cases
- Safety constraint tests: test coverage requirements, protected files
## Usage
from self_coding import GitSafety, CodebaseIndexer, ModificationJournal
git = GitSafety(repo_path="/path/to/repo")
indexer = CodebaseIndexer(repo_path="/path/to/repo")
journal = ModificationJournal()
Phase 2 will build the Self-Edit MCP Tool that orchestrates these services.
426 lines
14 KiB
Python
426 lines
14 KiB
Python
"""Modification Journal — Persistent log of self-modification attempts.

Tracks successes and failures so Timmy can learn from experience.
Supports semantic search for similar past attempts.
"""
|
|
|
|
from __future__ import annotations

import json
import logging
import sqlite3
from contextlib import closing
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Optional
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Default database location
|
|
DEFAULT_DB_PATH = Path("data/self_coding.db")
|
|
|
|
|
|
class Outcome(str, Enum):
    """Possible outcomes of a modification attempt.

    Subclasses ``str`` so members compare equal to their raw string values
    (e.g. ``Outcome.SUCCESS == "success"``) and bind cleanly as SQLite
    parameters; the table's CHECK constraint allows exactly these values.
    """

    SUCCESS = "success"    # modification worked
    FAILURE = "failure"    # modification did not succeed
    ROLLBACK = "rollback"  # modification was reverted
|
|
|
|
|
|
@dataclass
class ModificationAttempt:
    """A single self-modification attempt.

    Field order matters: ``task_description`` is the only required
    positional argument; everything else defaults to an "empty failure"
    so partially-known attempts can be recorded early.

    Attributes:
        id: Unique identifier (auto-generated by database)
        timestamp: When the attempt was made
        task_description: What was Timmy trying to do
        approach: Strategy/approach planned
        files_modified: List of file paths that were modified
        diff: The actual git diff of changes
        test_results: Pytest output
        outcome: success, failure, or rollback
        failure_analysis: LLM-generated analysis of why it failed
        reflection: LLM-generated lessons learned
        retry_count: Number of retry attempts
        embedding: Vector embedding of task_description (for semantic search)
    """

    task_description: str
    approach: str = ""
    # default_factory avoids the shared-mutable-default pitfall
    files_modified: list[str] = field(default_factory=list)
    diff: str = ""
    test_results: str = ""
    # Defaults to FAILURE: an attempt must be explicitly marked successful.
    outcome: Outcome = Outcome.FAILURE
    failure_analysis: str = ""
    reflection: str = ""
    retry_count: int = 0
    # id/timestamp are None until the row is persisted; the database
    # assigns both (AUTOINCREMENT id, CURRENT_TIMESTAMP).
    id: Optional[int] = None
    timestamp: Optional[datetime] = None
    # Raw embedding bytes for Phase 2 semantic search; unused today.
    embedding: Optional[bytes] = None
|
|
|
|
|
|
class ModificationJournal:
    """Persistent log of self-modification attempts.

    Before any self-modification, Timmy should query the journal for
    similar past attempts and include relevant ones in the LLM context.

    Every method opens a short-lived SQLite connection and closes it before
    returning.  Note: ``sqlite3.Connection`` used directly as a context
    manager only scopes a *transaction* — it does NOT close the connection —
    so each connection here is wrapped in ``contextlib.closing`` to avoid
    leaking one OS-level database handle per call.

    Usage:
        journal = ModificationJournal()

        # Log an attempt
        attempt = ModificationAttempt(
            task_description="Add error handling",
            files_modified=["src/app.py"],
            outcome=Outcome.SUCCESS,
        )
        await journal.log_attempt(attempt)

        # Find similar past attempts
        similar = await journal.find_similar("Add error handling to endpoints")

        # Get success metrics
        metrics = await journal.get_success_rate()
    """

    def __init__(
        self,
        db_path: Optional[str | Path] = None,
    ) -> None:
        """Initialize ModificationJournal.

        Args:
            db_path: SQLite database path. Defaults to data/self_coding.db
        """
        self.db_path = Path(db_path) if db_path else DEFAULT_DB_PATH
        self._ensure_schema()
        logger.info("ModificationJournal initialized at %s", self.db_path)

    def _get_conn(self) -> sqlite3.Connection:
        """Open a new database connection.

        Creates the parent directory on demand so a fresh checkout works.
        Callers are responsible for closing the returned connection
        (wrap it in ``closing(...)``).
        """
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(self.db_path))
        # Row factory enables name-based column access in _row_to_attempt.
        conn.row_factory = sqlite3.Row
        return conn

    def _ensure_schema(self) -> None:
        """Create database tables and indexes if they don't exist."""
        with closing(self._get_conn()) as conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS modification_journal (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    task_description TEXT NOT NULL,
                    approach TEXT,
                    files_modified JSON,
                    diff TEXT,
                    test_results TEXT,
                    outcome TEXT CHECK(outcome IN ('success', 'failure', 'rollback')),
                    failure_analysis TEXT,
                    reflection TEXT,
                    retry_count INTEGER DEFAULT 0,
                    embedding BLOB
                )
                """
            )

            # Create indexes for common queries
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_outcome ON modification_journal(outcome)"
            )
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_timestamp ON modification_journal(timestamp)"
            )
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_task ON modification_journal(task_description)"
            )

            conn.commit()

    async def log_attempt(self, attempt: ModificationAttempt) -> int:
        """Log a modification attempt to the journal.

        Args:
            attempt: The modification attempt to log

        Returns:
            ID of the logged entry
        """
        with closing(self._get_conn()) as conn:
            cursor = conn.execute(
                """
                INSERT INTO modification_journal
                (task_description, approach, files_modified, diff, test_results,
                 outcome, failure_analysis, reflection, retry_count, embedding)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    attempt.task_description,
                    attempt.approach,
                    # files_modified is stored as a JSON array string.
                    json.dumps(attempt.files_modified),
                    attempt.diff,
                    attempt.test_results,
                    attempt.outcome.value,
                    attempt.failure_analysis,
                    attempt.reflection,
                    attempt.retry_count,
                    attempt.embedding,
                ),
            )
            conn.commit()

            attempt_id = cursor.lastrowid
            logger.info(
                "Logged modification attempt %d: %s (%s)",
                attempt_id,
                attempt.task_description[:50],
                attempt.outcome.value,
            )
            return attempt_id

    async def find_similar(
        self,
        task_description: str,
        limit: int = 5,
        include_outcomes: Optional[list[Outcome]] = None,
    ) -> list[ModificationAttempt]:
        """Find similar past modification attempts.

        Uses keyword matching for now. In Phase 2, will use vector embeddings
        for semantic search.

        Scoring: +3 per keyword found in the task description, +1 per keyword
        found in the approach; matched rows get a recency boost (+2 if under
        7 days old, +1 if under 30). Rows with score 0 are dropped.

        Args:
            task_description: Task to find similar attempts for
            limit: Maximum number of results
            include_outcomes: Filter by outcomes (None = all)

        Returns:
            List of similar modification attempts
        """
        # Extract keywords from task description, minus common stopwords.
        keywords = set(task_description.lower().split())
        keywords -= {"the", "a", "an", "to", "in", "on", "at", "for", "with", "and", "or", "of", "is", "are"}

        with closing(self._get_conn()) as conn:
            # Build an optional outcome filter with one placeholder per value.
            if include_outcomes:
                outcome_filter = "AND outcome IN ({})".format(
                    ",".join("?" * len(include_outcomes))
                )
                outcome_values = [o.value for o in include_outcomes]
            else:
                outcome_filter = ""
                outcome_values = []

            rows = conn.execute(
                f"""
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE 1=1 {outcome_filter}
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                outcome_values + [limit * 3],  # Over-fetch so scoring has candidates
            ).fetchall()

        # Score by keyword match (no DB access needed past this point).
        scored = []
        for row in rows:
            score = 0
            task = row["task_description"].lower()
            approach = (row["approach"] or "").lower()

            for kw in keywords:
                if kw in task:
                    score += 3
                if kw in approach:
                    score += 1

            # Boost recent attempts (only if already matched)
            if score > 0:
                timestamp = datetime.fromisoformat(row["timestamp"])
                if timestamp.tzinfo is None:
                    # SQLite CURRENT_TIMESTAMP is UTC but stored naive.
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
                age_days = (datetime.now(timezone.utc) - timestamp).days
                if age_days < 7:
                    score += 2
                elif age_days < 30:
                    score += 1

            if score > 0:
                scored.append((score, row))

        # Sort by score descending, take top N (stable sort preserves
        # the timestamp-DESC order among equal scores).
        scored.sort(reverse=True, key=lambda x: x[0])
        top_rows = scored[:limit]

        # Convert to ModificationAttempt objects
        return [self._row_to_attempt(row) for _, row in top_rows]

    async def get_success_rate(self) -> dict[str, float]:
        """Get success rate metrics.

        Returns:
            Dict with overall rate (float, 0.0 to 1.0 — 0.0 when the journal
            is empty) and per-outcome counts (ints):
                {
                    "overall": float,
                    "success": int,
                    "failure": int,
                    "rollback": int,
                    "total": int,
                }
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                """
                SELECT outcome, COUNT(*) as count
                FROM modification_journal
                GROUP BY outcome
                """
            ).fetchall()

        counts = {row["outcome"]: row["count"] for row in rows}

        success = counts.get("success", 0)
        failure = counts.get("failure", 0)
        rollback = counts.get("rollback", 0)
        total = success + failure + rollback

        # Guard against division by zero on an empty journal.
        overall = success / total if total > 0 else 0.0

        return {
            "overall": overall,
            "success": success,
            "failure": failure,
            "rollback": rollback,
            "total": total,
        }

    async def get_recent_failures(self, limit: int = 10) -> list[ModificationAttempt]:
        """Get recent failed attempts with their analyses.

        Rollbacks count as failures for this purpose.

        Args:
            limit: Maximum number of failures to return

        Returns:
            List of failed modification attempts, most recent first
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                """
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE outcome IN ('failure', 'rollback')
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                (limit,),
            ).fetchall()

        return [self._row_to_attempt(row) for row in rows]

    async def get_by_id(self, attempt_id: int) -> Optional[ModificationAttempt]:
        """Get a specific modification attempt by ID.

        Args:
            attempt_id: ID of the attempt

        Returns:
            ModificationAttempt or None if not found
        """
        with closing(self._get_conn()) as conn:
            row = conn.execute(
                """
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE id = ?
                """,
                (attempt_id,),
            ).fetchone()

        if not row:
            return None

        return self._row_to_attempt(row)

    async def update_reflection(self, attempt_id: int, reflection: str) -> bool:
        """Update the reflection for a modification attempt.

        Args:
            attempt_id: ID of the attempt
            reflection: New reflection text

        Returns:
            True if updated, False if not found
        """
        with closing(self._get_conn()) as conn:
            cursor = conn.execute(
                """
                UPDATE modification_journal
                SET reflection = ?
                WHERE id = ?
                """,
                (reflection, attempt_id),
            )
            conn.commit()

            # rowcount == 0 means no row matched the given id.
            if cursor.rowcount > 0:
                logger.info("Updated reflection for attempt %d", attempt_id)
                return True
            return False

    async def get_attempts_for_file(
        self,
        file_path: str,
        limit: int = 10,
    ) -> list[ModificationAttempt]:
        """Get all attempts that modified a specific file.

        Matches against the JSON-encoded files_modified column: first as an
        exact quoted entry, then as a substring (so partial paths match too).

        Args:
            file_path: Path to file (relative to repo root)
            limit: Maximum number of attempts

        Returns:
            List of modification attempts affecting this file
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                """
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE files_modified LIKE ? OR files_modified LIKE ?
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                (f'%"{file_path}"%', f'%{file_path}%', limit),
            ).fetchall()

        return [self._row_to_attempt(row) for row in rows]

    def _row_to_attempt(self, row: sqlite3.Row) -> ModificationAttempt:
        """Convert a database row to a ModificationAttempt.

        NULL text columns become empty strings; the embedding column is not
        selected by any query and is left as None.
        """
        return ModificationAttempt(
            id=row["id"],
            timestamp=datetime.fromisoformat(row["timestamp"]),
            task_description=row["task_description"],
            approach=row["approach"] or "",
            files_modified=json.loads(row["files_modified"] or "[]"),
            diff=row["diff"] or "",
            test_results=row["test_results"] or "",
            outcome=Outcome(row["outcome"]),
            failure_analysis=row["failure_analysis"] or "",
            reflection=row["reflection"] or "",
            retry_count=row["retry_count"] or 0,
        )