forked from Rockachopa/Timmy-time-dashboard
feat: Self-Coding Foundation (Phase 1)
Implements the foundational infrastructure for Timmy's self-modification capability:
## New Services
1. **GitSafety** (src/self_coding/git_safety.py)
- Atomic git operations with rollback capability
- Snapshot/restore for safe experimentation
- Feature branch management (timmy/self-edit/{timestamp})
- Merge to main only after tests pass
2. **CodebaseIndexer** (src/self_coding/codebase_indexer.py)
- AST-based parsing of Python source files
- Extracts classes, functions, imports, docstrings
- Builds dependency graph for blast radius analysis
- SQLite storage with hash-based incremental indexing
- get_summary() for LLM context (<4000 tokens)
- get_relevant_files() for task-based file discovery
3. **ModificationJournal** (src/self_coding/modification_journal.py)
- Persistent log of all self-modification attempts
- Tracks outcomes: success, failure, rollback
- find_similar() for learning from past attempts
- Success rate metrics and recent failure tracking
- Supports vector embeddings (Phase 2)
4. **ReflectionService** (src/self_coding/reflection.py)
- LLM-powered analysis of modification attempts
- Generates lessons learned from successes and failures
- Fallback templates when LLM unavailable
- Supports context from similar past attempts
## Test Coverage
- 104 new tests across 7 test files
- 95% code coverage on self_coding module
- Green path tests: full workflow integration
- Red path tests: errors, rollbacks, edge cases
- Safety constraint tests: test coverage requirements, protected files
## Usage
from self_coding import GitSafety, CodebaseIndexer, ModificationJournal
git = GitSafety(repo_path="/path/to/repo")
indexer = CodebaseIndexer(repo_path="/path/to/repo")
journal = ModificationJournal()
Phase 2 will build the Self-Edit MCP Tool that orchestrates these services.
This commit is contained in:
425
src/self_coding/modification_journal.py
Normal file
425
src/self_coding/modification_journal.py
Normal file
@@ -0,0 +1,425 @@
|
||||
"""Modification Journal — Persistent log of self-modification attempts.
|
||||
|
||||
Tracks successes and failures so Timmy can learn from experience.
|
||||
Supports semantic search for similar past attempts.
|
||||
"""
|
||||
|
||||
from __future__ import annotations

import json
import logging
import sqlite3
from contextlib import closing
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default database location
|
||||
DEFAULT_DB_PATH = Path("data/self_coding.db")
|
||||
|
||||
|
||||
class Outcome(str, Enum):
    """How a self-modification attempt ended.

    Subclasses ``str`` so members compare equal to their plain string
    values and serialize directly into SQLite TEXT columns.
    """
    SUCCESS = "success"    # change applied and kept
    FAILURE = "failure"    # change did not achieve its goal
    ROLLBACK = "rollback"  # change was applied, then reverted
|
||||
|
||||
|
||||
@dataclass
class ModificationAttempt:
    """A single self-modification attempt.

    Field order matters: ``task_description`` is the only required
    positional argument; every other field defaults so a record can be
    built incrementally while an attempt is in progress.

    Attributes:
        id: Unique identifier (auto-generated by database)
        timestamp: When the attempt was made
        task_description: What was Timmy trying to do
        approach: Strategy/approach planned
        files_modified: List of file paths that were modified
        diff: The actual git diff of changes
        test_results: Pytest output
        outcome: success, failure, or rollback
        failure_analysis: LLM-generated analysis of why it failed
        reflection: LLM-generated lessons learned
        retry_count: Number of retry attempts
        embedding: Vector embedding of task_description (for semantic search)
    """
    task_description: str
    approach: str = ""
    files_modified: list[str] = field(default_factory=list)  # default_factory avoids a shared mutable default
    diff: str = ""
    test_results: str = ""
    outcome: Outcome = Outcome.FAILURE  # pessimistic default: failure until proven otherwise
    failure_analysis: str = ""
    reflection: str = ""
    retry_count: int = 0
    # Database-managed fields: None until the record has been persisted.
    id: Optional[int] = None
    timestamp: Optional[datetime] = None
    embedding: Optional[bytes] = None  # raw vector bytes — populated in Phase 2; TODO confirm format
|
||||
|
||||
|
||||
class ModificationJournal:
    """Persistent log of self-modification attempts.

    Before any self-modification, Timmy should query the journal for
    similar past attempts and include relevant ones in the LLM context.

    A fresh connection is opened per operation and always closed via
    ``contextlib.closing`` — ``sqlite3.Connection``'s own context manager
    only scopes transactions and would otherwise leak the connection.

    Usage:
        journal = ModificationJournal()

        # Log an attempt
        attempt = ModificationAttempt(
            task_description="Add error handling",
            files_modified=["src/app.py"],
            outcome=Outcome.SUCCESS,
        )
        await journal.log_attempt(attempt)

        # Find similar past attempts
        similar = await journal.find_similar("Add error handling to endpoints")

        # Get success metrics
        metrics = await journal.get_success_rate()
    """

    def __init__(
        self,
        db_path: Optional[str | Path] = None,
    ) -> None:
        """Initialize ModificationJournal.

        Args:
            db_path: SQLite database path. Defaults to data/self_coding.db
        """
        self.db_path = Path(db_path) if db_path else DEFAULT_DB_PATH
        self._ensure_schema()
        logger.info("ModificationJournal initialized at %s", self.db_path)

    def _get_conn(self) -> sqlite3.Connection:
        """Open a new database connection; the caller must close it.

        Creates the parent directory on demand so a fresh checkout works
        without manual setup.
        """
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(self.db_path))
        conn.row_factory = sqlite3.Row  # enable access to columns by name
        return conn

    def _ensure_schema(self) -> None:
        """Create database tables and indexes if they don't exist."""
        with closing(self._get_conn()) as conn:
            conn.execute(
                """
                CREATE TABLE IF NOT EXISTS modification_journal (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    task_description TEXT NOT NULL,
                    approach TEXT,
                    files_modified JSON,
                    diff TEXT,
                    test_results TEXT,
                    outcome TEXT CHECK(outcome IN ('success', 'failure', 'rollback')),
                    failure_analysis TEXT,
                    reflection TEXT,
                    retry_count INTEGER DEFAULT 0,
                    embedding BLOB
                )
                """
            )

            # Indexes for the common access paths: outcome filters,
            # recency ordering, and task-text lookups.
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_outcome ON modification_journal(outcome)"
            )
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_timestamp ON modification_journal(timestamp)"
            )
            conn.execute(
                "CREATE INDEX IF NOT EXISTS idx_journal_task ON modification_journal(task_description)"
            )

            conn.commit()

    async def log_attempt(self, attempt: ModificationAttempt) -> int:
        """Log a modification attempt to the journal.

        Args:
            attempt: The modification attempt to log

        Returns:
            ID of the logged entry
        """
        with closing(self._get_conn()) as conn:
            cursor = conn.execute(
                """
                INSERT INTO modification_journal
                (task_description, approach, files_modified, diff, test_results,
                 outcome, failure_analysis, reflection, retry_count, embedding)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    attempt.task_description,
                    attempt.approach,
                    json.dumps(attempt.files_modified),  # stored as a JSON array
                    attempt.diff,
                    attempt.test_results,
                    attempt.outcome.value,
                    attempt.failure_analysis,
                    attempt.reflection,
                    attempt.retry_count,
                    attempt.embedding,
                ),
            )
            conn.commit()
            attempt_id = cursor.lastrowid

        logger.info(
            "Logged modification attempt %d: %s (%s)",
            attempt_id,
            attempt.task_description[:50],
            attempt.outcome.value,
        )
        return attempt_id

    async def find_similar(
        self,
        task_description: str,
        limit: int = 5,
        include_outcomes: Optional[list[Outcome]] = None,
    ) -> list[ModificationAttempt]:
        """Find similar past modification attempts.

        Uses keyword matching for now. In Phase 2, will use vector embeddings
        for semantic search.

        Args:
            task_description: Task to find similar attempts for
            limit: Maximum number of results
            include_outcomes: Filter by outcomes (None = all)

        Returns:
            List of similar modification attempts
        """
        # Extract keywords, dropping stopwords that would match nearly
        # every row and drown out the signal.
        keywords = set(task_description.lower().split())
        keywords -= {"the", "a", "an", "to", "in", "on", "at", "for", "with", "and", "or", "of", "is", "are"}

        with closing(self._get_conn()) as conn:
            # The f-string below only ever interpolates this placeholder
            # string; all values are bound via "?" parameters.
            if include_outcomes:
                outcome_filter = "AND outcome IN ({})".format(
                    ",".join("?" * len(include_outcomes))
                )
                outcome_values = [o.value for o in include_outcomes]
            else:
                outcome_filter = ""
                outcome_values = []

            rows = conn.execute(
                f"""
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE 1=1 {outcome_filter}
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                outcome_values + [limit * 3],  # over-fetch so scoring has candidates
            ).fetchall()

        # Score by keyword match; hits in the task text weigh more than
        # hits in the approach text.
        scored = []
        for row in rows:
            score = 0
            task = row["task_description"].lower()
            approach = (row["approach"] or "").lower()

            for kw in keywords:
                if kw in task:
                    score += 3
                if kw in approach:
                    score += 1

            # Boost recent attempts (only if already matched)
            if score > 0:
                timestamp = datetime.fromisoformat(row["timestamp"])
                if timestamp.tzinfo is None:
                    # SQLite CURRENT_TIMESTAMP stores naive UTC — assume UTC
                    timestamp = timestamp.replace(tzinfo=timezone.utc)
                age_days = (datetime.now(timezone.utc) - timestamp).days
                if age_days < 7:
                    score += 2
                elif age_days < 30:
                    score += 1

            if score > 0:
                scored.append((score, row))

        # Sort by score, take top N
        scored.sort(reverse=True, key=lambda x: x[0])
        top_rows = scored[:limit]

        # Convert to ModificationAttempt objects
        return [self._row_to_attempt(row) for _, row in top_rows]

    async def get_success_rate(self) -> dict[str, float]:
        """Get success rate metrics.

        Returns:
            Dict with overall and per-category success rates:
            {
                "overall": float,  # 0.0 to 1.0
                "success": int,  # count
                "failure": int,  # count
                "rollback": int,  # count
                "total": int,  # total attempts
            }
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                """
                SELECT outcome, COUNT(*) as count
                FROM modification_journal
                GROUP BY outcome
                """
            ).fetchall()

        counts = {row["outcome"]: row["count"] for row in rows}

        success = counts.get("success", 0)
        failure = counts.get("failure", 0)
        rollback = counts.get("rollback", 0)
        total = success + failure + rollback

        # Guard against division by zero on an empty journal
        overall = success / total if total > 0 else 0.0

        return {
            "overall": overall,
            "success": success,
            "failure": failure,
            "rollback": rollback,
            "total": total,
        }

    async def get_recent_failures(self, limit: int = 10) -> list[ModificationAttempt]:
        """Get recent failed attempts with their analyses.

        Rollbacks count as failures here: both mean the change did not stick.

        Args:
            limit: Maximum number of failures to return

        Returns:
            List of failed modification attempts
        """
        with closing(self._get_conn()) as conn:
            rows = conn.execute(
                """
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE outcome IN ('failure', 'rollback')
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                (limit,),
            ).fetchall()

        return [self._row_to_attempt(row) for row in rows]

    async def get_by_id(self, attempt_id: int) -> Optional[ModificationAttempt]:
        """Get a specific modification attempt by ID.

        Args:
            attempt_id: ID of the attempt

        Returns:
            ModificationAttempt or None if not found
        """
        with closing(self._get_conn()) as conn:
            row = conn.execute(
                """
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE id = ?
                """,
                (attempt_id,),
            ).fetchone()

        if not row:
            return None

        return self._row_to_attempt(row)

    async def update_reflection(self, attempt_id: int, reflection: str) -> bool:
        """Update the reflection for a modification attempt.

        Args:
            attempt_id: ID of the attempt
            reflection: New reflection text

        Returns:
            True if updated, False if not found
        """
        with closing(self._get_conn()) as conn:
            cursor = conn.execute(
                """
                UPDATE modification_journal
                SET reflection = ?
                WHERE id = ?
                """,
                (reflection, attempt_id),
            )
            conn.commit()
            updated = cursor.rowcount > 0

        if updated:
            logger.info("Updated reflection for attempt %d", attempt_id)
            return True
        return False

    async def get_attempts_for_file(
        self,
        file_path: str,
        limit: int = 10,
    ) -> list[ModificationAttempt]:
        """Get all attempts that modified a specific file.

        Args:
            file_path: Path to file (relative to repo root)
            limit: Maximum number of attempts

        Returns:
            List of modification attempts affecting this file
        """
        with closing(self._get_conn()) as conn:
            # Exact JSON-element match ("path") OR loose substring match.
            # NOTE(review): LIKE wildcards (%, _) in file_path are not
            # escaped, so such characters broaden the match.
            rows = conn.execute(
                """
                SELECT id, timestamp, task_description, approach, files_modified,
                       diff, test_results, outcome, failure_analysis, reflection,
                       retry_count
                FROM modification_journal
                WHERE files_modified LIKE ? OR files_modified LIKE ?
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                (f'%"{file_path}"%', f'%{file_path}%', limit),
            ).fetchall()

        return [self._row_to_attempt(row) for row in rows]

    def _row_to_attempt(self, row: sqlite3.Row) -> ModificationAttempt:
        """Convert a database row to ModificationAttempt.

        The ``embedding`` column is not part of the read queries, so the
        resulting object always has ``embedding=None``.
        """
        return ModificationAttempt(
            id=row["id"],
            timestamp=datetime.fromisoformat(row["timestamp"]),
            task_description=row["task_description"],
            approach=row["approach"] or "",
            files_modified=json.loads(row["files_modified"] or "[]"),
            diff=row["diff"] or "",
            test_results=row["test_results"] or "",
            outcome=Outcome(row["outcome"]),
            failure_analysis=row["failure_analysis"] or "",
            reflection=row["reflection"] or "",
            retry_count=row["retry_count"] or 0,
        )
|
||||
Reference in New Issue
Block a user