Implements the foundational infrastructure for Timmy's self-modification capability:
## New Services
1. **GitSafety** (src/self_coding/git_safety.py)
- Atomic git operations with rollback capability
- Snapshot/restore for safe experimentation
- Feature branch management (timmy/self-edit/{timestamp})
- Merge to main only after tests pass
2. **CodebaseIndexer** (src/self_coding/codebase_indexer.py)
- AST-based parsing of Python source files
- Extracts classes, functions, imports, docstrings
- Builds dependency graph for blast radius analysis
- SQLite storage with hash-based incremental indexing
- get_summary() for LLM context (<4000 tokens)
- get_relevant_files() for task-based file discovery
3. **ModificationJournal** (src/self_coding/modification_journal.py)
- Persistent log of all self-modification attempts
- Tracks outcomes: success, failure, rollback
- find_similar() for learning from past attempts
- Success rate metrics and recent failure tracking
- Supports vector embeddings (Phase 2)
4. **ReflectionService** (src/self_coding/reflection.py)
- LLM-powered analysis of modification attempts
- Generates lessons learned from successes and failures
- Fallback templates when LLM unavailable
- Supports context from similar past attempts
## Test Coverage
- 104 new tests across 7 test files
- 95% code coverage on self_coding module
- Green path tests: full workflow integration
- Red path tests: errors, rollbacks, edge cases
- Safety constraint tests: test coverage requirements, protected files
## Usage
from self_coding import GitSafety, CodebaseIndexer, ModificationJournal
git = GitSafety(repo_path="/path/to/repo")
indexer = CodebaseIndexer(repo_path="/path/to/repo")
journal = ModificationJournal()
Phase 2 will build the Self-Edit MCP Tool that orchestrates these services.
323 lines
11 KiB
Python
323 lines
11 KiB
Python
"""Tests for Modification Journal.
|
|
|
|
Tests logging, querying, and metrics for self-modification attempts.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import tempfile
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from self_coding.modification_journal import (
|
|
ModificationAttempt,
|
|
ModificationJournal,
|
|
Outcome,
|
|
)
|
|
|
|
|
|
@pytest.fixture
def temp_journal():
    """Yield a ModificationJournal backed by a throwaway database file.

    The database file is placed inside a temporary directory; the directory
    is cleaned up when the fixture resumes after the requesting test.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        yield ModificationJournal(db_path=Path(scratch_dir) / "journal.db")
|
|
|
|
|
|
@pytest.mark.asyncio
class TestModificationJournalLogging:
    """Recording modification attempts in the journal."""

    async def test_log_attempt_success(self, temp_journal):
        """A successful attempt is stored and assigned a positive ID."""
        successful = ModificationAttempt(
            task_description="Add error handling to health endpoint",
            approach="Use try/except block",
            files_modified=["src/app.py"],
            diff="@@ -1,3 +1,7 @@...",
            test_results="1 passed",
            outcome=Outcome.SUCCESS,
        )

        new_id = await temp_journal.log_attempt(successful)

        assert new_id > 0

    async def test_log_attempt_failure(self, temp_journal):
        """A failed attempt round-trips with its failure details intact."""
        new_id = await temp_journal.log_attempt(
            ModificationAttempt(
                task_description="Refactor database layer",
                approach="Extract connection pool",
                files_modified=["src/db.py", "src/models.py"],
                diff="@@ ...",
                test_results="2 failed",
                outcome=Outcome.FAILURE,
                failure_analysis="Circular dependency introduced",
                retry_count=2,
            )
        )

        # Read the record back and check the failure-specific fields.
        stored = await temp_journal.get_by_id(new_id)
        assert stored is not None
        assert stored.outcome == Outcome.FAILURE
        assert stored.failure_analysis == "Circular dependency introduced"
        assert stored.retry_count == 2
|
|
|
|
|
|
@pytest.mark.asyncio
class TestModificationJournalRetrieval:
    """Looking up previously logged attempts."""

    async def test_get_by_id(self, temp_journal):
        """An attempt can be fetched back using the ID returned on logging."""
        new_id = await temp_journal.log_attempt(
            ModificationAttempt(
                task_description="Fix bug",
                outcome=Outcome.SUCCESS,
            )
        )

        stored = await temp_journal.get_by_id(new_id)

        assert stored is not None
        assert stored.task_description == "Fix bug"
        assert stored.id == new_id

    async def test_get_by_id_not_found(self, temp_journal):
        """Looking up an ID that was never logged yields None."""
        missing = await temp_journal.get_by_id(9999)

        assert missing is None

    async def test_find_similar_basic(self, temp_journal):
        """A keyword query surfaces the matching attempt first."""
        # Seed the journal; insertion order is kept the same as listed here.
        seed_attempts = [
            ("Add error handling to API endpoints", Outcome.SUCCESS),
            ("Add logging to database queries", Outcome.SUCCESS),
            ("Fix CSS styling on homepage", Outcome.FAILURE),
        ]
        for description, outcome in seed_attempts:
            await temp_journal.log_attempt(
                ModificationAttempt(
                    task_description=description,
                    outcome=outcome,
                )
            )

        matches = await temp_journal.find_similar(
            "error handling in endpoints", limit=3
        )

        assert len(matches) > 0
        # The API error-handling attempt is expected to rank first.
        top_description = matches[0].task_description.lower()
        assert "error" in top_description

    async def test_find_similar_filter_outcome(self, temp_journal):
        """Only attempts with the requested outcomes are returned."""
        seed_attempts = [
            ("Database optimization", Outcome.SUCCESS),
            ("Database refactoring", Outcome.FAILURE),
        ]
        for description, outcome in seed_attempts:
            await temp_journal.log_attempt(
                ModificationAttempt(
                    task_description=description,
                    outcome=outcome,
                )
            )

        # Restrict the search to successful attempts.
        matches = await temp_journal.find_similar(
            "database work",
            include_outcomes=[Outcome.SUCCESS],
        )

        assert len(matches) == 1
        assert matches[0].outcome == Outcome.SUCCESS

    async def test_find_similar_empty(self, temp_journal):
        """A query with no overlapping keywords returns an empty list."""
        await temp_journal.log_attempt(
            ModificationAttempt(
                task_description="Fix bug",
                outcome=Outcome.SUCCESS,
            )
        )

        matches = await temp_journal.find_similar(
            "xyzqwerty unicorn astronaut", limit=5
        )

        assert matches == []
|
|
|
|
|
|
@pytest.mark.asyncio
class TestModificationJournalMetrics:
    """Aggregate statistics over logged attempts."""

    async def test_get_success_rate_empty(self, temp_journal):
        """An empty journal reports a zero rate and a zero total."""
        stats = await temp_journal.get_success_rate()

        assert stats["overall"] == 0.0
        assert stats["total"] == 0

    async def test_get_success_rate_calculated(self, temp_journal):
        """Per-outcome counts and the overall rate reflect what was logged."""
        # (description, outcome, how many to log) -- logged in this order.
        batches = [
            ("Success task", Outcome.SUCCESS, 5),
            ("Failure task", Outcome.FAILURE, 3),
            ("Rollback task", Outcome.ROLLBACK, 2),
        ]
        for description, outcome, count in batches:
            for _ in range(count):
                await temp_journal.log_attempt(
                    ModificationAttempt(
                        task_description=description,
                        outcome=outcome,
                    )
                )

        stats = await temp_journal.get_success_rate()

        assert stats["success"] == 5
        assert stats["failure"] == 3
        assert stats["rollback"] == 2
        assert stats["total"] == 10
        assert stats["overall"] == 0.5  # 5/10

    async def test_get_recent_failures(self, temp_journal):
        """Failures and rollbacks come back newest first; successes do not."""
        # Logged oldest to newest: rollback, success, failure.
        timeline = [
            ("Rollback attempt", Outcome.ROLLBACK),
            ("Success", Outcome.SUCCESS),
            ("Failed attempt", Outcome.FAILURE),
        ]
        for description, outcome in timeline:
            await temp_journal.log_attempt(
                ModificationAttempt(
                    task_description=description,
                    outcome=outcome,
                )
            )

        recent = await temp_journal.get_recent_failures(limit=10)

        assert len(recent) == 2
        # The failure was logged last, so it leads the list.
        newest, older = recent[0], recent[1]
        assert newest.outcome == Outcome.FAILURE
        assert older.outcome == Outcome.ROLLBACK
|
|
|
|
|
|
@pytest.mark.asyncio
class TestModificationJournalUpdates:
    """Updating attempts after they have been logged."""

    async def test_update_reflection(self, temp_journal):
        """Updating the reflection of an existing attempt persists the text."""
        attempt = ModificationAttempt(
            task_description="Test task",
            outcome=Outcome.SUCCESS,
        )

        attempt_id = await temp_journal.log_attempt(attempt)

        # Attach a reflection to the attempt that was just logged.
        success = await temp_journal.update_reflection(
            attempt_id,
            "This worked well because...",
        )

        assert success is True

        # Read the record back. Guard against a missing record first so a
        # lookup failure shows up as an assertion failure instead of an
        # AttributeError on None.
        retrieved = await temp_journal.get_by_id(attempt_id)
        assert retrieved is not None
        assert retrieved.reflection == "This worked well because..."

    async def test_update_reflection_not_found(self, temp_journal):
        """Updating an ID that was never logged reports False."""
        success = await temp_journal.update_reflection(9999, "Reflection")

        assert success is False
|
|
|
|
|
|
@pytest.mark.asyncio
class TestModificationJournalFileTracking:
    """Querying attempts by the files they touched."""

    async def test_get_attempts_for_file(self, temp_journal):
        """Only attempts whose file list contains the path are returned."""
        # (description, files touched) -- every attempt is a success.
        seed_attempts = [
            ("Fix app.py", ["src/app.py", "src/config.py"]),
            ("Update config only", ["src/config.py"]),
            ("Other file", ["src/other.py"]),
        ]
        for description, touched in seed_attempts:
            await temp_journal.log_attempt(
                ModificationAttempt(
                    task_description=description,
                    files_modified=touched,
                    outcome=Outcome.SUCCESS,
                )
            )

        hits = await temp_journal.get_attempts_for_file("src/app.py")

        assert len(hits) == 1
        assert "src/app.py" in hits[0].files_modified
|
|
|
|
|
|
@pytest.mark.asyncio
class TestModificationJournalIntegration:
    """End-to-end checks that combine several journal operations."""

    async def test_full_workflow(self, temp_journal):
        """Log attempts, then exercise search, metrics, failures and file lookup."""
        # Three attempts: even indices succeed, the odd one fails.
        for i in range(3):
            outcome = Outcome.FAILURE if i % 2 else Outcome.SUCCESS
            await temp_journal.log_attempt(
                ModificationAttempt(
                    task_description=f"Database optimization {i}",
                    approach="Add indexes",
                    files_modified=["src/db.py"],
                    outcome=outcome,
                )
            )

        # Similarity search
        matches = await temp_journal.find_similar(
            "optimize database queries", limit=5
        )
        assert len(matches) == 3

        # Success-rate metrics
        stats = await temp_journal.get_success_rate()
        assert stats["total"] == 3
        assert stats["success"] == 2

        # Recent failures
        recent = await temp_journal.get_recent_failures(limit=5)
        assert len(recent) == 1

        # Per-file history
        db_history = await temp_journal.get_attempts_for_file("src/db.py")
        assert len(db_history) == 3

    async def test_persistence(self):
        """A record written by one journal instance is readable by another."""
        with tempfile.TemporaryDirectory() as scratch_dir:
            shared_db = Path(scratch_dir) / "persist.db"

            # Writer instance
            writer = ModificationJournal(db_path=shared_db)
            new_id = await writer.log_attempt(
                ModificationAttempt(
                    task_description="Persistent attempt",
                    outcome=Outcome.SUCCESS,
                )
            )

            # Reader instance opened on the same database file
            reader = ModificationJournal(db_path=shared_db)
            stored = await reader.get_by_id(new_id)

            assert stored is not None
            assert stored.task_description == "Persistent attempt"
|