This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/tests/test_modification_journal.py

323 lines
11 KiB
Python
Raw Normal View History

feat: Self-Coding Foundation (Phase 1) Implements the foundational infrastructure for Timmy's self-modification capability: ## New Services 1. **GitSafety** (src/self_coding/git_safety.py) - Atomic git operations with rollback capability - Snapshot/restore for safe experimentation - Feature branch management (timmy/self-edit/{timestamp}) - Merge to main only after tests pass 2. **CodebaseIndexer** (src/self_coding/codebase_indexer.py) - AST-based parsing of Python source files - Extracts classes, functions, imports, docstrings - Builds dependency graph for blast radius analysis - SQLite storage with hash-based incremental indexing - get_summary() for LLM context (<4000 tokens) - get_relevant_files() for task-based file discovery 3. **ModificationJournal** (src/self_coding/modification_journal.py) - Persistent log of all self-modification attempts - Tracks outcomes: success, failure, rollback - find_similar() for learning from past attempts - Success rate metrics and recent failure tracking - Supports vector embeddings (Phase 2) 4. **ReflectionService** (src/self_coding/reflection.py) - LLM-powered analysis of modification attempts - Generates lessons learned from successes and failures - Fallback templates when LLM unavailable - Supports context from similar past attempts ## Test Coverage - 104 new tests across 7 test files - 95% code coverage on self_coding module - Green path tests: full workflow integration - Red path tests: errors, rollbacks, edge cases - Safety constraint tests: test coverage requirements, protected files ## Usage from self_coding import GitSafety, CodebaseIndexer, ModificationJournal git = GitSafety(repo_path=/path/to/repo) indexer = CodebaseIndexer(repo_path=/path/to/repo) journal = ModificationJournal() Phase 2 will build the Self-Edit MCP Tool that orchestrates these services.
2026-02-26 11:08:05 -05:00
"""Tests for Modification Journal.
Tests logging, querying, and metrics for self-modification attempts.
"""
from __future__ import annotations
import tempfile
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pytest
from self_coding.modification_journal import (
ModificationAttempt,
ModificationJournal,
Outcome,
)
@pytest.fixture
def temp_journal():
"""Create a ModificationJournal with temporary database."""
with tempfile.TemporaryDirectory() as tmpdir:
db_path = Path(tmpdir) / "journal.db"
journal = ModificationJournal(db_path=db_path)
yield journal
@pytest.mark.asyncio
class TestModificationJournalLogging:
"""Logging modification attempts."""
async def test_log_attempt_success(self, temp_journal):
"""Should log a successful attempt."""
attempt = ModificationAttempt(
task_description="Add error handling to health endpoint",
approach="Use try/except block",
files_modified=["src/app.py"],
diff="@@ -1,3 +1,7 @@...",
test_results="1 passed",
outcome=Outcome.SUCCESS,
)
attempt_id = await temp_journal.log_attempt(attempt)
assert attempt_id > 0
async def test_log_attempt_failure(self, temp_journal):
"""Should log a failed attempt."""
attempt = ModificationAttempt(
task_description="Refactor database layer",
approach="Extract connection pool",
files_modified=["src/db.py", "src/models.py"],
diff="@@ ...",
test_results="2 failed",
outcome=Outcome.FAILURE,
failure_analysis="Circular dependency introduced",
retry_count=2,
)
attempt_id = await temp_journal.log_attempt(attempt)
# Retrieve and verify
retrieved = await temp_journal.get_by_id(attempt_id)
assert retrieved is not None
assert retrieved.outcome == Outcome.FAILURE
assert retrieved.failure_analysis == "Circular dependency introduced"
assert retrieved.retry_count == 2
@pytest.mark.asyncio
class TestModificationJournalRetrieval:
"""Retrieving logged attempts."""
async def test_get_by_id(self, temp_journal):
"""Should retrieve attempt by ID."""
attempt = ModificationAttempt(
task_description="Fix bug",
outcome=Outcome.SUCCESS,
)
attempt_id = await temp_journal.log_attempt(attempt)
retrieved = await temp_journal.get_by_id(attempt_id)
assert retrieved is not None
assert retrieved.task_description == "Fix bug"
assert retrieved.id == attempt_id
async def test_get_by_id_not_found(self, temp_journal):
"""Should return None for non-existent ID."""
result = await temp_journal.get_by_id(9999)
assert result is None
async def test_find_similar_basic(self, temp_journal):
"""Should find similar attempts by keyword."""
# Log some attempts
await temp_journal.log_attempt(ModificationAttempt(
task_description="Add error handling to API endpoints",
outcome=Outcome.SUCCESS,
))
await temp_journal.log_attempt(ModificationAttempt(
task_description="Add logging to database queries",
outcome=Outcome.SUCCESS,
))
await temp_journal.log_attempt(ModificationAttempt(
task_description="Fix CSS styling on homepage",
outcome=Outcome.FAILURE,
))
# Search for error handling
similar = await temp_journal.find_similar("error handling in endpoints", limit=3)
assert len(similar) > 0
# Should find the API error handling attempt first
assert "error" in similar[0].task_description.lower()
async def test_find_similar_filter_outcome(self, temp_journal):
"""Should filter by outcome when specified."""
await temp_journal.log_attempt(ModificationAttempt(
task_description="Database optimization",
outcome=Outcome.SUCCESS,
))
await temp_journal.log_attempt(ModificationAttempt(
task_description="Database refactoring",
outcome=Outcome.FAILURE,
))
# Search only for successes
similar = await temp_journal.find_similar(
"database work",
include_outcomes=[Outcome.SUCCESS],
)
assert len(similar) == 1
assert similar[0].outcome == Outcome.SUCCESS
async def test_find_similar_empty(self, temp_journal):
"""Should return empty list when no matches."""
await temp_journal.log_attempt(ModificationAttempt(
task_description="Fix bug",
outcome=Outcome.SUCCESS,
))
similar = await temp_journal.find_similar("xyzqwerty unicorn astronaut", limit=5)
assert similar == []
@pytest.mark.asyncio
class TestModificationJournalMetrics:
"""Success rate metrics."""
async def test_get_success_rate_empty(self, temp_journal):
"""Should handle empty journal."""
metrics = await temp_journal.get_success_rate()
assert metrics["overall"] == 0.0
assert metrics["total"] == 0
async def test_get_success_rate_calculated(self, temp_journal):
"""Should calculate success rate correctly."""
# Log various outcomes
for _ in range(5):
await temp_journal.log_attempt(ModificationAttempt(
task_description="Success task",
outcome=Outcome.SUCCESS,
))
for _ in range(3):
await temp_journal.log_attempt(ModificationAttempt(
task_description="Failure task",
outcome=Outcome.FAILURE,
))
for _ in range(2):
await temp_journal.log_attempt(ModificationAttempt(
task_description="Rollback task",
outcome=Outcome.ROLLBACK,
))
metrics = await temp_journal.get_success_rate()
assert metrics["success"] == 5
assert metrics["failure"] == 3
assert metrics["rollback"] == 2
assert metrics["total"] == 10
assert metrics["overall"] == 0.5 # 5/10
async def test_get_recent_failures(self, temp_journal):
"""Should get recent failures."""
# Log failures and successes (last one is most recent)
await temp_journal.log_attempt(ModificationAttempt(
task_description="Rollback attempt",
outcome=Outcome.ROLLBACK,
))
await temp_journal.log_attempt(ModificationAttempt(
task_description="Success",
outcome=Outcome.SUCCESS,
))
await temp_journal.log_attempt(ModificationAttempt(
task_description="Failed attempt",
outcome=Outcome.FAILURE,
))
failures = await temp_journal.get_recent_failures(limit=10)
assert len(failures) == 2
# Most recent first (Failure was logged last)
assert failures[0].outcome == Outcome.FAILURE
assert failures[1].outcome == Outcome.ROLLBACK
@pytest.mark.asyncio
class TestModificationJournalUpdates:
"""Updating logged attempts."""
async def test_update_reflection(self, temp_journal):
"""Should update reflection for an attempt."""
attempt = ModificationAttempt(
task_description="Test task",
outcome=Outcome.SUCCESS,
)
attempt_id = await temp_journal.log_attempt(attempt)
# Update reflection
success = await temp_journal.update_reflection(
attempt_id,
"This worked well because...",
)
assert success is True
# Verify
retrieved = await temp_journal.get_by_id(attempt_id)
assert retrieved.reflection == "This worked well because..."
async def test_update_reflection_not_found(self, temp_journal):
"""Should return False for non-existent ID."""
success = await temp_journal.update_reflection(9999, "Reflection")
assert success is False
@pytest.mark.asyncio
class TestModificationJournalFileTracking:
"""Tracking attempts by file."""
async def test_get_attempts_for_file(self, temp_journal):
"""Should find all attempts that modified a file."""
await temp_journal.log_attempt(ModificationAttempt(
task_description="Fix app.py",
files_modified=["src/app.py", "src/config.py"],
outcome=Outcome.SUCCESS,
))
await temp_journal.log_attempt(ModificationAttempt(
task_description="Update config only",
files_modified=["src/config.py"],
outcome=Outcome.SUCCESS,
))
await temp_journal.log_attempt(ModificationAttempt(
task_description="Other file",
files_modified=["src/other.py"],
outcome=Outcome.SUCCESS,
))
app_attempts = await temp_journal.get_attempts_for_file("src/app.py")
assert len(app_attempts) == 1
assert "src/app.py" in app_attempts[0].files_modified
@pytest.mark.asyncio
class TestModificationJournalIntegration:
"""Full workflow integration tests."""
async def test_full_workflow(self, temp_journal):
"""Complete workflow: log, find similar, get metrics."""
# Log some attempts
for i in range(3):
await temp_journal.log_attempt(ModificationAttempt(
task_description=f"Database optimization {i}",
approach="Add indexes",
files_modified=["src/db.py"],
outcome=Outcome.SUCCESS if i % 2 == 0 else Outcome.FAILURE,
))
# Find similar
similar = await temp_journal.find_similar("optimize database queries", limit=5)
assert len(similar) == 3
# Get success rate
metrics = await temp_journal.get_success_rate()
assert metrics["total"] == 3
assert metrics["success"] == 2
# Get recent failures
failures = await temp_journal.get_recent_failures(limit=5)
assert len(failures) == 1
# Get attempts for file
file_attempts = await temp_journal.get_attempts_for_file("src/db.py")
assert len(file_attempts) == 3
async def test_persistence(self):
"""Should persist across instances."""
with tempfile.TemporaryDirectory() as tmpdir:
db_path = Path(tmpdir) / "persist.db"
# First instance
journal1 = ModificationJournal(db_path=db_path)
attempt_id = await journal1.log_attempt(ModificationAttempt(
task_description="Persistent attempt",
outcome=Outcome.SUCCESS,
))
# Second instance with same database
journal2 = ModificationJournal(db_path=db_path)
retrieved = await journal2.get_by_id(attempt_id)
assert retrieved is not None
assert retrieved.task_description == "Persistent attempt"