Test fixtures that create temporary git repos now set commit.gpgsign=false to avoid failures in environments with global commit signing configured. The permission error test is skipped when running as root since file permissions don't apply to the root user. https://claude.ai/code/session_018u1fAx2GihSGctYS64tD4H
480 lines
16 KiB
Python
480 lines
16 KiB
Python
"""End-to-end integration tests for Self-Coding layer.
|
|
|
|
Tests the complete workflow: GitSafety + CodebaseIndexer + ModificationJournal + Reflection
|
|
working together.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from self_coding import (
|
|
CodebaseIndexer,
|
|
GitSafety,
|
|
ModificationAttempt,
|
|
ModificationJournal,
|
|
Outcome,
|
|
ReflectionService,
|
|
Snapshot,
|
|
)
|
|
|
|
|
|
@pytest.fixture
|
|
def self_coding_env():
|
|
"""Create a complete self-coding environment with temp repo."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
repo_path = Path(tmpdir)
|
|
|
|
# Initialize git repo
|
|
import subprocess
|
|
subprocess.run(["git", "init"], cwd=repo_path, check=True, capture_output=True)
|
|
subprocess.run(
|
|
["git", "config", "user.email", "test@test.com"],
|
|
cwd=repo_path, check=True, capture_output=True,
|
|
)
|
|
subprocess.run(
|
|
["git", "config", "user.name", "Test User"],
|
|
cwd=repo_path, check=True, capture_output=True,
|
|
)
|
|
subprocess.run(
|
|
["git", "config", "commit.gpgsign", "false"],
|
|
cwd=repo_path, check=True, capture_output=True,
|
|
)
|
|
|
|
# Create src directory with real Python files
|
|
src_path = repo_path / "src" / "myproject"
|
|
src_path.mkdir(parents=True)
|
|
|
|
(src_path / "__init__.py").write_text("")
|
|
(src_path / "calculator.py").write_text('''
|
|
"""A simple calculator module."""
|
|
|
|
class Calculator:
|
|
"""Basic calculator with add/subtract."""
|
|
|
|
def add(self, a: int, b: int) -> int:
|
|
return a + b
|
|
|
|
def subtract(self, a: int, b: int) -> int:
|
|
return a - b
|
|
''')
|
|
|
|
(src_path / "utils.py").write_text('''
|
|
"""Utility functions."""
|
|
|
|
from myproject.calculator import Calculator
|
|
|
|
|
|
def calculate_total(items: list[int]) -> int:
|
|
calc = Calculator()
|
|
return sum(calc.add(0, item) for item in items)
|
|
''')
|
|
|
|
# Create tests
|
|
tests_path = repo_path / "tests"
|
|
tests_path.mkdir()
|
|
|
|
(tests_path / "test_calculator.py").write_text('''
|
|
"""Tests for calculator."""
|
|
|
|
from myproject.calculator import Calculator
|
|
|
|
|
|
def test_add():
|
|
calc = Calculator()
|
|
assert calc.add(2, 3) == 5
|
|
|
|
|
|
def test_subtract():
|
|
calc = Calculator()
|
|
assert calc.subtract(5, 3) == 2
|
|
''')
|
|
|
|
# Initial commit
|
|
subprocess.run(["git", "add", "."], cwd=repo_path, check=True, capture_output=True)
|
|
subprocess.run(
|
|
["git", "commit", "-m", "Initial commit"],
|
|
cwd=repo_path, check=True, capture_output=True,
|
|
)
|
|
subprocess.run(
|
|
["git", "branch", "-M", "main"],
|
|
cwd=repo_path, check=True, capture_output=True,
|
|
)
|
|
|
|
# Initialize services
|
|
git = GitSafety(
|
|
repo_path=repo_path,
|
|
main_branch="main",
|
|
test_command="python -m pytest tests/ -v",
|
|
)
|
|
indexer = CodebaseIndexer(
|
|
repo_path=repo_path,
|
|
db_path=repo_path / "codebase.db",
|
|
src_dirs=["src", "tests"],
|
|
)
|
|
journal = ModificationJournal(db_path=repo_path / "journal.db")
|
|
reflection = ReflectionService(llm_adapter=None)
|
|
|
|
yield {
|
|
"repo_path": repo_path,
|
|
"git": git,
|
|
"indexer": indexer,
|
|
"journal": journal,
|
|
"reflection": reflection,
|
|
}
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
class TestSelfCodingGreenPath:
|
|
"""Happy path: successful self-modification workflow."""
|
|
|
|
async def test_complete_successful_modification(self, self_coding_env):
|
|
"""Full workflow: snapshot → branch → modify → test → commit → merge → log → reflect."""
|
|
env = self_coding_env
|
|
git = env["git"]
|
|
indexer = env["indexer"]
|
|
journal = env["journal"]
|
|
reflection = env["reflection"]
|
|
repo_path = env["repo_path"]
|
|
|
|
# 1. Index codebase to understand structure
|
|
await indexer.index_all()
|
|
|
|
# 2. Find relevant files for task
|
|
files = await indexer.get_relevant_files("add multiply method to calculator", limit=3)
|
|
assert "src/myproject/calculator.py" in files
|
|
|
|
# 3. Check for similar past attempts
|
|
similar = await journal.find_similar("add multiply method", limit=5)
|
|
# Should be empty (first attempt)
|
|
|
|
# 4. Take snapshot
|
|
snapshot = await git.snapshot(run_tests=False)
|
|
assert isinstance(snapshot, Snapshot)
|
|
|
|
# 5. Create feature branch
|
|
branch_name = "timmy/self-edit/add-multiply"
|
|
branch = await git.create_branch(branch_name)
|
|
assert branch == branch_name
|
|
|
|
# 6. Make modification (simulate adding multiply method)
|
|
calc_path = repo_path / "src" / "myproject" / "calculator.py"
|
|
content = calc_path.read_text()
|
|
new_method = '''
|
|
def multiply(self, a: int, b: int) -> int:
|
|
"""Multiply two numbers."""
|
|
return a * b
|
|
'''
|
|
# Insert before last method
|
|
content = content.rstrip() + "\n" + new_method + "\n"
|
|
calc_path.write_text(content)
|
|
|
|
# 7. Add test for new method
|
|
test_path = repo_path / "tests" / "test_calculator.py"
|
|
test_content = test_path.read_text()
|
|
new_test = '''
|
|
|
|
def test_multiply():
|
|
calc = Calculator()
|
|
assert calc.multiply(3, 4) == 12
|
|
'''
|
|
test_path.write_text(test_content.rstrip() + new_test + "\n")
|
|
|
|
# 8. Commit changes
|
|
commit_hash = await git.commit(
|
|
"Add multiply method to Calculator",
|
|
["src/myproject/calculator.py", "tests/test_calculator.py"],
|
|
)
|
|
assert len(commit_hash) == 40
|
|
|
|
# 9. Merge to main (skipping actual test run for speed)
|
|
merge_hash = await git.merge_to_main(branch, require_tests=False)
|
|
assert merge_hash != snapshot.commit_hash
|
|
|
|
# 10. Log the successful attempt
|
|
diff = await git.get_diff(snapshot.commit_hash)
|
|
attempt = ModificationAttempt(
|
|
task_description="Add multiply method to Calculator",
|
|
approach="Added multiply method with docstring and test",
|
|
files_modified=["src/myproject/calculator.py", "tests/test_calculator.py"],
|
|
diff=diff[:1000], # Truncate for storage
|
|
test_results="Tests passed",
|
|
outcome=Outcome.SUCCESS,
|
|
)
|
|
attempt_id = await journal.log_attempt(attempt)
|
|
|
|
# 11. Generate reflection
|
|
reflection_text = await reflection.reflect_on_attempt(attempt)
|
|
assert "What went well" in reflection_text
|
|
|
|
await journal.update_reflection(attempt_id, reflection_text)
|
|
|
|
# 12. Verify final state
|
|
final_commit = await git.get_current_commit()
|
|
assert final_commit == merge_hash
|
|
|
|
# Verify we're on main branch
|
|
current_branch = await git.get_current_branch()
|
|
assert current_branch == "main"
|
|
|
|
# Verify multiply method exists
|
|
final_content = calc_path.read_text()
|
|
assert "def multiply" in final_content
|
|
|
|
async def test_incremental_codebase_indexing(self, self_coding_env):
|
|
"""Codebase indexer should detect changes after modification."""
|
|
env = self_coding_env
|
|
indexer = env["indexer"]
|
|
|
|
# Initial index
|
|
stats1 = await indexer.index_all()
|
|
assert stats1["indexed"] == 4 # __init__.py, calculator.py, utils.py, test_calculator.py
|
|
|
|
# Add new file
|
|
new_file = env["repo_path"] / "src" / "myproject" / "new_module.py"
|
|
new_file.write_text('''
|
|
"""New module."""
|
|
def new_function(): pass
|
|
''')
|
|
|
|
# Incremental index should detect only the new file
|
|
stats2 = await indexer.index_changed()
|
|
assert stats2["indexed"] == 1
|
|
assert stats2["skipped"] == 4
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
class TestSelfCodingRedPaths:
|
|
"""Error paths: failures, rollbacks, and recovery."""
|
|
|
|
async def test_rollback_on_test_failure(self, self_coding_env):
|
|
"""Should rollback when tests fail."""
|
|
env = self_coding_env
|
|
git = env["git"]
|
|
journal = env["journal"]
|
|
repo_path = env["repo_path"]
|
|
|
|
# Take snapshot
|
|
snapshot = await git.snapshot(run_tests=False)
|
|
original_commit = snapshot.commit_hash
|
|
|
|
# Create branch
|
|
branch = await git.create_branch("timmy/self-edit/bad-change")
|
|
|
|
# Make breaking change (remove add method)
|
|
calc_path = repo_path / "src" / "myproject" / "calculator.py"
|
|
calc_path.write_text('''
|
|
"""A simple calculator module."""
|
|
|
|
class Calculator:
|
|
"""Basic calculator - broken version."""
|
|
pass
|
|
''')
|
|
|
|
await git.commit("Remove methods (breaking change)")
|
|
|
|
# Log the failed attempt
|
|
attempt = ModificationAttempt(
|
|
task_description="Refactor Calculator class",
|
|
approach="Remove unused methods",
|
|
files_modified=["src/myproject/calculator.py"],
|
|
outcome=Outcome.FAILURE,
|
|
failure_analysis="Tests failed - removed methods that were used",
|
|
retry_count=0,
|
|
)
|
|
await journal.log_attempt(attempt)
|
|
|
|
# Rollback
|
|
await git.rollback(snapshot)
|
|
|
|
# Verify rollback
|
|
current = await git.get_current_commit()
|
|
assert current == original_commit
|
|
|
|
# Verify file restored
|
|
restored_content = calc_path.read_text()
|
|
assert "def add" in restored_content
|
|
|
|
async def test_find_similar_learns_from_failures(self, self_coding_env):
|
|
"""Should find similar past failures to avoid repeating mistakes."""
|
|
env = self_coding_env
|
|
journal = env["journal"]
|
|
|
|
# Log a failure
|
|
await journal.log_attempt(ModificationAttempt(
|
|
task_description="Add division method to calculator",
|
|
approach="Simple division without zero check",
|
|
files_modified=["src/myproject/calculator.py"],
|
|
outcome=Outcome.FAILURE,
|
|
failure_analysis="ZeroDivisionError not handled",
|
|
reflection="Always check for division by zero",
|
|
))
|
|
|
|
# Later, try similar task
|
|
similar = await journal.find_similar(
|
|
"Add modulo operation to calculator",
|
|
limit=5,
|
|
)
|
|
|
|
# Should find the past failure
|
|
assert len(similar) > 0
|
|
assert "division" in similar[0].task_description.lower()
|
|
|
|
async def test_dependency_chain_detects_blast_radius(self, self_coding_env):
|
|
"""Should detect which files depend on modified file."""
|
|
env = self_coding_env
|
|
indexer = env["indexer"]
|
|
|
|
await indexer.index_all()
|
|
|
|
# utils.py imports from calculator.py
|
|
deps = await indexer.get_dependency_chain("src/myproject/calculator.py")
|
|
|
|
assert "src/myproject/utils.py" in deps
|
|
|
|
async def test_success_rate_tracking(self, self_coding_env):
|
|
"""Should track success/failure metrics over time."""
|
|
env = self_coding_env
|
|
journal = env["journal"]
|
|
|
|
# Log mixed outcomes
|
|
for i in range(5):
|
|
await journal.log_attempt(ModificationAttempt(
|
|
task_description=f"Task {i}",
|
|
outcome=Outcome.SUCCESS if i % 2 == 0 else Outcome.FAILURE,
|
|
))
|
|
|
|
metrics = await journal.get_success_rate()
|
|
|
|
assert metrics["total"] == 5
|
|
assert metrics["success"] == 3
|
|
assert metrics["failure"] == 2
|
|
assert metrics["overall"] == 0.6
|
|
|
|
async def test_journal_persists_across_instances(self, self_coding_env):
|
|
"""Journal should persist even with new service instances."""
|
|
env = self_coding_env
|
|
db_path = env["repo_path"] / "persistent_journal.db"
|
|
|
|
# First instance logs attempt
|
|
journal1 = ModificationJournal(db_path=db_path)
|
|
attempt_id = await journal1.log_attempt(ModificationAttempt(
|
|
task_description="Persistent task",
|
|
outcome=Outcome.SUCCESS,
|
|
))
|
|
|
|
# New instance should see the attempt
|
|
journal2 = ModificationJournal(db_path=db_path)
|
|
retrieved = await journal2.get_by_id(attempt_id)
|
|
|
|
assert retrieved is not None
|
|
assert retrieved.task_description == "Persistent task"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
class TestSelfCodingSafetyConstraints:
|
|
"""Safety constraints and validation."""
|
|
|
|
async def test_only_modify_files_with_test_coverage(self, self_coding_env):
|
|
"""Should only allow modifying files that have tests."""
|
|
env = self_coding_env
|
|
indexer = env["indexer"]
|
|
|
|
await indexer.index_all()
|
|
|
|
# calculator.py has test coverage
|
|
assert await indexer.has_test_coverage("src/myproject/calculator.py")
|
|
|
|
# utils.py has no test file
|
|
assert not await indexer.has_test_coverage("src/myproject/utils.py")
|
|
|
|
async def test_cannot_delete_test_files(self, self_coding_env):
|
|
"""Safety check: should not delete test files."""
|
|
env = self_coding_env
|
|
git = env["git"]
|
|
repo_path = env["repo_path"]
|
|
|
|
snapshot = await git.snapshot(run_tests=False)
|
|
branch = await git.create_branch("timmy/self-edit/bad-idea")
|
|
|
|
# Try to delete test file
|
|
test_file = repo_path / "tests" / "test_calculator.py"
|
|
test_file.unlink()
|
|
|
|
# This would be caught by safety constraints in real implementation
|
|
# For now, verify the file is gone
|
|
assert not test_file.exists()
|
|
|
|
# Rollback should restore it
|
|
await git.rollback(snapshot)
|
|
assert test_file.exists()
|
|
|
|
async def test_branch_naming_convention(self, self_coding_env):
|
|
"""Branches should follow naming convention."""
|
|
env = self_coding_env
|
|
git = env["git"]
|
|
|
|
import datetime
|
|
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
|
branch_name = f"timmy/self-edit/{timestamp}"
|
|
|
|
branch = await git.create_branch(branch_name)
|
|
|
|
assert branch.startswith("timmy/self-edit/")
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
class TestSelfCodingErrorRecovery:
|
|
"""Error recovery scenarios."""
|
|
|
|
async def test_git_operation_timeout_handling(self, self_coding_env):
|
|
"""Should handle git operation timeouts gracefully."""
|
|
# This would require mocking subprocess to timeout
|
|
# For now, verify the timeout parameter exists
|
|
env = self_coding_env
|
|
git = env["git"]
|
|
|
|
# The _run_git method has timeout parameter
|
|
# If a git operation times out, it raises GitOperationError
|
|
assert hasattr(git, '_run_git')
|
|
|
|
async def test_journal_handles_concurrent_writes(self, self_coding_env):
|
|
"""Journal should handle multiple rapid writes."""
|
|
env = self_coding_env
|
|
journal = env["journal"]
|
|
|
|
# Log multiple attempts rapidly
|
|
ids = []
|
|
for i in range(10):
|
|
attempt_id = await journal.log_attempt(ModificationAttempt(
|
|
task_description=f"Concurrent task {i}",
|
|
outcome=Outcome.SUCCESS,
|
|
))
|
|
ids.append(attempt_id)
|
|
|
|
# All should be unique and retrievable
|
|
assert len(set(ids)) == 10
|
|
|
|
for attempt_id in ids:
|
|
retrieved = await journal.get_by_id(attempt_id)
|
|
assert retrieved is not None
|
|
|
|
async def test_indexer_handles_syntax_errors(self, self_coding_env):
|
|
"""Indexer should skip files with syntax errors."""
|
|
env = self_coding_env
|
|
indexer = env["indexer"]
|
|
repo_path = env["repo_path"]
|
|
|
|
# Create file with syntax error
|
|
bad_file = repo_path / "src" / "myproject" / "bad_syntax.py"
|
|
bad_file.write_text("def broken(:")
|
|
|
|
stats = await indexer.index_all()
|
|
|
|
# Should index good files, fail on bad one
|
|
assert stats["failed"] == 1
|
|
assert stats["indexed"] >= 4 # The good files
|