This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/tests/self_coding/test_self_edit_tool.py

399 lines
13 KiB
Python
Raw Normal View History

feat: Self-Edit MCP Tool (Phase 2.1) Implements the Self-Edit MCP Tool that orchestrates the self-coding foundation: ## Core Features 1. **SelfEditTool** (src/tools/self_edit.py) - Complete self-modification orchestrator - Pre-flight safety checks (clean repo, on main branch) - Context gathering (codebase indexer + modification journal) - Feature branch creation (timmy/self-edit/{timestamp}) - LLM-based edit planning with fallback - Safety constraint validation - Aider integration (preferred) with fallback to direct editing - Automatic test execution via pytest - Commit on success, rollback on failure - Modification journaling with reflections 2. **Safety Constraints** - Max 3 files per commit - Max 100 lines changed - Protected files list (self-edit tool, foundation services) - Only modify files with test coverage - Max 3 retries on failure - Requires user confirmation (MCP tool registration) 3. **Execution Backends** - Aider integration: --auto-test --test-cmd pytest --yes --no-git - Direct editing fallback: LLM-based file modification with AST validation - Automatic backend selection based on availability ## Test Coverage - 19 new tests covering: - Basic functionality (initialization, preflight checks) - Edit planning (with/without LLM) - Safety validation (file limits, protected files) - Execution flow (success and failure paths) - Error handling (exceptions, LLM failures) - MCP registration ## Usage from tools.self_edit import register_self_edit_tool from mcp.registry import tool_registry # Register with MCP register_self_edit_tool(tool_registry, llm_adapter) Phase 2.2 will add Dashboard API endpoints and UI.
2026-02-26 12:03:36 -05:00
"""Tests for Self-Edit MCP Tool.
Tests the complete self-edit workflow with mocked dependencies.
"""
from __future__ import annotations
import tempfile
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from creative.tools.self_edit import (
feat: Self-Edit MCP Tool (Phase 2.1) Implements the Self-Edit MCP Tool that orchestrates the self-coding foundation: ## Core Features 1. **SelfEditTool** (src/tools/self_edit.py) - Complete self-modification orchestrator - Pre-flight safety checks (clean repo, on main branch) - Context gathering (codebase indexer + modification journal) - Feature branch creation (timmy/self-edit/{timestamp}) - LLM-based edit planning with fallback - Safety constraint validation - Aider integration (preferred) with fallback to direct editing - Automatic test execution via pytest - Commit on success, rollback on failure - Modification journaling with reflections 2. **Safety Constraints** - Max 3 files per commit - Max 100 lines changed - Protected files list (self-edit tool, foundation services) - Only modify files with test coverage - Max 3 retries on failure - Requires user confirmation (MCP tool registration) 3. **Execution Backends** - Aider integration: --auto-test --test-cmd pytest --yes --no-git - Direct editing fallback: LLM-based file modification with AST validation - Automatic backend selection based on availability ## Test Coverage - 19 new tests covering: - Basic functionality (initialization, preflight checks) - Edit planning (with/without LLM) - Safety validation (file limits, protected files) - Execution flow (success and failure paths) - Error handling (exceptions, LLM failures) - MCP registration ## Usage from tools.self_edit import register_self_edit_tool from mcp.registry import tool_registry # Register with MCP register_self_edit_tool(tool_registry, llm_adapter) Phase 2.2 will add Dashboard API endpoints and UI.
2026-02-26 12:03:36 -05:00
MAX_FILES_PER_COMMIT,
MAX_RETRIES,
PROTECTED_FILES,
EditPlan,
SelfEditResult,
SelfEditTool,
register_self_edit_tool,
self_edit_tool,
)
@pytest.fixture
def temp_repo():
    """Create a throwaway git repository holding one module and one test.

    Layout produced inside a fresh temporary directory::

        src/myproject/__init__.py   (empty)
        src/myproject/app.py        (defines ``hello()``)
        tests/test_app.py           (asserts ``hello() == "Hello"``)

    All files are committed once and the branch is renamed to ``main``.

    Yields:
        Path: root of the repository. The directory is deleted when the
        fixture is torn down (``TemporaryDirectory`` context exit).

    Raises:
        subprocess.CalledProcessError: if any git command exits non-zero.
    """
    import subprocess

    with tempfile.TemporaryDirectory() as tmpdir:
        repo_path = Path(tmpdir)

        def git(*args):
            """Run one git sub-command inside the repo, failing loudly."""
            subprocess.run(
                ["git", *args],
                cwd=repo_path, check=True, capture_output=True,
            )

        # Initialize git with a local identity so committing works even when
        # no identity is configured for the current user.
        git("init")
        git("config", "user.email", "test@test.com")
        git("config", "user.name", "Test")

        # Create src structure. The file bodies are spelled out line by line
        # so the indentation of the function bodies is explicit; the written
        # modules must be valid Python.
        src_path = repo_path / "src" / "myproject"
        src_path.mkdir(parents=True)
        (src_path / "__init__.py").write_text("")
        (src_path / "app.py").write_text(
            "\n"
            '"""Main application."""\n'
            "def hello():\n"
            '    return "Hello"\n'
        )

        # Create tests
        tests_path = repo_path / "tests"
        tests_path.mkdir()
        (tests_path / "test_app.py").write_text(
            "\n"
            '"""Tests for app."""\n'
            "from myproject.app import hello\n"
            "def test_hello():\n"
            '    assert hello() == "Hello"\n'
        )

        # Initial commit, then force the branch name to "main" regardless of
        # the local init.defaultBranch setting.
        git("add", ".")
        git("commit", "-m", "Initial")
        git("branch", "-M", "main")

        yield repo_path
@pytest.fixture(autouse=True)
def mock_settings():
    """Swap the self-edit module's ``settings`` for a mock in every test.

    The mock has ``self_modify_enabled`` set to True. It is yielded so a test
    can adjust it further, and the patch is undone when the test finishes.
    """
    settings_patch = patch('creative.tools.self_edit.settings')
    with settings_patch as fake_settings:
        fake_settings.self_modify_enabled = True
        yield fake_settings
@pytest.fixture
def mock_llm():
    """Return an async mock LLM adapter whose ``chat`` yields a canned plan.

    The response object exposes the plan text on ``.content``, using the
    ``KEY: value`` line format shown below.
    """
    plan_lines = [
        "APPROACH: Add error handling",
        "FILES_TO_MODIFY: src/myproject/app.py",
        "FILES_TO_CREATE:",
        "TESTS_TO_ADD: tests/test_app.py",
        "EXPLANATION: Wrap function in try/except",
    ]
    adapter = AsyncMock()
    adapter.chat.return_value = MagicMock(content="\n".join(plan_lines))
    return adapter
@pytest.mark.asyncio
class TestSelfEditToolBasics:
    """Construction and pre-flight checks of ``SelfEditTool``."""

    async def test_initialization(self, temp_repo):
        """The tool keeps the repo path and exposes all four services."""
        tool = SelfEditTool(repo_path=temp_repo)

        assert tool.repo_path == temp_repo
        for service_name in ("git", "indexer", "journal", "reflection"):
            assert getattr(tool, service_name) is not None

    async def test_preflight_checks_clean_repo(self, temp_repo):
        """A freshly committed repo on ``main`` passes pre-flight."""
        tool = SelfEditTool(repo_path=temp_repo)

        passed = await tool._preflight_checks()

        assert passed is True

    async def test_preflight_checks_dirty_repo(self, temp_repo):
        """A new, uncommitted file in the work tree fails pre-flight."""
        tool = SelfEditTool(repo_path=temp_repo)

        # Introduce an uncommitted change after the tool has been built.
        stray_file = temp_repo / "dirty.txt"
        stray_file.write_text("dirty")

        passed = await tool._preflight_checks()

        assert passed is False

    async def test_preflight_checks_wrong_branch(self, temp_repo):
        """Being on a branch other than ``main`` fails pre-flight."""
        import subprocess

        tool = SelfEditTool(repo_path=temp_repo)

        # Move the work tree onto a new feature branch.
        checkout_cmd = ["git", "checkout", "-b", "feature"]
        subprocess.run(
            checkout_cmd, cwd=temp_repo, check=True, capture_output=True,
        )

        passed = await tool._preflight_checks()

        assert passed is False
@pytest.mark.asyncio
class TestSelfEditToolPlanning:
    """Tests for ``SelfEditTool._plan_edit``."""

    async def test_plan_edit_with_llm(self, temp_repo, mock_llm):
        """The plan reflects the approach and files named by the LLM reply."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        await tool._ensure_indexed()

        target = "src/myproject/app.py"
        plan = await tool._plan_edit(
            task_description="Add error handling",
            relevant_files=[target],
            similar_attempts=[],
        )

        assert isinstance(plan, EditPlan)
        assert plan.approach == "Add error handling"
        assert target in plan.files_to_modify

    async def test_plan_edit_without_llm(self, temp_repo):
        """With no LLM adapter a plan naming at least one file is returned."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=None)
        await tool._ensure_indexed()

        plan = await tool._plan_edit(
            task_description="Add feature",
            relevant_files=["src/myproject/app.py"],
            similar_attempts=[],
        )

        assert isinstance(plan, EditPlan)
        assert len(plan.files_to_modify) > 0

    async def test_plan_respects_max_files(self, temp_repo, mock_llm):
        """A reply listing ten files is capped at MAX_FILES_PER_COMMIT."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        await tool._ensure_indexed()

        # Have the LLM propose far more files than one commit may touch.
        many_files = [f"file{i}.py" for i in range(10)]
        oversized_reply = "FILES_TO_MODIFY: " + ",".join(many_files)
        mock_llm.chat.return_value = MagicMock(content=oversized_reply)

        plan = await tool._plan_edit(
            task_description="Test",
            relevant_files=list(many_files),
            similar_attempts=[],
        )

        assert len(plan.files_to_modify) <= MAX_FILES_PER_COMMIT
@pytest.mark.asyncio
class TestSelfEditToolValidation:
    """Tests for the safety checks in ``SelfEditTool._validate_plan``."""

    @staticmethod
    def _plan_touching(files):
        """Build an ``EditPlan`` that only modifies *files*."""
        return EditPlan(
            approach="Test",
            files_to_modify=files,
            files_to_create=[],
            tests_to_add=[],
            explanation="Test",
        )

    async def test_validate_plan_too_many_files(self, temp_repo):
        """One file over MAX_FILES_PER_COMMIT is rejected."""
        tool = SelfEditTool(repo_path=temp_repo)
        too_many = [f"file{i}.py" for i in range(MAX_FILES_PER_COMMIT + 1)]

        verdict = tool._validate_plan(self._plan_touching(too_many))

        assert verdict is False

    async def test_validate_plan_protected_file(self, temp_repo):
        """A plan that modifies the self-edit tool itself is rejected."""
        tool = SelfEditTool(repo_path=temp_repo)

        # NOTE(review): this path is hard-coded rather than taken from
        # PROTECTED_FILES; confirm it still matches the module's location.
        protected = ["src/tools/self_edit.py"]
        verdict = tool._validate_plan(self._plan_touching(protected))

        assert verdict is False

    async def test_validate_plan_valid(self, temp_repo):
        """A single ordinary source file is accepted."""
        tool = SelfEditTool(repo_path=temp_repo)
        ordinary = ["src/myproject/app.py"]

        verdict = tool._validate_plan(self._plan_touching(ordinary))

        assert verdict is True
@pytest.mark.asyncio
class TestSelfEditToolExecution:
    """Tests for the text helpers used while executing an edit."""

    async def test_strip_code_fences(self, temp_repo):
        """Markdown fences are removed and the code inside is kept."""
        tool = SelfEditTool(repo_path=temp_repo)
        fenced = "```python\ndef test(): pass\n```"

        stripped = tool._strip_code_fences(fenced)

        assert "```" not in stripped
        assert "def test(): pass" in stripped

    async def test_parse_list(self, temp_repo):
        """Comma lists are split; empty and placeholder values give ``[]``."""
        tool = SelfEditTool(repo_path=temp_repo)

        cases = [
            ("a, b, c", ["a", "b", "c"]),
            ("none", []),
            ("", []),
            ("N/A", []),
        ]
        for raw, expected in cases:
            assert tool._parse_list(raw) == expected
@pytest.mark.asyncio
class TestSelfEditToolIntegration:
    """End-to-end ``execute`` runs with the edit backends mocked out."""

    async def test_successful_edit_flow(self, temp_repo, mock_llm):
        """A successful direct edit yields a successful, journaled result."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)

        # Report Aider as unavailable and stub the direct-edit backend so it
        # reports success.
        no_aider = patch.object(tool, '_aider_available', return_value=False)
        direct_edit = patch.object(tool, '_execute_direct_edit')
        with no_aider, direct_edit as mock_exec:
            mock_exec.return_value = {
                "success": True,
                "test_output": "1 passed",
            }
            result = await tool.execute("Add error handling")

        assert result.success is True
        assert result.attempt_id is not None

    async def test_failed_edit_with_rollback(self, temp_repo, mock_llm):
        """When every edit attempt fails the result reports the failure."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)

        # Every call to the edit step reports failing tests.
        failing_outcome = {
            "success": False,
            "error": "Tests failed",
            "test_output": "1 failed",
        }
        with patch.object(tool, '_execute_edit') as mock_exec:
            mock_exec.return_value = failing_outcome
            result = await tool.execute("Add broken feature")

        assert result.success is False
        assert result.attempt_id is not None
        message = result.message.lower()
        assert "failed" in message or "retry" in message

    async def test_preflight_failure(self, temp_repo):
        """A dirty work tree stops ``execute`` with a pre-flight message."""
        tool = SelfEditTool(repo_path=temp_repo)

        # Make the repo dirty with an uncommitted file.
        (temp_repo / "dirty.txt").write_text("dirty")

        result = await tool.execute("Some task")

        assert result.success is False
        assert "pre-flight" in result.message.lower()
@pytest.mark.asyncio
class TestSelfEditMCPRegistration:
    """Tests for ``register_self_edit_tool``."""

    async def test_register_self_edit_tool(self):
        """Registration calls ``registry.register`` once with the tool's metadata."""
        registry = MagicMock()
        llm = AsyncMock()

        register_self_edit_tool(registry, llm)

        registry.register.assert_called_once()
        registered = registry.register.call_args.kwargs
        assert registered["name"] == "self_edit"
        assert registered["requires_confirmation"] is True
        assert "self_coding" in registered["category"]
@pytest.mark.asyncio
class TestSelfEditGlobalTool:
    """Tests for the module-level ``self_edit_tool`` entry point."""

    async def test_self_edit_tool_singleton(self, temp_repo):
        """Calling ``self_edit_tool`` with no cached instance builds one.

        With the module's ``_self_edit_tool`` slot cleared, a single call
        must construct ``SelfEditTool`` exactly once and call ``execute``
        exactly once.
        """
        from creative.tools import self_edit as self_edit_module

        # Clear the cached instance for the duration of this test only.
        # patch.object restores the previous value on exit, so the instance
        # created below (whose __init__ is mocked and therefore sets no
        # attributes) cannot leak into later tests. create=True keeps the
        # reset working even if the attribute has not been defined yet.
        reset_singleton = patch.object(
            self_edit_module, "_self_edit_tool", None, create=True,
        )
        fake_init = patch.object(SelfEditTool, '__init__', return_value=None)
        fake_execute = patch.object(SelfEditTool, 'execute')

        with reset_singleton, fake_init as mock_init, fake_execute as mock_execute:
            mock_execute.return_value = SelfEditResult(
                success=True,
                message="Test",
            )

            # First call should initialize the tool and delegate to execute.
            await self_edit_tool("Test task")

            mock_init.assert_called_once()
            mock_execute.assert_called_once()
@pytest.mark.asyncio
class TestSelfEditErrorHandling:
    """Tests for failure paths in ``execute`` and ``_plan_edit``."""

    async def test_exception_handling(self, temp_repo):
        """An exception during pre-flight becomes a failed result."""
        tool = SelfEditTool(repo_path=temp_repo)

        # Make the pre-flight step raise instead of returning a verdict.
        boom = Exception("Unexpected")
        with patch.object(tool, '_preflight_checks', side_effect=boom):
            result = await tool.execute("Test task")

        assert result.success is False
        assert "exception" in result.message.lower()

    async def test_llm_failure_fallback(self, temp_repo, mock_llm):
        """If the LLM call raises, planning still returns a usable plan."""
        tool = SelfEditTool(repo_path=temp_repo, llm_adapter=mock_llm)
        await tool._ensure_indexed()

        # Every chat call now raises.
        mock_llm.chat.side_effect = Exception("LLM timeout")

        plan = await tool._plan_edit(
            task_description="Test",
            relevant_files=["src/app.py"],
            similar_attempts=[],
        )

        # A fallback plan naming at least one file is expected.
        assert isinstance(plan, EditPlan)
        assert len(plan.files_to_modify) > 0