Adds SelfModifyLoop — an edit→validate→test→commit cycle that can read its own failure reports, diagnose root causes, and restart autonomously.

Key capabilities:
- Multi-backend LLM: Anthropic Claude API, Ollama, or auto-detect
- Syntax validation via compile() before writing to disk
- Autonomous self-correction loop with configurable max cycles
- XML-based output format to avoid triple-quote delimiter conflicts
- Branch creation skipped by default to prevent container restarts
- CLI: self-modify run "instruction" --backend auto --autonomous
- 939 tests passing, 30 skipped

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
451 lines
18 KiB
Python
451 lines
18 KiB
Python
"""Tests for the self-modification loop (self_modify/loop.py).
|
|
|
|
All tests are fully mocked — no Ollama, no real file I/O, no git.
|
|
"""
|
|
|
|
from unittest.mock import MagicMock, patch
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from self_modify.loop import SelfModifyLoop, ModifyRequest, ModifyResult
|
|
|
|
|
|
# ── Dataclass tests ───────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestModifyRequest:
    """Field-level checks on the ``ModifyRequest`` dataclass."""

    def test_defaults(self):
        """A request built from only an instruction has no targets and dry_run off."""
        request = ModifyRequest(instruction="Fix the bug")

        assert request.instruction == "Fix the bug"
        assert request.target_files == []
        assert request.dry_run is False

    def test_with_target_files(self):
        """Explicitly supplied target files and dry_run flag are stored as given."""
        request = ModifyRequest(
            instruction="Add docstring", target_files=["src/foo.py"], dry_run=True
        )

        assert request.target_files == ["src/foo.py"]
        assert request.dry_run is True
|
|
|
|
|
|
class TestModifyResult:
    """Field-level checks on the ``ModifyResult`` dataclass."""

    def test_success_result(self):
        """A populated success keeps its values; error and autonomous_cycles stay default."""
        outcome = ModifyResult(
            success=True,
            files_changed=["src/foo.py"],
            test_passed=True,
            commit_sha="abc12345",
            branch_name="timmy/self-modify-123",
            llm_response="...",
            attempts=1,
        )

        assert outcome.success
        assert outcome.commit_sha == "abc12345"
        assert outcome.error is None
        assert outcome.autonomous_cycles == 0

    def test_failure_result(self):
        """A failure carries its error message and an empty files_changed list."""
        outcome = ModifyResult(success=False, error="something broke")

        assert not outcome.success
        assert outcome.error == "something broke"
        assert outcome.files_changed == []
|
|
|
|
|
|
# ── SelfModifyLoop unit tests ────────────────────────────────────────────────
|
|
|
|
|
|
class TestSelfModifyLoop:
    """Constructor and ``run()`` tests for ``SelfModifyLoop`` with collaborators mocked."""

    @staticmethod
    def _configure(mock_settings, max_retries):
        """Set the patched settings attributes shared by the enabled ``run()`` tests."""
        mock_settings.self_modify_enabled = True
        mock_settings.self_modify_max_retries = max_retries
        mock_settings.self_modify_allowed_dirs = "src,tests"
        mock_settings.self_modify_backend = "ollama"

    def test_init_defaults(self):
        """A loop built without arguments is expected to allow 2 retries."""
        assert SelfModifyLoop()._max_retries == 2

    def test_init_custom_retries(self):
        """The max_retries argument is stored on the instance."""
        assert SelfModifyLoop(max_retries=5)._max_retries == 5

    def test_init_backend(self):
        """The backend argument is stored on the instance."""
        assert SelfModifyLoop(backend="anthropic")._backend == "anthropic"

    def test_init_autonomous(self):
        """Autonomous mode and its cycle limit are stored on the instance."""
        loop = SelfModifyLoop(autonomous=True, max_autonomous_cycles=5)

        assert loop._autonomous is True
        assert loop._max_autonomous_cycles == 5

    @patch("self_modify.loop.settings")
    def test_run_disabled(self, mock_settings):
        """With the feature flag off, run() fails with a 'disabled' error."""
        mock_settings.self_modify_enabled = False

        outcome = SelfModifyLoop().run(ModifyRequest(instruction="test"))

        assert not outcome.success
        assert "disabled" in outcome.error.lower()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_run_no_target_files(self, mock_settings):
        """If no target files can be inferred, run() fails with a matching error."""
        self._configure(mock_settings, max_retries=0)
        loop = SelfModifyLoop()
        loop._infer_target_files = MagicMock(return_value=[])

        outcome = loop.run(ModifyRequest(instruction="do something vague"))

        assert not outcome.success
        assert "no target files" in outcome.error.lower()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_run_success_path(self, mock_settings):
        """Passing tests lead to exactly one test run, one commit, and a success result."""
        self._configure(mock_settings, max_retries=2)
        loop = SelfModifyLoop()
        loop._validate_paths = MagicMock()
        loop._read_files = MagicMock(return_value={"src/foo.py": "old content"})
        loop._generate_edits = MagicMock(
            return_value=({"src/foo.py": "x = 1\n"}, "llm raw")
        )
        loop._write_files = MagicMock(return_value=["src/foo.py"])
        loop._run_tests = MagicMock(return_value=(True, "5 passed"))
        loop._git_commit = MagicMock(return_value="abc12345")
        request = ModifyRequest(instruction="Add docstring", target_files=["src/foo.py"])

        outcome = loop.run(request)

        assert outcome.success
        assert outcome.test_passed
        assert outcome.commit_sha == "abc12345"
        assert outcome.files_changed == ["src/foo.py"]
        loop._run_tests.assert_called_once()
        loop._git_commit.assert_called_once()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_run_test_failure_reverts(self, mock_settings):
        """Failing tests with no retries give a failed result and a revert call."""
        self._configure(mock_settings, max_retries=0)
        loop = SelfModifyLoop(max_retries=0)
        loop._validate_paths = MagicMock()
        loop._read_files = MagicMock(return_value={"src/foo.py": "old content"})
        loop._generate_edits = MagicMock(
            return_value=({"src/foo.py": "x = 1\n"}, "llm raw")
        )
        loop._write_files = MagicMock(return_value=["src/foo.py"])
        loop._run_tests = MagicMock(return_value=(False, "1 failed"))
        loop._revert_files = MagicMock()
        request = ModifyRequest(instruction="Break it", target_files=["src/foo.py"])

        outcome = loop.run(request)

        assert not outcome.success
        assert not outcome.test_passed
        loop._revert_files.assert_called()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_dry_run(self, mock_settings):
        """A dry run succeeds and reports the files the edits would touch."""
        self._configure(mock_settings, max_retries=2)
        loop = SelfModifyLoop()
        loop._validate_paths = MagicMock()
        loop._read_files = MagicMock(return_value={"src/foo.py": "old content"})
        loop._generate_edits = MagicMock(
            return_value=({"src/foo.py": "x = 1\n"}, "llm raw")
        )
        request = ModifyRequest(
            instruction="Add docstring",
            target_files=["src/foo.py"],
            dry_run=True,
        )

        outcome = loop.run(request)

        assert outcome.success
        assert outcome.files_changed == ["src/foo.py"]
|
|
|
|
|
|
# ── Syntax validation tests ─────────────────────────────────────────────────
|
|
|
|
|
|
class TestSyntaxValidation:
    """Tests for ``SelfModifyLoop._validate_syntax`` and how ``run()`` reacts to its errors."""

    def test_valid_python_passes(self):
        """Well-formed Python yields an empty error mapping."""
        loop = SelfModifyLoop()
        errors = loop._validate_syntax({"src/foo.py": "x = 1\nprint(x)\n"})
        assert errors == {}

    def test_invalid_python_caught(self):
        """A truncated ``def`` is reported under its path, with a line reference."""
        loop = SelfModifyLoop()
        errors = loop._validate_syntax({"src/foo.py": "def foo(\n"})
        assert "src/foo.py" in errors
        assert "line" in errors["src/foo.py"]

    def test_unterminated_string_caught(self):
        """Sources that leave a triple-quoted string open are reported as errors."""
        loop = SelfModifyLoop()
        samples = (
            # The first literal closes after "TIMMY = "; the final delimiter then
            # opens a second string that never ends, so this is NOT valid Python.
            '"""\nTIMMY = """\nstuff\n"""\n',
            # A string that is opened and simply never closed.
            '"""\nunclosed string\n',
        )
        # Check every sample; previously the first result was overwritten unchecked.
        for source in samples:
            errors = loop._validate_syntax({"src/foo.py": source})
            assert "src/foo.py" in errors

    def test_non_python_files_skipped(self):
        """Files without a Python extension are not syntax-checked."""
        loop = SelfModifyLoop()
        errors = loop._validate_syntax({"README.md": "this is not python {{{}"})
        assert errors == {}

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_syntax_error_skips_write(self, mock_settings):
        """When LLM produces invalid syntax, we skip writing and retry."""
        mock_settings.self_modify_enabled = True
        mock_settings.self_modify_max_retries = 1
        mock_settings.self_modify_allowed_dirs = "src,tests"
        mock_settings.self_modify_backend = "ollama"

        loop = SelfModifyLoop(max_retries=1)
        loop._read_files = MagicMock(return_value={"src/foo.py": "x = 1\n"})
        # First call returns broken syntax, second returns valid
        loop._generate_edits = MagicMock(side_effect=[
            ({"src/foo.py": "def foo(\n"}, "bad llm"),
            ({"src/foo.py": "def foo():\n pass\n"}, "good llm"),
        ])
        loop._write_files = MagicMock(return_value=["src/foo.py"])
        loop._run_tests = MagicMock(return_value=(True, "passed"))
        loop._git_commit = MagicMock(return_value="abc123")
        loop._validate_paths = MagicMock()

        result = loop.run(
            ModifyRequest(instruction="Fix foo", target_files=["src/foo.py"])
        )

        assert result.success
        # _write_files should only be called once (for the valid attempt)
        loop._write_files.assert_called_once()
|
|
|
|
|
|
# ── Multi-backend tests ──────────────────────────────────────────────────────
|
|
|
|
|
|
class TestBackendResolution:
    """Tests for ``SelfModifyLoop._resolve_backend`` across explicit and auto modes."""

    def test_resolve_ollama(self):
        """An explicit 'ollama' backend resolves to itself."""
        resolved = SelfModifyLoop(backend="ollama")._resolve_backend()
        assert resolved == "ollama"

    def test_resolve_anthropic(self):
        """An explicit 'anthropic' backend resolves to itself."""
        resolved = SelfModifyLoop(backend="anthropic")._resolve_backend()
        assert resolved == "anthropic"

    @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "sk-test-123"})
    def test_resolve_auto_with_key(self):
        """'auto' is expected to pick anthropic when ANTHROPIC_API_KEY is set."""
        resolved = SelfModifyLoop(backend="auto")._resolve_backend()
        assert resolved == "anthropic"

    @patch.dict("os.environ", {}, clear=True)
    def test_resolve_auto_without_key(self):
        """'auto' is expected to pick ollama when the environment is empty."""
        resolved = SelfModifyLoop(backend="auto")._resolve_backend()
        assert resolved == "ollama"
|
|
|
|
|
|
# ── Autonomous loop tests ────────────────────────────────────────────────────
|
|
|
|
|
|
class TestAutonomousLoop:
    """Tests for the autonomous retry cycle of ``run()`` and for ``_diagnose_failure``."""

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_autonomous_retries_after_failure(self, mock_settings):
        """A failed first run followed by a passing autonomous cycle reports success."""
        mock_settings.self_modify_enabled = True
        mock_settings.self_modify_max_retries = 0
        mock_settings.self_modify_allowed_dirs = "src,tests"
        mock_settings.self_modify_backend = "ollama"

        loop = SelfModifyLoop(max_retries=0, autonomous=True, max_autonomous_cycles=2)
        loop._validate_paths = MagicMock()
        loop._read_files = MagicMock(return_value={"src/foo.py": "x = 1\n"})

        # A real function rather than a bare return_value: a call with arguments
        # outside this signature raises TypeError instead of being accepted.
        def fake_generate(instruction, contents, prev_test_output=None, prev_syntax_errors=None):
            return ({"src/foo.py": "x = 2\n"}, "llm raw")

        loop._generate_edits = MagicMock(side_effect=fake_generate)
        loop._write_files = MagicMock(return_value=["src/foo.py"])
        loop._revert_files = MagicMock()

        # First call fails tests, second succeeds
        loop._run_tests = MagicMock(side_effect=[(False, "FAILED"), (True, "PASSED")])
        loop._git_commit = MagicMock(return_value="abc123")
        loop._diagnose_failure = MagicMock(return_value="Fix: do X instead of Y")

        result = loop.run(
            ModifyRequest(instruction="Fix foo", target_files=["src/foo.py"])
        )

        assert result.success
        assert result.autonomous_cycles == 1
        loop._diagnose_failure.assert_called_once()

    def test_diagnose_failure_reads_report(self, tmp_path):
        """With an existing report, the diagnosis comes from a single LLM call."""
        report = tmp_path / "report.md"
        report.write_text("# Report\n**Error:** SyntaxError line 5\n")

        loop = SelfModifyLoop(backend="ollama")
        loop._call_llm = MagicMock(return_value="ROOT CAUSE: Missing closing paren")

        diagnosis = loop._diagnose_failure(report)

        assert "Missing closing paren" in diagnosis
        loop._call_llm.assert_called_once()

    def test_diagnose_failure_handles_missing_report(self, tmp_path):
        """A report path that does not exist yields ``None``."""
        loop = SelfModifyLoop(backend="ollama")
        result = loop._diagnose_failure(tmp_path / "nonexistent.md")
        assert result is None
|
|
|
|
|
|
# ── Path validation tests ─────────────────────────────────────────────────────
|
|
|
|
|
|
class TestPathValidation:
    """Tests for ``SelfModifyLoop._validate_paths`` against a fixed repository root."""

    @staticmethod
    def _make_loop():
        """Build a loop rooted at the repository path used throughout these tests."""
        return SelfModifyLoop(repo_path=Path("/tmp/test-repo"))

    def test_rejects_path_outside_repo(self):
        """A path that climbs out of the repository raises ValueError."""
        loop = self._make_loop()
        with pytest.raises(ValueError, match="escapes repository"):
            loop._validate_paths(["../../etc/passwd"])

    def test_rejects_path_outside_allowed_dirs(self):
        """A path under ``docs/`` is expected to be rejected as not allowed."""
        loop = self._make_loop()
        with pytest.raises(ValueError, match="not in allowed directories"):
            loop._validate_paths(["docs/secret.py"])

    def test_accepts_src_path(self):
        """A path under ``src/`` passes validation without raising."""
        self._make_loop()._validate_paths(["src/some_module.py"])

    def test_accepts_tests_path(self):
        """A path under ``tests/`` passes validation without raising."""
        self._make_loop()._validate_paths(["tests/test_something.py"])
|
|
|
|
|
|
# ── File inference tests ──────────────────────────────────────────────────────
|
|
|
|
|
|
class TestFileInference:
    """Tests for ``SelfModifyLoop._infer_target_files`` on free-text instructions."""

    def test_infer_explicit_py_path(self):
        """A .py path written in the instruction appears in the inferred files."""
        inferred = SelfModifyLoop()._infer_target_files("fix bug in src/dashboard/app.py")
        assert "src/dashboard/app.py" in inferred

    def test_infer_from_keyword_config(self):
        """An instruction mentioning 'config' is expected to yield src/config.py."""
        inferred = SelfModifyLoop()._infer_target_files(
            "update the config to add a new setting"
        )
        assert "src/config.py" in inferred

    def test_infer_from_keyword_agent(self):
        """An instruction mentioning 'agent' is expected to yield src/timmy/agent.py."""
        inferred = SelfModifyLoop()._infer_target_files("modify the agent prompt")
        assert "src/timmy/agent.py" in inferred

    def test_infer_returns_empty_for_vague(self):
        """An instruction with no path or known keyword yields an empty list."""
        inferred = SelfModifyLoop()._infer_target_files("do something cool")
        assert inferred == []
|
|
|
|
|
|
# ── NLU intent tests ──────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestCodeIntent:
    """Tests that ``voice.nlu.detect_intent`` classifies code-editing phrases as 'code'.

    ``detect_intent`` is imported inside each test rather than at module level,
    as in the original layout.
    """

    def test_detects_modify_code(self):
        """'modify the code ...' maps to the code intent."""
        from voice.nlu import detect_intent

        assert detect_intent("modify the code in config.py").name == "code"

    def test_detects_self_modify(self):
        """'self-modify ...' maps to the code intent."""
        from voice.nlu import detect_intent

        assert detect_intent("self-modify to add a new endpoint").name == "code"

    def test_detects_edit_source(self):
        """'edit the source ...' maps to the code intent."""
        from voice.nlu import detect_intent

        assert detect_intent("edit the source to fix the bug").name == "code"

    def test_detects_update_your_code(self):
        """'update your code ...' maps to the code intent."""
        from voice.nlu import detect_intent

        assert detect_intent("update your code to handle errors").name == "code"

    def test_detects_fix_function(self):
        """'fix the function ...' maps to the code intent."""
        from voice.nlu import detect_intent

        assert detect_intent("fix the function that calculates totals").name == "code"

    def test_does_not_match_general_chat(self):
        """Small talk unrelated to code maps to the chat intent."""
        from voice.nlu import detect_intent

        assert detect_intent("tell me about the weather today").name == "chat"

    def test_extracts_target_file_entity(self):
        """A file path in the utterance is exposed as the 'target_file' entity."""
        from voice.nlu import detect_intent

        detected = detect_intent("modify file src/config.py to add debug flag")
        assert detected.entities.get("target_file") == "src/config.py"
|
|
|
|
|
|
# ── Route tests ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestSelfModifyRoutes:
    """HTTP route tests for ``/self-modify``, using the ``client`` fixture."""

    def test_status_endpoint(self, client):
        """GET /self-modify/status returns 200 with 'enabled' reported as False."""
        response = client.get("/self-modify/status")
        assert response.status_code == 200

        payload = response.json()
        assert "enabled" in payload
        assert payload["enabled"] is False  # Default

    def test_run_when_disabled(self, client):
        """POST /self-modify/run is expected to return 403 while the feature is off."""
        response = client.post("/self-modify/run", data={"instruction": "test"})
        assert response.status_code == 403
|
|
|
|
|
|
# ── DirectToolExecutor integration ────────────────────────────────────────────
|
|
|
|
|
|
class TestDirectToolExecutor:
    """Integration checks for ``swarm.tool_executor.DirectToolExecutor``."""

    def test_code_task_falls_back_when_disabled(self):
        """A code-style task still returns a dict carrying 'result' or 'success'."""
        from swarm.tool_executor import DirectToolExecutor

        forge = DirectToolExecutor("forge", "forge-test-001")
        outcome = forge.execute_with_tools("modify the code to fix bug")

        # Should fall back to simulated since self_modify_enabled=False
        assert isinstance(outcome, dict)
        assert "result" in outcome or "success" in outcome

    def test_non_code_task_delegates_to_parent(self):
        """A non-code task returns a dict."""
        from swarm.tool_executor import DirectToolExecutor

        echo = DirectToolExecutor("echo", "echo-test-001")
        outcome = echo.execute_with_tools("search for information")

        assert isinstance(outcome, dict)
|