forked from Rockachopa/Timmy-time-dashboard
refactor: Phase 3 — reorganize tests into module-mirroring subdirectories
Move 97 test files from flat tests/ into 13 subdirectories: tests/dashboard/ (8 files — routes, mobile, mission control) tests/swarm/ (17 files — coordinator, docker, routing, tasks) tests/timmy/ (12 files — agent, backends, CLI, tools) tests/self_coding/ (14 files — git safety, indexer, self-modify) tests/lightning/ (3 files — L402, LND, interface) tests/creative/ (8 files — assembler, director, image/music/video) tests/integrations/ (10 files — chat bridge, telegram, voice, websocket) tests/mcp/ (4 files — bootstrap, discovery, executor) tests/spark/ (3 files — engine, tools, events) tests/hands/ (3 files — registry, oracle, phase5) tests/scripture/ (1 file) tests/infrastructure/ (3 files — router cascade, API) tests/security/ (3 files — XSS, regression) Fix Path(__file__) reference in test_mobile_scenarios.py for new depth. Add __init__.py to all test subdirectories. Tests: 1503 passed, 9 failed (pre-existing), 53 errors (pre-existing) https://claude.ai/code/session_019oMFNvD8uSGSSmBMGkBfQN
This commit is contained in:
450
tests/self_coding/test_self_modify.py
Normal file
450
tests/self_coding/test_self_modify.py
Normal file
@@ -0,0 +1,450 @@
|
||||
"""Tests for the self-modification loop (self_modify/loop.py).
|
||||
|
||||
All tests are fully mocked — no Ollama, no real file I/O, no git.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from self_modify.loop import SelfModifyLoop, ModifyRequest, ModifyResult
|
||||
|
||||
|
||||
# ── Dataclass tests ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestModifyRequest:
    """Field checks for the ``ModifyRequest`` value object."""

    def test_defaults(self):
        """With only an instruction given, the remaining fields are empty/False."""
        request = ModifyRequest(instruction="Fix the bug")

        assert request.instruction == "Fix the bug"
        assert request.target_files == []
        assert request.dry_run is False

    def test_with_target_files(self):
        """Explicit ``target_files`` and ``dry_run`` values are stored as passed."""
        fields = {
            "instruction": "Add docstring",
            "target_files": ["src/foo.py"],
            "dry_run": True,
        }

        request = ModifyRequest(**fields)

        assert request.target_files == ["src/foo.py"]
        assert request.dry_run is True
|
||||
|
||||
|
||||
class TestModifyResult:
    """Field checks for the ``ModifyResult`` value object."""

    def test_success_result(self):
        """A fully populated success result keeps its values; unset fields default."""
        fields = {
            "success": True,
            "files_changed": ["src/foo.py"],
            "test_passed": True,
            "commit_sha": "abc12345",
            "branch_name": "timmy/self-modify-123",
            "llm_response": "...",
            "attempts": 1,
        }

        outcome = ModifyResult(**fields)

        assert outcome.success
        assert outcome.commit_sha == "abc12345"
        # Neither of these was passed in, so they show the defaults.
        assert outcome.error is None
        assert outcome.autonomous_cycles == 0

    def test_failure_result(self):
        """A failure result carries its error text and an empty changed-files list."""
        outcome = ModifyResult(success=False, error="something broke")

        assert not outcome.success
        assert outcome.error == "something broke"
        assert outcome.files_changed == []
|
||||
|
||||
|
||||
# ── SelfModifyLoop unit tests ────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSelfModifyLoop:
    """Constructor options and ``run()`` outcomes of ``SelfModifyLoop``.

    The ``run()`` tests patch ``self_modify.loop.settings`` and, where the
    pipeline is exercised, swap the loop's private steps for ``MagicMock``
    stand-ins.
    """

    @staticmethod
    def _enable(settings_mock, max_retries):
        """Set the four settings attributes used by the enabled-path tests."""
        settings_mock.self_modify_enabled = True
        settings_mock.self_modify_max_retries = max_retries
        settings_mock.self_modify_allowed_dirs = "src,tests"
        settings_mock.self_modify_backend = "ollama"

    @staticmethod
    def _stub_single_edit(loop):
        """Stub reading, edit generation and path validation for ``src/foo.py``."""
        loop._read_files = MagicMock(return_value={"src/foo.py": "old content"})
        loop._generate_edits = MagicMock(
            return_value=({"src/foo.py": "x = 1\n"}, "llm raw")
        )
        loop._validate_paths = MagicMock()

    def test_init_defaults(self):
        """A loop built without arguments reports ``_max_retries`` of 2."""
        assert SelfModifyLoop()._max_retries == 2

    def test_init_custom_retries(self):
        """``max_retries`` passed to the constructor is stored on the loop."""
        assert SelfModifyLoop(max_retries=5)._max_retries == 5

    def test_init_backend(self):
        """``backend`` passed to the constructor is stored on the loop."""
        assert SelfModifyLoop(backend="anthropic")._backend == "anthropic"

    def test_init_autonomous(self):
        """Autonomous flag and cycle limit are stored on the loop."""
        loop = SelfModifyLoop(autonomous=True, max_autonomous_cycles=5)

        assert loop._autonomous is True
        assert loop._max_autonomous_cycles == 5

    @patch("self_modify.loop.settings")
    def test_run_disabled(self, mock_settings):
        """With the feature flag off, ``run()`` fails and the error says "disabled"."""
        mock_settings.self_modify_enabled = False

        outcome = SelfModifyLoop().run(ModifyRequest(instruction="test"))

        assert not outcome.success
        assert "disabled" in outcome.error.lower()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_run_no_target_files(self, mock_settings):
        """When file inference yields nothing, ``run()`` fails with "no target files"."""
        self._enable(mock_settings, max_retries=0)
        loop = SelfModifyLoop()
        loop._infer_target_files = MagicMock(return_value=[])

        outcome = loop.run(ModifyRequest(instruction="do something vague"))

        assert not outcome.success
        assert "no target files" in outcome.error.lower()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_run_success_path(self, mock_settings):
        """Passing tests lead to a single commit and a successful result."""
        self._enable(mock_settings, max_retries=2)
        loop = SelfModifyLoop()
        self._stub_single_edit(loop)
        loop._write_files = MagicMock(return_value=["src/foo.py"])
        loop._run_tests = MagicMock(return_value=(True, "5 passed"))
        loop._git_commit = MagicMock(return_value="abc12345")
        request = ModifyRequest(
            instruction="Add docstring", target_files=["src/foo.py"]
        )

        outcome = loop.run(request)

        assert outcome.success
        assert outcome.test_passed
        assert outcome.commit_sha == "abc12345"
        assert outcome.files_changed == ["src/foo.py"]
        loop._run_tests.assert_called_once()
        loop._git_commit.assert_called_once()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_run_test_failure_reverts(self, mock_settings):
        """Failing tests with no retries left produce a failure and a revert call."""
        self._enable(mock_settings, max_retries=0)
        loop = SelfModifyLoop(max_retries=0)
        self._stub_single_edit(loop)
        loop._write_files = MagicMock(return_value=["src/foo.py"])
        loop._run_tests = MagicMock(return_value=(False, "1 failed"))
        loop._revert_files = MagicMock()
        request = ModifyRequest(instruction="Break it", target_files=["src/foo.py"])

        outcome = loop.run(request)

        assert not outcome.success
        assert not outcome.test_passed
        loop._revert_files.assert_called()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_dry_run(self, mock_settings):
        """A dry-run request succeeds and lists the file the edit targeted."""
        self._enable(mock_settings, max_retries=2)
        loop = SelfModifyLoop()
        self._stub_single_edit(loop)
        request = ModifyRequest(
            instruction="Add docstring",
            target_files=["src/foo.py"],
            dry_run=True,
        )

        outcome = loop.run(request)

        assert outcome.success
        assert outcome.files_changed == ["src/foo.py"]
|
||||
|
||||
|
||||
# ── Syntax validation tests ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSyntaxValidation:
    """Tests for ``SelfModifyLoop._validate_syntax`` and its effect on ``run()``."""

    def test_valid_python_passes(self):
        """Well-formed Python produces an empty error mapping."""
        loop = SelfModifyLoop()
        errors = loop._validate_syntax({"src/foo.py": "x = 1\nprint(x)\n"})
        assert errors == {}

    def test_invalid_python_caught(self):
        """An unclosed parenthesis is reported for its file, mentioning a line."""
        loop = SelfModifyLoop()
        errors = loop._validate_syntax({"src/foo.py": "def foo(\n"})
        assert "src/foo.py" in errors
        assert "line" in errors["src/foo.py"]

    def test_unterminated_string_caught(self):
        """A triple-quoted string that is never closed is reported for its file."""
        loop = SelfModifyLoop()
        broken = '"""\nunclosed string\n'
        errors = loop._validate_syntax({"src/foo.py": broken})
        assert "src/foo.py" in errors

    def test_non_python_files_skipped(self):
        """Content of a non-``.py`` path yields no errors even if it is not Python."""
        loop = SelfModifyLoop()
        errors = loop._validate_syntax({"README.md": "this is not python {{{}"})
        assert errors == {}

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_syntax_error_skips_write(self, mock_settings):
        """When LLM produces invalid syntax, we skip writing and retry."""
        mock_settings.self_modify_enabled = True
        mock_settings.self_modify_max_retries = 1
        mock_settings.self_modify_allowed_dirs = "src,tests"
        mock_settings.self_modify_backend = "ollama"

        loop = SelfModifyLoop(max_retries=1)
        loop._read_files = MagicMock(return_value={"src/foo.py": "x = 1\n"})
        # First call returns broken syntax, second returns valid
        loop._generate_edits = MagicMock(side_effect=[
            ({"src/foo.py": "def foo(\n"}, "bad llm"),
            ({"src/foo.py": "def foo():\n pass\n"}, "good llm"),
        ])
        loop._write_files = MagicMock(return_value=["src/foo.py"])
        loop._run_tests = MagicMock(return_value=(True, "passed"))
        loop._git_commit = MagicMock(return_value="abc123")
        loop._validate_paths = MagicMock()

        result = loop.run(
            ModifyRequest(instruction="Fix foo", target_files=["src/foo.py"])
        )

        assert result.success
        # _write_files should only be called once (for the valid attempt)
        loop._write_files.assert_called_once()
|
||||
|
||||
|
||||
# ── Multi-backend tests ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestBackendResolution:
    """Tests for ``SelfModifyLoop._resolve_backend``."""

    def test_resolve_ollama(self):
        """An explicit ``"ollama"`` backend resolves to itself."""
        resolved = SelfModifyLoop(backend="ollama")._resolve_backend()
        assert resolved == "ollama"

    def test_resolve_anthropic(self):
        """An explicit ``"anthropic"`` backend resolves to itself."""
        resolved = SelfModifyLoop(backend="anthropic")._resolve_backend()
        assert resolved == "anthropic"

    def test_resolve_auto_with_key(self):
        """``"auto"`` resolves to anthropic when ``ANTHROPIC_API_KEY`` is set."""
        with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "sk-test-123"}):
            resolved = SelfModifyLoop(backend="auto")._resolve_backend()
            assert resolved == "anthropic"

    def test_resolve_auto_without_key(self):
        """``"auto"`` resolves to ollama when the environment is empty."""
        with patch.dict("os.environ", {}, clear=True):
            resolved = SelfModifyLoop(backend="auto")._resolve_backend()
            assert resolved == "ollama"
|
||||
|
||||
|
||||
# ── Autonomous loop tests ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestAutonomousLoop:
    """Tests for the autonomous retry cycle and ``_diagnose_failure``."""

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_autonomous_retries_after_failure(self, mock_settings):
        """A failing first run followed by a passing autonomous cycle succeeds."""
        mock_settings.self_modify_enabled = True
        mock_settings.self_modify_max_retries = 0
        mock_settings.self_modify_allowed_dirs = "src,tests"
        mock_settings.self_modify_backend = "ollama"

        loop = SelfModifyLoop(max_retries=0, autonomous=True, max_autonomous_cycles=2)
        loop._validate_paths = MagicMock()
        loop._read_files = MagicMock(return_value={"src/foo.py": "x = 1\n"})

        # A real function (not a bare return_value) so that a call using
        # unexpected argument names raises TypeError instead of passing silently.
        def fake_generate(instruction, contents, prev_test_output=None, prev_syntax_errors=None):
            return ({"src/foo.py": "x = 2\n"}, "llm raw")

        loop._generate_edits = MagicMock(side_effect=fake_generate)
        loop._write_files = MagicMock(return_value=["src/foo.py"])
        loop._revert_files = MagicMock()

        # First call fails tests, second succeeds
        test_results = [(False, "FAILED"), (True, "PASSED")]
        loop._run_tests = MagicMock(side_effect=test_results)
        loop._git_commit = MagicMock(return_value="abc123")
        loop._diagnose_failure = MagicMock(return_value="Fix: do X instead of Y")

        result = loop.run(
            ModifyRequest(instruction="Fix foo", target_files=["src/foo.py"])
        )

        assert result.success
        assert result.autonomous_cycles == 1
        loop._diagnose_failure.assert_called_once()

    def test_diagnose_failure_reads_report(self, tmp_path):
        """With a report on disk, the stubbed LLM reply is reflected in the diagnosis."""
        report = tmp_path / "report.md"
        report.write_text("# Report\n**Error:** SyntaxError line 5\n")

        loop = SelfModifyLoop(backend="ollama")
        loop._call_llm = MagicMock(return_value="ROOT CAUSE: Missing closing paren")

        diagnosis = loop._diagnose_failure(report)
        assert "Missing closing paren" in diagnosis
        loop._call_llm.assert_called_once()

    def test_diagnose_failure_handles_missing_report(self, tmp_path):
        """A report path that does not exist yields ``None``."""
        loop = SelfModifyLoop(backend="ollama")
        result = loop._diagnose_failure(tmp_path / "nonexistent.md")
        assert result is None
|
||||
|
||||
|
||||
# ── Path validation tests ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestPathValidation:
    """Tests for ``SelfModifyLoop._validate_paths``."""

    @staticmethod
    def _loop():
        """Build a loop rooted at the fixed repo path shared by these tests."""
        return SelfModifyLoop(repo_path=Path("/tmp/test-repo"))

    def test_rejects_path_outside_repo(self):
        """A path that climbs out of the repo raises ``ValueError``."""
        loop = self._loop()
        with pytest.raises(ValueError, match="escapes repository"):
            loop._validate_paths(["../../etc/passwd"])

    def test_rejects_path_outside_allowed_dirs(self):
        """A path under ``docs/`` raises ``ValueError``."""
        loop = self._loop()
        with pytest.raises(ValueError, match="not in allowed directories"):
            loop._validate_paths(["docs/secret.py"])

    def test_accepts_src_path(self):
        """A path under ``src/`` validates without raising."""
        candidate = ["src/some_module.py"]
        self._loop()._validate_paths(candidate)

    def test_accepts_tests_path(self):
        """A path under ``tests/`` validates without raising."""
        candidate = ["tests/test_something.py"]
        self._loop()._validate_paths(candidate)
|
||||
|
||||
|
||||
# ── File inference tests ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestFileInference:
    """Tests for ``SelfModifyLoop._infer_target_files``."""

    def test_infer_explicit_py_path(self):
        """A ``.py`` path written in the instruction appears in the result."""
        inferred = SelfModifyLoop()._infer_target_files(
            "fix bug in src/dashboard/app.py"
        )
        assert "src/dashboard/app.py" in inferred

    def test_infer_from_keyword_config(self):
        """An instruction mentioning "config" yields ``src/config.py``."""
        inferred = SelfModifyLoop()._infer_target_files(
            "update the config to add a new setting"
        )
        assert "src/config.py" in inferred

    def test_infer_from_keyword_agent(self):
        """An instruction mentioning "agent" yields ``src/timmy/agent.py``."""
        inferred = SelfModifyLoop()._infer_target_files("modify the agent prompt")
        assert "src/timmy/agent.py" in inferred

    def test_infer_returns_empty_for_vague(self):
        """An instruction with no path or known keyword yields an empty list."""
        inferred = SelfModifyLoop()._infer_target_files("do something cool")
        assert inferred == []
|
||||
|
||||
|
||||
# ── NLU intent tests ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestCodeIntent:
    """Tests that ``voice.nlu.detect_intent`` classifies code-edit requests."""

    @staticmethod
    def _detect(text):
        """Import ``detect_intent`` at call time and run it on ``text``."""
        from voice.nlu import detect_intent

        return detect_intent(text)

    def test_detects_modify_code(self):
        """"modify the code ..." is classified as ``code``."""
        detected = self._detect("modify the code in config.py")
        assert detected.name == "code"

    def test_detects_self_modify(self):
        """"self-modify ..." is classified as ``code``."""
        detected = self._detect("self-modify to add a new endpoint")
        assert detected.name == "code"

    def test_detects_edit_source(self):
        """"edit the source ..." is classified as ``code``."""
        detected = self._detect("edit the source to fix the bug")
        assert detected.name == "code"

    def test_detects_update_your_code(self):
        """"update your code ..." is classified as ``code``."""
        detected = self._detect("update your code to handle errors")
        assert detected.name == "code"

    def test_detects_fix_function(self):
        """"fix the function ..." is classified as ``code``."""
        detected = self._detect("fix the function that calculates totals")
        assert detected.name == "code"

    def test_does_not_match_general_chat(self):
        """A weather question is classified as ``chat``."""
        detected = self._detect("tell me about the weather today")
        assert detected.name == "chat"

    def test_extracts_target_file_entity(self):
        """A file path in the utterance is exposed as the ``target_file`` entity."""
        detected = self._detect("modify file src/config.py to add debug flag")
        assert detected.entities.get("target_file") == "src/config.py"
|
||||
|
||||
|
||||
# ── Route tests ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSelfModifyRoutes:
    """HTTP-level checks of the ``/self-modify`` routes via the ``client`` fixture."""

    def test_status_endpoint(self, client):
        """The status route answers 200 and reports ``enabled`` as False."""
        response = client.get("/self-modify/status")
        assert response.status_code == 200

        payload = response.json()
        assert "enabled" in payload
        assert payload["enabled"] is False  # Default

    def test_run_when_disabled(self, client):
        """Posting to the run route answers 403."""
        form = {"instruction": "test"}
        response = client.post("/self-modify/run", data=form)
        assert response.status_code == 403
|
||||
|
||||
|
||||
# ── DirectToolExecutor integration ────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestDirectToolExecutor:
    """Smoke tests for ``swarm.tool_executor.DirectToolExecutor``."""

    def test_code_task_falls_back_when_disabled(self):
        """A code-style task still returns a dict with a result or success key."""
        from swarm.tool_executor import DirectToolExecutor

        forge = DirectToolExecutor("forge", "forge-test-001")
        outcome = forge.execute_with_tools("modify the code to fix bug")

        # Should fall back to simulated since self_modify_enabled=False
        assert isinstance(outcome, dict)
        assert any(key in outcome for key in ("result", "success"))

    def test_non_code_task_delegates_to_parent(self):
        """A non-code task returns a dict."""
        from swarm.tool_executor import DirectToolExecutor

        echo = DirectToolExecutor("echo", "echo-test-001")
        outcome = echo.execute_with_tools("search for information")

        assert isinstance(outcome, dict)
|
||||
Reference in New Issue
Block a user