refactor: Phase 3 — reorganize tests into module-mirroring subdirectories

Move 97 test files from flat tests/ into 13 subdirectories: tests/dashboard/ (8 files — routes, mobile, mission control) tests/swarm/ (17 files — coordinator, docker, routing, tasks) tests/timmy/ (12 files — agent, backends, CLI, tools) tests/self_coding/ (14 files — git safety, indexer, self-modify) tests/lightning/ (3 files — L402, LND, interface) tests/creative/ (8 files — assembler, director, image/music/video) tests/integrations/ (10 files — chat bridge, telegram, voice, websocket) tests/mcp/ (4 files — bootstrap, discovery, executor) tests/spark/ (3 files — engine, tools, events) tests/hands/ (3 files — registry, oracle, phase5) tests/scripture/ (1 file) tests/infrastructure/ (3 files — router cascade, API) tests/security/ (3 files — XSS, regression) Fix Path(__file__) reference in test_mobile_scenarios.py for new depth. Add __init__.py to all test subdirectories. Tests: 1503 passed, 9 failed (pre-existing), 53 errors (pre-existing) https://claude.ai/code/session_019oMFNvD8uSGSSmBMGkBfQN
2026-02-26 21:21:28 +00:00
parent 6045077144
commit 4e11dd2490
104 changed files with 57 additions and 3 deletions
--- a/tests/self_coding/test_self_modify.py
+++ b/tests/self_coding/test_self_modify.py
@@ -0,0 +1,450 @@
+"""Tests for the self-modification loop (self_modify/loop.py).
+
+All tests are fully mocked — no Ollama, no real file I/O, no git.
+"""
+
+from unittest.mock import MagicMock, patch
+from pathlib import Path
+
+import pytest
+
+from self_modify.loop import SelfModifyLoop, ModifyRequest, ModifyResult
+
+
+# ── Dataclass tests ───────────────────────────────────────────────────────────
+
+
+class TestModifyRequest:
+    def test_defaults(self):
+        req = ModifyRequest(instruction="Fix the bug")
+        assert req.instruction == "Fix the bug"
+        assert req.target_files == []
+        assert req.dry_run is False
+
+    def test_with_target_files(self):
+        req = ModifyRequest(
+            instruction="Add docstring",
+            target_files=["src/foo.py"],
+            dry_run=True,
+        )
+        assert req.target_files == ["src/foo.py"]
+        assert req.dry_run is True
+
+
+class TestModifyResult:
+    def test_success_result(self):
+        result = ModifyResult(
+            success=True,
+            files_changed=["src/foo.py"],
+            test_passed=True,
+            commit_sha="abc12345",
+            branch_name="timmy/self-modify-123",
+            llm_response="...",
+            attempts=1,
+        )
+        assert result.success
+        assert result.commit_sha == "abc12345"
+        assert result.error is None
+        assert result.autonomous_cycles == 0
+
+    def test_failure_result(self):
+        result = ModifyResult(success=False, error="something broke")
+        assert not result.success
+        assert result.error == "something broke"
+        assert result.files_changed == []
+
+
+# ── SelfModifyLoop unit tests ────────────────────────────────────────────────
+
+
+class TestSelfModifyLoop:
+    def test_init_defaults(self):
+        loop = SelfModifyLoop()
+        assert loop._max_retries == 2
+
+    def test_init_custom_retries(self):
+        loop = SelfModifyLoop(max_retries=5)
+        assert loop._max_retries == 5
+
+    def test_init_backend(self):
+        loop = SelfModifyLoop(backend="anthropic")
+        assert loop._backend == "anthropic"
+
+    def test_init_autonomous(self):
+        loop = SelfModifyLoop(autonomous=True, max_autonomous_cycles=5)
+        assert loop._autonomous is True
+        assert loop._max_autonomous_cycles == 5
+
+    @patch("self_modify.loop.settings")
+    def test_run_disabled(self, mock_settings):
+        mock_settings.self_modify_enabled = False
+        loop = SelfModifyLoop()
+        result = loop.run(ModifyRequest(instruction="test"))
+        assert not result.success
+        assert "disabled" in result.error.lower()
+
+    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
+    @patch("self_modify.loop.settings")
+    def test_run_no_target_files(self, mock_settings):
+        mock_settings.self_modify_enabled = True
+        mock_settings.self_modify_max_retries = 0
+        mock_settings.self_modify_allowed_dirs = "src,tests"
+        mock_settings.self_modify_backend = "ollama"
+        loop = SelfModifyLoop()
+        loop._infer_target_files = MagicMock(return_value=[])
+        result = loop.run(ModifyRequest(instruction="do something vague"))
+        assert not result.success
+        assert "no target files" in result.error.lower()
+
+    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
+    @patch("self_modify.loop.settings")
+    def test_run_success_path(self, mock_settings):
+        mock_settings.self_modify_enabled = True
+        mock_settings.self_modify_max_retries = 2
+        mock_settings.self_modify_allowed_dirs = "src,tests"
+        mock_settings.self_modify_backend = "ollama"
+
+        loop = SelfModifyLoop()
+        loop._read_files = MagicMock(return_value={"src/foo.py": "old content"})
+        loop._generate_edits = MagicMock(
+            return_value=({"src/foo.py": "x = 1\n"}, "llm raw")
+        )
+        loop._write_files = MagicMock(return_value=["src/foo.py"])
+        loop._run_tests = MagicMock(return_value=(True, "5 passed"))
+        loop._git_commit = MagicMock(return_value="abc12345")
+        loop._validate_paths = MagicMock()
+
+        result = loop.run(
+            ModifyRequest(instruction="Add docstring", target_files=["src/foo.py"])
+        )
+
+        assert result.success
+        assert result.test_passed
+        assert result.commit_sha == "abc12345"
+        assert result.files_changed == ["src/foo.py"]
+        loop._run_tests.assert_called_once()
+        loop._git_commit.assert_called_once()
+
+    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
+    @patch("self_modify.loop.settings")
+    def test_run_test_failure_reverts(self, mock_settings):
+        mock_settings.self_modify_enabled = True
+        mock_settings.self_modify_max_retries = 0
+        mock_settings.self_modify_allowed_dirs = "src,tests"
+        mock_settings.self_modify_backend = "ollama"
+
+        loop = SelfModifyLoop(max_retries=0)
+        loop._read_files = MagicMock(return_value={"src/foo.py": "old content"})
+        loop._generate_edits = MagicMock(
+            return_value=({"src/foo.py": "x = 1\n"}, "llm raw")
+        )
+        loop._write_files = MagicMock(return_value=["src/foo.py"])
+        loop._run_tests = MagicMock(return_value=(False, "1 failed"))
+        loop._revert_files = MagicMock()
+        loop._validate_paths = MagicMock()
+
+        result = loop.run(
+            ModifyRequest(instruction="Break it", target_files=["src/foo.py"])
+        )
+
+        assert not result.success
+        assert not result.test_passed
+        loop._revert_files.assert_called()
+
+    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
+    @patch("self_modify.loop.settings")
+    def test_dry_run(self, mock_settings):
+        mock_settings.self_modify_enabled = True
+        mock_settings.self_modify_max_retries = 2
+        mock_settings.self_modify_allowed_dirs = "src,tests"
+        mock_settings.self_modify_backend = "ollama"
+
+        loop = SelfModifyLoop()
+        loop._read_files = MagicMock(return_value={"src/foo.py": "old content"})
+        loop._generate_edits = MagicMock(
+            return_value=({"src/foo.py": "x = 1\n"}, "llm raw")
+        )
+        loop._validate_paths = MagicMock()
+
+        result = loop.run(
+            ModifyRequest(
+                instruction="Add docstring",
+                target_files=["src/foo.py"],
+                dry_run=True,
+            )
+        )
+
+        assert result.success
+        assert result.files_changed == ["src/foo.py"]
+
+
+# ── Syntax validation tests ─────────────────────────────────────────────────
+
+
+class TestSyntaxValidation:
+    def test_valid_python_passes(self):
+        loop = SelfModifyLoop()
+        errors = loop._validate_syntax({"src/foo.py": "x = 1\nprint(x)\n"})
+        assert errors == {}
+
+    def test_invalid_python_caught(self):
+        loop = SelfModifyLoop()
+        errors = loop._validate_syntax({"src/foo.py": "def foo(\n"})
+        assert "src/foo.py" in errors
+        assert "line" in errors["src/foo.py"]
+
+    def test_unterminated_string_caught(self):
+        loop = SelfModifyLoop()
+        bad_code = '"""\nTIMMY = """\nstuff\n"""\n'
+        errors = loop._validate_syntax({"src/foo.py": bad_code})
+        # This specific code is actually valid, but let's test truly broken code
+        broken = '"""\nunclosed string\n'
+        errors = loop._validate_syntax({"src/foo.py": broken})
+        assert "src/foo.py" in errors
+
+    def test_non_python_files_skipped(self):
+        loop = SelfModifyLoop()
+        errors = loop._validate_syntax({"README.md": "this is not python {{{}"})
+        assert errors == {}
+
+    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
+    @patch("self_modify.loop.settings")
+    def test_syntax_error_skips_write(self, mock_settings):
+        """When LLM produces invalid syntax, we skip writing and retry."""
+        mock_settings.self_modify_enabled = True
+        mock_settings.self_modify_max_retries = 1
+        mock_settings.self_modify_allowed_dirs = "src,tests"
+        mock_settings.self_modify_backend = "ollama"
+
+        loop = SelfModifyLoop(max_retries=1)
+        loop._read_files = MagicMock(return_value={"src/foo.py": "x = 1\n"})
+        # First call returns broken syntax, second returns valid
+        loop._generate_edits = MagicMock(side_effect=[
+            ({"src/foo.py": "def foo(\n"}, "bad llm"),
+            ({"src/foo.py": "def foo():\n    pass\n"}, "good llm"),
+        ])
+        loop._write_files = MagicMock(return_value=["src/foo.py"])
+        loop._run_tests = MagicMock(return_value=(True, "passed"))
+        loop._git_commit = MagicMock(return_value="abc123")
+        loop._validate_paths = MagicMock()
+
+        result = loop.run(
+            ModifyRequest(instruction="Fix foo", target_files=["src/foo.py"])
+        )
+
+        assert result.success
+        # _write_files should only be called once (for the valid attempt)
+        loop._write_files.assert_called_once()
+
+
+# ── Multi-backend tests ──────────────────────────────────────────────────────
+
+
+class TestBackendResolution:
+    def test_resolve_ollama(self):
+        loop = SelfModifyLoop(backend="ollama")
+        assert loop._resolve_backend() == "ollama"
+
+    def test_resolve_anthropic(self):
+        loop = SelfModifyLoop(backend="anthropic")
+        assert loop._resolve_backend() == "anthropic"
+
+    @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "sk-test-123"})
+    def test_resolve_auto_with_key(self):
+        loop = SelfModifyLoop(backend="auto")
+        assert loop._resolve_backend() == "anthropic"
+
+    @patch.dict("os.environ", {}, clear=True)
+    def test_resolve_auto_without_key(self):
+        loop = SelfModifyLoop(backend="auto")
+        assert loop._resolve_backend() == "ollama"
+
+
+# ── Autonomous loop tests ────────────────────────────────────────────────────
+
+
+class TestAutonomousLoop:
+    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
+    @patch("self_modify.loop.settings")
+    def test_autonomous_retries_after_failure(self, mock_settings):
+        mock_settings.self_modify_enabled = True
+        mock_settings.self_modify_max_retries = 0
+        mock_settings.self_modify_allowed_dirs = "src,tests"
+        mock_settings.self_modify_backend = "ollama"
+
+        loop = SelfModifyLoop(max_retries=0, autonomous=True, max_autonomous_cycles=2)
+        loop._validate_paths = MagicMock()
+        loop._read_files = MagicMock(return_value={"src/foo.py": "x = 1\n"})
+
+        # First run fails, autonomous cycle 1 succeeds
+        call_count = [0]
+
+        def fake_generate(instruction, contents, prev_test_output=None, prev_syntax_errors=None):
+            call_count[0] += 1
+            return ({"src/foo.py": "x = 2\n"}, "llm raw")
+
+        loop._generate_edits = MagicMock(side_effect=fake_generate)
+        loop._write_files = MagicMock(return_value=["src/foo.py"])
+        loop._revert_files = MagicMock()
+
+        # First call fails tests, second succeeds
+        test_results = [(False, "FAILED"), (True, "PASSED")]
+        loop._run_tests = MagicMock(side_effect=test_results)
+        loop._git_commit = MagicMock(return_value="abc123")
+        loop._diagnose_failure = MagicMock(return_value="Fix: do X instead of Y")
+
+        result = loop.run(
+            ModifyRequest(instruction="Fix foo", target_files=["src/foo.py"])
+        )
+
+        assert result.success
+        assert result.autonomous_cycles == 1
+        loop._diagnose_failure.assert_called_once()
+
+    def test_diagnose_failure_reads_report(self, tmp_path):
+        report = tmp_path / "report.md"
+        report.write_text("# Report\n**Error:** SyntaxError line 5\n")
+
+        loop = SelfModifyLoop(backend="ollama")
+        loop._call_llm = MagicMock(return_value="ROOT CAUSE: Missing closing paren")
+
+        diagnosis = loop._diagnose_failure(report)
+        assert "Missing closing paren" in diagnosis
+        loop._call_llm.assert_called_once()
+
+    def test_diagnose_failure_handles_missing_report(self, tmp_path):
+        loop = SelfModifyLoop(backend="ollama")
+        result = loop._diagnose_failure(tmp_path / "nonexistent.md")
+        assert result is None
+
+
+# ── Path validation tests ─────────────────────────────────────────────────────
+
+
+class TestPathValidation:
+    def test_rejects_path_outside_repo(self):
+        loop = SelfModifyLoop(repo_path=Path("/tmp/test-repo"))
+        with pytest.raises(ValueError, match="escapes repository"):
+            loop._validate_paths(["../../etc/passwd"])
+
+    def test_rejects_path_outside_allowed_dirs(self):
+        loop = SelfModifyLoop(repo_path=Path("/tmp/test-repo"))
+        with pytest.raises(ValueError, match="not in allowed directories"):
+            loop._validate_paths(["docs/secret.py"])
+
+    def test_accepts_src_path(self):
+        loop = SelfModifyLoop(repo_path=Path("/tmp/test-repo"))
+        loop._validate_paths(["src/some_module.py"])
+
+    def test_accepts_tests_path(self):
+        loop = SelfModifyLoop(repo_path=Path("/tmp/test-repo"))
+        loop._validate_paths(["tests/test_something.py"])
+
+
+# ── File inference tests ──────────────────────────────────────────────────────
+
+
+class TestFileInference:
+    def test_infer_explicit_py_path(self):
+        loop = SelfModifyLoop()
+        files = loop._infer_target_files("fix bug in src/dashboard/app.py")
+        assert "src/dashboard/app.py" in files
+
+    def test_infer_from_keyword_config(self):
+        loop = SelfModifyLoop()
+        files = loop._infer_target_files("update the config to add a new setting")
+        assert "src/config.py" in files
+
+    def test_infer_from_keyword_agent(self):
+        loop = SelfModifyLoop()
+        files = loop._infer_target_files("modify the agent prompt")
+        assert "src/timmy/agent.py" in files
+
+    def test_infer_returns_empty_for_vague(self):
+        loop = SelfModifyLoop()
+        files = loop._infer_target_files("do something cool")
+        assert files == []
+
+
+# ── NLU intent tests ──────────────────────────────────────────────────────────
+
+
+class TestCodeIntent:
+    def test_detects_modify_code(self):
+        from voice.nlu import detect_intent
+
+        intent = detect_intent("modify the code in config.py")
+        assert intent.name == "code"
+
+    def test_detects_self_modify(self):
+        from voice.nlu import detect_intent
+
+        intent = detect_intent("self-modify to add a new endpoint")
+        assert intent.name == "code"
+
+    def test_detects_edit_source(self):
+        from voice.nlu import detect_intent
+
+        intent = detect_intent("edit the source to fix the bug")
+        assert intent.name == "code"
+
+    def test_detects_update_your_code(self):
+        from voice.nlu import detect_intent
+
+        intent = detect_intent("update your code to handle errors")
+        assert intent.name == "code"
+
+    def test_detects_fix_function(self):
+        from voice.nlu import detect_intent
+
+        intent = detect_intent("fix the function that calculates totals")
+        assert intent.name == "code"
+
+    def test_does_not_match_general_chat(self):
+        from voice.nlu import detect_intent
+
+        intent = detect_intent("tell me about the weather today")
+        assert intent.name == "chat"
+
+    def test_extracts_target_file_entity(self):
+        from voice.nlu import detect_intent
+
+        intent = detect_intent("modify file src/config.py to add debug flag")
+        assert intent.entities.get("target_file") == "src/config.py"
+
+
+# ── Route tests ───────────────────────────────────────────────────────────────
+
+
+class TestSelfModifyRoutes:
+    def test_status_endpoint(self, client):
+        resp = client.get("/self-modify/status")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "enabled" in data
+        assert data["enabled"] is False  # Default
+
+    def test_run_when_disabled(self, client):
+        resp = client.post("/self-modify/run", data={"instruction": "test"})
+        assert resp.status_code == 403
+
+
+# ── DirectToolExecutor integration ────────────────────────────────────────────
+
+
+class TestDirectToolExecutor:
+    def test_code_task_falls_back_when_disabled(self):
+        from swarm.tool_executor import DirectToolExecutor
+
+        executor = DirectToolExecutor("forge", "forge-test-001")
+        result = executor.execute_with_tools("modify the code to fix bug")
+        # Should fall back to simulated since self_modify_enabled=False
+        assert isinstance(result, dict)
+        assert "result" in result or "success" in result
+
+    def test_non_code_task_delegates_to_parent(self):
+        from swarm.tool_executor import DirectToolExecutor
+
+        executor = DirectToolExecutor("echo", "echo-test-001")
+        result = executor.execute_with_tools("search for information")
+        assert isinstance(result, dict)