Timmy-time-dashboard/tests/test_self_modify.py

"""Tests for the self-modification loop (self_modify/loop.py).

All tests are fully mocked — no Ollama, no real file I/O, no git.
"""

from unittest.mock import MagicMock, patch
from pathlib import Path

import pytest

from self_modify.loop import SelfModifyLoop, ModifyRequest, ModifyResult


# ── Dataclass tests ───────────────────────────────────────────────────────────


class TestModifyRequest:
    """Construction checks for the ``ModifyRequest`` dataclass."""

    def test_defaults(self):
        """Supplying only the instruction leaves the other fields at defaults."""
        request = ModifyRequest(instruction="Fix the bug")

        assert request.instruction == "Fix the bug"
        # Defaults asserted here: an empty file list and dry-run switched off.
        assert request.target_files == []
        assert request.dry_run is False

    def test_with_target_files(self):
        """Explicit ``target_files`` and ``dry_run`` values are stored as given."""
        fields = {
            "instruction": "Add docstring",
            "target_files": ["src/foo.py"],
            "dry_run": True,
        }
        request = ModifyRequest(**fields)

        assert request.target_files == ["src/foo.py"]
        assert request.dry_run is True


class TestModifyResult:
    """Construction checks for the ``ModifyResult`` dataclass."""

    def test_success_result(self):
        """A fully populated success result keeps its values; the rest default."""
        populated = {
            "success": True,
            "files_changed": ["src/foo.py"],
            "test_passed": True,
            "commit_sha": "abc12345",
            "branch_name": "timmy/self-modify-123",
            "llm_response": "...",
            "attempts": 1,
        }
        outcome = ModifyResult(**populated)

        assert outcome.success
        assert outcome.commit_sha == "abc12345"
        # Fields that were not passed in fall back to their defaults.
        assert outcome.error is None
        assert outcome.autonomous_cycles == 0

    def test_failure_result(self):
        """A failure built from ``success`` and ``error`` alone has no files."""
        outcome = ModifyResult(success=False, error="something broke")

        assert not outcome.success
        assert outcome.error == "something broke"
        assert outcome.files_changed == []


# ── SelfModifyLoop unit tests ────────────────────────────────────────────────


class TestSelfModifyLoop:
    """Constructor and ``run()`` checks for ``SelfModifyLoop``.

    The ``run()`` tests patch ``self_modify.loop.settings`` and replace
    ``self_modify.loop.os.environ`` with a dict that holds only
    ``SELF_MODIFY_SKIP_BRANCH``. The loop's private steps are swapped for
    ``MagicMock`` objects directly on the instance under test.
    """

    @staticmethod
    def _enable(mock_settings, max_retries):
        """Populate the patched settings object with the values shared here."""
        mock_settings.self_modify_enabled = True
        mock_settings.self_modify_max_retries = max_retries
        mock_settings.self_modify_allowed_dirs = "src,tests"
        mock_settings.self_modify_backend = "ollama"

    @staticmethod
    def _stub_generation(loop):
        """Stub path validation, file reading and edit generation on ``loop``."""
        loop._validate_paths = MagicMock()
        loop._read_files = MagicMock(return_value={"src/foo.py": "old content"})
        loop._generate_edits = MagicMock(
            return_value=({"src/foo.py": "x = 1\n"}, "llm raw")
        )

    # ── Constructor ──────────────────────────────────────────────────────

    def test_init_defaults(self):
        """With no arguments the retry limit is 2."""
        assert SelfModifyLoop()._max_retries == 2

    def test_init_custom_retries(self):
        """``max_retries`` is stored on ``_max_retries``."""
        assert SelfModifyLoop(max_retries=5)._max_retries == 5

    def test_init_backend(self):
        """``backend`` is stored on ``_backend``."""
        assert SelfModifyLoop(backend="anthropic")._backend == "anthropic"

    def test_init_autonomous(self):
        """The autonomous flag and its cycle limit are both stored."""
        subject = SelfModifyLoop(autonomous=True, max_autonomous_cycles=5)

        assert subject._autonomous is True
        assert subject._max_autonomous_cycles == 5

    # ── run() ────────────────────────────────────────────────────────────

    @patch("self_modify.loop.settings")
    def test_run_disabled(self, mock_settings):
        """``run()`` fails with a "disabled" error when the feature is off."""
        mock_settings.self_modify_enabled = False

        outcome = SelfModifyLoop().run(ModifyRequest(instruction="test"))

        assert not outcome.success
        assert "disabled" in outcome.error.lower()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_run_no_target_files(self, mock_settings):
        """``run()`` fails when target inference returns no files."""
        self._enable(mock_settings, 0)
        subject = SelfModifyLoop()
        # Force inference to come back empty regardless of the instruction.
        subject._infer_target_files = MagicMock(return_value=[])

        outcome = subject.run(ModifyRequest(instruction="do something vague"))

        assert not outcome.success
        assert "no target files" in outcome.error.lower()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_run_success_path(self, mock_settings):
        """Passing tests produce a successful result carrying the commit SHA."""
        self._enable(mock_settings, 2)
        subject = SelfModifyLoop()
        self._stub_generation(subject)
        subject._write_files = MagicMock(return_value=["src/foo.py"])
        subject._run_tests = MagicMock(return_value=(True, "5 passed"))
        subject._git_commit = MagicMock(return_value="abc12345")

        request = ModifyRequest(
            instruction="Add docstring", target_files=["src/foo.py"]
        )
        outcome = subject.run(request)

        assert outcome.success
        assert outcome.test_passed
        assert outcome.commit_sha == "abc12345"
        assert outcome.files_changed == ["src/foo.py"]
        # Exactly one test run and one commit on the happy path.
        subject._run_tests.assert_called_once()
        subject._git_commit.assert_called_once()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_run_test_failure_reverts(self, mock_settings):
        """Failing tests with no retries left yield failure and a revert call."""
        self._enable(mock_settings, 0)
        subject = SelfModifyLoop(max_retries=0)
        self._stub_generation(subject)
        subject._write_files = MagicMock(return_value=["src/foo.py"])
        subject._run_tests = MagicMock(return_value=(False, "1 failed"))
        subject._revert_files = MagicMock()

        request = ModifyRequest(
            instruction="Break it", target_files=["src/foo.py"]
        )
        outcome = subject.run(request)

        assert not outcome.success
        assert not outcome.test_passed
        subject._revert_files.assert_called()

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_dry_run(self, mock_settings):
        """A dry-run request succeeds and reports the file it would change."""
        self._enable(mock_settings, 2)
        subject = SelfModifyLoop()
        # Only validation, reading and generation are stubbed in this test.
        self._stub_generation(subject)

        request = ModifyRequest(
            instruction="Add docstring",
            target_files=["src/foo.py"],
            dry_run=True,
        )
        outcome = subject.run(request)

        assert outcome.success
        assert outcome.files_changed == ["src/foo.py"]


# ── Syntax validation tests ─────────────────────────────────────────────────


class TestSyntaxValidation:
    """Checks for ``SelfModifyLoop._validate_syntax`` and its effect on ``run()``.

    ``_validate_syntax`` is given a mapping of file path to proposed content.
    These tests expect a mapping back that has an entry for each file whose
    content failed validation.
    """

    def test_valid_python_passes(self):
        """Well-formed Python yields an empty error mapping."""
        loop = SelfModifyLoop()
        errors = loop._validate_syntax({"src/foo.py": "x = 1\nprint(x)\n"})
        assert errors == {}

    def test_invalid_python_caught(self):
        """An unclosed parenthesis is reported and the message mentions a line."""
        loop = SelfModifyLoop()
        errors = loop._validate_syntax({"src/foo.py": "def foo(\n"})
        assert "src/foo.py" in errors
        assert "line" in errors["src/foo.py"]

    def test_unterminated_string_caught(self):
        """A triple-quoted string that is never closed is reported."""
        loop = SelfModifyLoop()
        broken = '"""\nunclosed string\n'
        errors = loop._validate_syntax({"src/foo.py": broken})
        assert "src/foo.py" in errors

    def test_non_python_files_skipped(self):
        """Content of a non-``.py`` file produces no errors."""
        loop = SelfModifyLoop()
        errors = loop._validate_syntax({"README.md": "this is not python {{{}"})
        assert errors == {}

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_syntax_error_skips_write(self, mock_settings):
        """When LLM produces invalid syntax, we skip writing and retry."""
        mock_settings.self_modify_enabled = True
        mock_settings.self_modify_max_retries = 1
        mock_settings.self_modify_allowed_dirs = "src,tests"
        mock_settings.self_modify_backend = "ollama"

        loop = SelfModifyLoop(max_retries=1)
        loop._read_files = MagicMock(return_value={"src/foo.py": "x = 1\n"})
        # First call returns broken syntax, second returns valid
        loop._generate_edits = MagicMock(side_effect=[
            ({"src/foo.py": "def foo(\n"}, "bad llm"),
            ({"src/foo.py": "def foo():\n    pass\n"}, "good llm"),
        ])
        loop._write_files = MagicMock(return_value=["src/foo.py"])
        loop._run_tests = MagicMock(return_value=(True, "passed"))
        loop._git_commit = MagicMock(return_value="abc123")
        loop._validate_paths = MagicMock()

        result = loop.run(
            ModifyRequest(instruction="Fix foo", target_files=["src/foo.py"])
        )

        assert result.success
        # _write_files should only be called once (for the valid attempt)
        loop._write_files.assert_called_once()


# ── Multi-backend tests ──────────────────────────────────────────────────────


class TestBackendResolution:
    """Checks for how ``SelfModifyLoop._resolve_backend`` picks a backend."""

    def test_resolve_ollama(self):
        """An explicit ``"ollama"`` backend resolves to itself."""
        resolved = SelfModifyLoop(backend="ollama")._resolve_backend()
        assert resolved == "ollama"

    def test_resolve_anthropic(self):
        """An explicit ``"anthropic"`` backend resolves to itself."""
        resolved = SelfModifyLoop(backend="anthropic")._resolve_backend()
        assert resolved == "anthropic"

    @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "sk-test-123"})
    def test_resolve_auto_with_key(self):
        """``"auto"`` resolves to anthropic when ``ANTHROPIC_API_KEY`` is set."""
        resolved = SelfModifyLoop(backend="auto")._resolve_backend()
        assert resolved == "anthropic"

    @patch.dict("os.environ", {}, clear=True)
    def test_resolve_auto_without_key(self):
        """``"auto"`` resolves to ollama when the environment is empty."""
        resolved = SelfModifyLoop(backend="auto")._resolve_backend()
        assert resolved == "ollama"


# ── Autonomous loop tests ────────────────────────────────────────────────────


class TestAutonomousLoop:
    """Checks for autonomous mode and ``SelfModifyLoop._diagnose_failure``."""

    @patch("self_modify.loop.os.environ", {"SELF_MODIFY_SKIP_BRANCH": "1"})
    @patch("self_modify.loop.settings")
    def test_autonomous_retries_after_failure(self, mock_settings):
        """A failed first run is followed by one autonomous cycle that succeeds."""
        mock_settings.self_modify_enabled = True
        mock_settings.self_modify_max_retries = 0
        mock_settings.self_modify_allowed_dirs = "src,tests"
        mock_settings.self_modify_backend = "ollama"

        loop = SelfModifyLoop(max_retries=0, autonomous=True, max_autonomous_cycles=2)
        loop._validate_paths = MagicMock()
        loop._read_files = MagicMock(return_value={"src/foo.py": "x = 1\n"})

        # A real function (not a bare return_value) so that a call using
        # keyword names other than these raises TypeError.
        def fake_generate(instruction, contents, prev_test_output=None, prev_syntax_errors=None):
            return ({"src/foo.py": "x = 2\n"}, "llm raw")

        loop._generate_edits = MagicMock(side_effect=fake_generate)
        loop._write_files = MagicMock(return_value=["src/foo.py"])
        loop._revert_files = MagicMock()

        # First call fails tests, second succeeds
        loop._run_tests = MagicMock(side_effect=[(False, "FAILED"), (True, "PASSED")])
        loop._git_commit = MagicMock(return_value="abc123")
        loop._diagnose_failure = MagicMock(return_value="Fix: do X instead of Y")

        result = loop.run(
            ModifyRequest(instruction="Fix foo", target_files=["src/foo.py"])
        )

        assert result.success
        assert result.autonomous_cycles == 1
        loop._diagnose_failure.assert_called_once()

    def test_diagnose_failure_reads_report(self, tmp_path):
        """With a report on disk, the stubbed LLM reply appears in the diagnosis."""
        report = tmp_path / "report.md"
        report.write_text("# Report\n**Error:** SyntaxError line 5\n")

        loop = SelfModifyLoop(backend="ollama")
        loop._call_llm = MagicMock(return_value="ROOT CAUSE: Missing closing paren")

        diagnosis = loop._diagnose_failure(report)
        assert "Missing closing paren" in diagnosis
        loop._call_llm.assert_called_once()

    def test_diagnose_failure_handles_missing_report(self, tmp_path):
        """A report path that does not exist yields ``None``."""
        loop = SelfModifyLoop(backend="ollama")
        result = loop._diagnose_failure(tmp_path / "nonexistent.md")
        assert result is None


# ── Path validation tests ─────────────────────────────────────────────────────


class TestPathValidation:
    """Checks for ``SelfModifyLoop._validate_paths`` against a fixed repo root."""

    @staticmethod
    def _loop():
        """Build a loop whose repository root is ``/tmp/test-repo``."""
        return SelfModifyLoop(repo_path=Path("/tmp/test-repo"))

    def test_rejects_path_outside_repo(self):
        """A path that climbs out of the repo raises ``ValueError``."""
        subject = self._loop()
        with pytest.raises(ValueError, match="escapes repository"):
            subject._validate_paths(["../../etc/passwd"])

    def test_rejects_path_outside_allowed_dirs(self):
        """A path under ``docs/`` raises ``ValueError``."""
        subject = self._loop()
        with pytest.raises(ValueError, match="not in allowed directories"):
            subject._validate_paths(["docs/secret.py"])

    def test_accepts_src_path(self):
        """A path under ``src/`` is accepted (the test passes if nothing raises)."""
        self._loop()._validate_paths(["src/some_module.py"])

    def test_accepts_tests_path(self):
        """A path under ``tests/`` is accepted (the test passes if nothing raises)."""
        self._loop()._validate_paths(["tests/test_something.py"])


# ── File inference tests ──────────────────────────────────────────────────────


class TestFileInference:
    """Checks for ``SelfModifyLoop._infer_target_files`` on free-text input."""

    @staticmethod
    def _infer(instruction):
        """Run target-file inference on a default-constructed loop."""
        return SelfModifyLoop()._infer_target_files(instruction)

    def test_infer_explicit_py_path(self):
        """A ``.py`` path written in the instruction is among the targets."""
        inferred = self._infer("fix bug in src/dashboard/app.py")
        assert "src/dashboard/app.py" in inferred

    def test_infer_from_keyword_config(self):
        """An instruction mentioning "config" includes ``src/config.py``."""
        inferred = self._infer("update the config to add a new setting")
        assert "src/config.py" in inferred

    def test_infer_from_keyword_agent(self):
        """An instruction mentioning "agent" includes ``src/timmy/agent.py``."""
        inferred = self._infer("modify the agent prompt")
        assert "src/timmy/agent.py" in inferred

    def test_infer_returns_empty_for_vague(self):
        """An instruction with no path or known keyword infers nothing."""
        inferred = self._infer("do something cool")
        assert inferred == []


# ── NLU intent tests ──────────────────────────────────────────────────────────


class TestCodeIntent:
    """Checks that ``voice.nlu.detect_intent`` classifies code-edit requests."""

    @staticmethod
    def _detect(utterance):
        """Import ``detect_intent`` at call time and apply it to ``utterance``."""
        from voice.nlu import detect_intent

        return detect_intent(utterance)

    def test_detects_modify_code(self):
        """"modify the code ..." is classified as the ``code`` intent."""
        assert self._detect("modify the code in config.py").name == "code"

    def test_detects_self_modify(self):
        """"self-modify ..." is classified as the ``code`` intent."""
        assert self._detect("self-modify to add a new endpoint").name == "code"

    def test_detects_edit_source(self):
        """"edit the source ..." is classified as the ``code`` intent."""
        assert self._detect("edit the source to fix the bug").name == "code"

    def test_detects_update_your_code(self):
        """"update your code ..." is classified as the ``code`` intent."""
        assert self._detect("update your code to handle errors").name == "code"

    def test_detects_fix_function(self):
        """"fix the function ..." is classified as the ``code`` intent."""
        detected = self._detect("fix the function that calculates totals")
        assert detected.name == "code"

    def test_does_not_match_general_chat(self):
        """A question about the weather is classified as ``chat``."""
        detected = self._detect("tell me about the weather today")
        assert detected.name == "chat"

    def test_extracts_target_file_entity(self):
        """The file path in the utterance is exposed as ``target_file``."""
        detected = self._detect("modify file src/config.py to add debug flag")
        assert detected.entities.get("target_file") == "src/config.py"


# ── Route tests ───────────────────────────────────────────────────────────────


class TestSelfModifyRoutes:
    """HTTP-level checks for the ``/self-modify`` routes.

    ``client`` is a fixture defined outside this file, presumably an HTTP
    test client for the dashboard app (verify in conftest).
    """

    def test_status_endpoint(self, client):
        """``GET /self-modify/status`` returns 200 with ``enabled`` set to False."""
        response = client.get("/self-modify/status")
        assert response.status_code == 200

        payload = response.json()
        assert "enabled" in payload
        assert payload["enabled"] is False  # Default

    def test_run_when_disabled(self, client):
        """``POST /self-modify/run`` is answered with 403."""
        form = {"instruction": "test"}
        response = client.post("/self-modify/run", data=form)
        assert response.status_code == 403


# ── DirectToolExecutor integration ────────────────────────────────────────────


class TestDirectToolExecutor:
    """Smoke checks for ``swarm.tool_executor.DirectToolExecutor``."""

    @staticmethod
    def _executor(first, second):
        """Import ``DirectToolExecutor`` at call time and build one positionally."""
        from swarm.tool_executor import DirectToolExecutor

        return DirectToolExecutor(first, second)

    def test_code_task_falls_back_when_disabled(self):
        """A code-style task returns a dict with a ``result`` or ``success`` key."""
        executor = self._executor("forge", "forge-test-001")
        outcome = executor.execute_with_tools("modify the code to fix bug")

        # Should fall back to simulated since self_modify_enabled=False
        assert isinstance(outcome, dict)
        assert "result" in outcome or "success" in outcome

    def test_non_code_task_delegates_to_parent(self):
        """A non-code task returns a dict."""
        executor = self._executor("echo", "echo-test-001")
        outcome = executor.execute_with_tools("search for information")

        assert isinstance(outcome, dict)