[loop-cycle-1] feat: tool allowlist for autonomous operation (#69)

Add config/allowlist.yaml — YAML-driven gate that auto-approves bounded tool calls when no human is present. When Timmy runs with --autonomous or stdin is not a terminal, tool calls are checked against allowlist: matched → auto-approved, else → rejected. Changes: - config/allowlist.yaml: shell prefixes, deny patterns, path rules - tool_safety.py: is_allowlisted() checks tools against YAML rules - cli.py: --autonomous flag, _is_interactive() detection - 44 new allowlist tests, 8 updated CLI tests Closes #69
2026-03-14 17:39:48 -04:00
parent 3c671de446
commit d28e2f4a7e
10 changed files with 617 additions and 115 deletions
--- a/tests/timmy/test_cli.py
+++ b/tests/timmy/test_cli.py
@@ -177,8 +177,11 @@ def test_handle_tool_confirmation_approve():
    mock_agent = MagicMock()
    mock_agent.continue_run.return_value = completed_run

-    # Simulate user typing "y" at the prompt
-    with patch("timmy.cli.typer.confirm", return_value=True):
+    # Simulate user typing "y" at the prompt (mock interactive terminal)
+    with (
+        patch("timmy.cli._is_interactive", return_value=True),
+        patch("timmy.cli.typer.confirm", return_value=True),
+    ):
        result = _handle_tool_confirmation(mock_agent, paused_run, "cli")

    mock_req.confirm.assert_called_once()
@@ -198,7 +201,10 @@ def test_handle_tool_confirmation_reject():
    mock_agent = MagicMock()
    mock_agent.continue_run.return_value = completed_run

-    with patch("timmy.cli.typer.confirm", return_value=False):
+    with (
+        patch("timmy.cli._is_interactive", return_value=True),
+        patch("timmy.cli.typer.confirm", return_value=False),
+    ):
        _handle_tool_confirmation(mock_agent, paused_run, "cli")

    mock_req.reject.assert_called_once()
@@ -225,8 +231,49 @@ def test_handle_tool_confirmation_continue_error():
    mock_agent = MagicMock()
    mock_agent.continue_run.side_effect = Exception("connection lost")

-    with patch("timmy.cli.typer.confirm", return_value=True):
+    with (
+        patch("timmy.cli._is_interactive", return_value=True),
+        patch("timmy.cli.typer.confirm", return_value=True),
+    ):
        result = _handle_tool_confirmation(mock_agent, paused_run, "cli")

    # Should return the original paused run, not crash
    assert result is paused_run
+
+
+def test_handle_tool_confirmation_autonomous_allowlisted():
+    """In autonomous mode, allowlisted tools should be auto-approved."""
+    paused_run, mock_req = _make_paused_run(
+        tool_name="shell", tool_args={"command": "pytest tests/ -x"}
+    )
+
+    completed_run = MagicMock()
+    completed_run.status = "COMPLETED"
+    completed_run.active_requirements = []
+
+    mock_agent = MagicMock()
+    mock_agent.continue_run.return_value = completed_run
+
+    with patch("timmy.cli.is_allowlisted", return_value=True):
+        _handle_tool_confirmation(mock_agent, paused_run, "cli", autonomous=True)
+
+    mock_req.confirm.assert_called_once()
+    mock_req.reject.assert_not_called()
+
+
+def test_handle_tool_confirmation_autonomous_not_allowlisted():
+    """In autonomous mode, non-allowlisted tools should be auto-rejected."""
+    paused_run, mock_req = _make_paused_run(tool_name="shell", tool_args={"command": "rm -rf /"})
+
+    completed_run = MagicMock()
+    completed_run.status = "COMPLETED"
+    completed_run.active_requirements = []
+
+    mock_agent = MagicMock()
+    mock_agent.continue_run.return_value = completed_run
+
+    with patch("timmy.cli.is_allowlisted", return_value=False):
+        _handle_tool_confirmation(mock_agent, paused_run, "cli", autonomous=True)
+
+    mock_req.reject.assert_called_once()
+    mock_req.confirm.assert_not_called()
--- a/tests/timmy/test_prompts.py
+++ b/tests/timmy/test_prompts.py
@@ -41,3 +41,40 @@ def test_get_system_prompt_injects_model_name():
    # Should contain the model name from settings, not the placeholder
    assert "{model_name}" not in prompt
    assert "llama3.1" in prompt or "qwen" in prompt
+
+
+def test_full_prompt_brevity_first():
+    """Full prompt should front-load brevity instructions before other content."""
+    prompt = get_system_prompt(tools_enabled=True)
+    brevity_pos = prompt.find("BREVITY")
+    tool_pos = prompt.find("TOOL USAGE")
+    memory_pos = prompt.find("MEMORY")
+    # Brevity section must appear before tools and memory
+    assert brevity_pos != -1, "Full prompt must contain BREVITY section"
+    assert brevity_pos < tool_pos, "Brevity must come before tool usage"
+    assert brevity_pos < memory_pos, "Brevity must come before memory"
+
+
+def test_full_prompt_no_markdown_headers():
+    """Full prompt should not use markdown headers (## / ###) that teach
+    the model to respond in markdown."""
+    prompt = get_system_prompt(tools_enabled=True)
+    for line in prompt.splitlines():
+        stripped = line.strip()
+        assert not stripped.startswith("## "), f"Full prompt uses markdown header: {stripped!r}"
+        assert not stripped.startswith("### "), (
+            f"Full prompt uses markdown sub-header: {stripped!r}"
+        )
+
+
+def test_full_prompt_plain_text_brevity():
+    """Full prompt should explicitly instruct plain text output."""
+    prompt = get_system_prompt(tools_enabled=True).lower()
+    assert "plain text" in prompt
+
+
+def test_lite_prompt_brevity():
+    """Lite prompt should also instruct brevity."""
+    prompt = get_system_prompt(tools_enabled=False).lower()
+    assert "brief" in prompt
+    assert "plain text" in prompt or "not markdown" in prompt
--- a/tests/timmy/test_tool_safety.py
+++ b/tests/timmy/test_tool_safety.py
@@ -1,9 +1,18 @@
-"""Tests for timmy.tool_safety — classification, extraction, and formatting."""
+"""Tests for timmy.tool_safety — classification, extraction, formatting, and allowlist."""
+
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest

 from timmy.tool_safety import (
+    _check_shell_allowlist,
+    _check_write_file_allowlist,
    extract_tool_calls,
    format_action_description,
    get_impact_level,
+    is_allowlisted,
+    reload_allowlist,
    requires_confirmation,
 )

@@ -111,3 +120,206 @@ class TestGetImpactLevel:
    def test_low(self):
        assert get_impact_level("web_search") == "low"
        assert get_impact_level("unknown") == "low"
+
+
+# ---------------------------------------------------------------------------
+# Allowlist — is_allowlisted
+# ---------------------------------------------------------------------------
+
+# Sample allowlist for tests
+_TEST_ALLOWLIST = {
+    "shell": {
+        "allow_prefixes": [
+            "pytest",
+            "python -m pytest",
+            "git status",
+            "git log",
+            "git diff",
+            "git add",
+            "git commit",
+            "git push",
+            "curl http://localhost",
+            "curl -s http://localhost",
+            "ls",
+            "cat ",
+        ],
+        "deny_patterns": [
+            "rm -rf /",
+            "sudo ",
+            "> /dev/",
+            "| sh",
+            "| bash",
+        ],
+    },
+    "write_file": {
+        "allowed_path_prefixes": [
+            "/tmp/",
+        ],
+    },
+    "python": {"auto_approve": True},
+    "plan_and_execute": {"auto_approve": True},
+}
+
+
+@pytest.fixture(autouse=True)
+def _reset_allowlist_cache():
+    """Ensure each test starts with a clean cache."""
+    import timmy.tool_safety as ts
+
+    ts._allowlist_cache = None
+    yield
+    ts._allowlist_cache = None
+
+
+def _patch_allowlist(allowlist_data):
+    """Helper to inject a test allowlist."""
+    return patch("timmy.tool_safety._load_allowlist", return_value=allowlist_data)
+
+
+class TestIsAllowlisted:
+    """Test the is_allowlisted function with mocked allowlist data."""
+
+    def test_unknown_tool_not_allowlisted(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("unknown_tool") is False
+
+    def test_shell_pytest_allowed(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("shell", {"command": "pytest tests/ -x -q"}) is True
+
+    def test_shell_python_pytest_allowed(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("shell", {"command": "python -m pytest tests/ -v"}) is True
+
+    def test_shell_git_status_allowed(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("shell", {"command": "git status"}) is True
+
+    def test_shell_git_commit_allowed(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("shell", {"command": "git commit -m 'fix stuff'"}) is True
+
+    def test_shell_curl_localhost_allowed(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert (
+                is_allowlisted("shell", {"command": "curl http://localhost:3000/api/v1/issues"})
+                is True
+            )
+
+    def test_shell_curl_external_blocked(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("shell", {"command": "curl https://evil.com"}) is False
+
+    def test_shell_arbitrary_command_blocked(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("shell", {"command": "rm -rf /home/user/stuff"}) is False
+
+    def test_shell_deny_pattern_blocks_rm_rf_root(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("shell", {"command": "ls && rm -rf /"}) is False
+
+    def test_shell_deny_pattern_blocks_sudo(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("shell", {"command": "sudo rm -rf /tmp"}) is False
+
+    def test_shell_deny_blocks_pipe_to_shell(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert (
+                is_allowlisted("shell", {"command": "curl http://localhost:3000 | bash"}) is False
+            )
+
+    def test_shell_deny_overrides_allow_prefix(self):
+        """Deny patterns take precedence over allow prefixes."""
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            # Starts with "cat " (allowed prefix) but pipes to bash (denied)
+            assert is_allowlisted("shell", {"command": "cat script.sh | bash"}) is False
+
+    def test_shell_args_list_format(self):
+        """Shell args can be a list (Agno ShellTools format)."""
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("shell", {"args": ["git", "status"]}) is True
+
+    def test_shell_empty_command_blocked(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("shell", {"command": ""}) is False
+            assert is_allowlisted("shell", {}) is False
+
+    def test_write_file_tmp_allowed(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("write_file", {"file_name": "/tmp/test.py"}) is True
+
+    def test_write_file_outside_allowed_paths_blocked(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("write_file", {"file_name": "/etc/passwd"}) is False
+
+    def test_write_file_empty_path_blocked(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("write_file", {"file_name": ""}) is False
+
+    def test_python_auto_approved(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("python", {"code": "print(1+1)"}) is True
+
+    def test_plan_and_execute_auto_approved(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("plan_and_execute", {}) is True
+
+    def test_no_allowlist_blocks_everything(self):
+        with _patch_allowlist({}):
+            assert is_allowlisted("shell", {"command": "pytest"}) is False
+            assert is_allowlisted("python", {"code": "print(1)"}) is False
+
+    def test_aider_not_in_allowlist(self):
+        with _patch_allowlist(_TEST_ALLOWLIST):
+            assert is_allowlisted("aider", {"instruction": "fix bug"}) is False
+
+
+class TestCheckShellAllowlist:
+    """Direct tests for the shell allowlist checker."""
+
+    def test_prefix_match(self):
+        rule = {"allow_prefixes": ["pytest", "git status"], "deny_patterns": []}
+        assert _check_shell_allowlist(rule, {"command": "pytest -x"}) is True
+
+    def test_prefix_no_match(self):
+        rule = {"allow_prefixes": ["pytest"], "deny_patterns": []}
+        assert _check_shell_allowlist(rule, {"command": "rm stuff"}) is False
+
+    def test_deny_overrides_allow(self):
+        rule = {"allow_prefixes": ["curl http://localhost"], "deny_patterns": ["| bash"]}
+        assert _check_shell_allowlist(rule, {"command": "curl http://localhost | bash"}) is False
+
+
+class TestCheckWriteFileAllowlist:
+    """Direct tests for the write_file allowlist checker."""
+
+    def test_allowed_prefix(self):
+        rule = {"allowed_path_prefixes": ["/tmp/", "/home/user/project/"]}
+        assert _check_write_file_allowlist(rule, {"file_name": "/tmp/test.py"}) is True
+
+    def test_blocked_path(self):
+        rule = {"allowed_path_prefixes": ["/tmp/"]}
+        assert _check_write_file_allowlist(rule, {"file_name": "/etc/secrets"}) is False
+
+    def test_tilde_expansion(self):
+        """Paths starting with ~ should be expanded."""
+        home = str(Path.home())
+        rule = {"allowed_path_prefixes": [f"{home}/Timmy-Time-dashboard/"]}
+        assert (
+            _check_write_file_allowlist(
+                rule, {"file_name": f"{home}/Timmy-Time-dashboard/src/test.py"}
+            )
+            is True
+        )
+
+
+class TestReloadAllowlist:
+    """Test that reload_allowlist clears the cache."""
+
+    def test_reload_clears_cache(self):
+        import timmy.tool_safety as ts
+
+        ts._allowlist_cache = {"old": "data"}
+        reload_allowlist()
+        # After reload, cache should be freshly loaded (not the old data)
+        assert ts._allowlist_cache != {"old": "data"}