forked from Rockachopa/Timmy-time-dashboard
[loop-cycle-1] feat: tool allowlist for autonomous operation (#69)
Add config/allowlist.yaml — YAML-driven gate that auto-approves bounded tool calls when no human is present. When Timmy runs with --autonomous or stdin is not a terminal, tool calls are checked against allowlist: matched → auto-approved, else → rejected. Changes: - config/allowlist.yaml: shell prefixes, deny patterns, path rules - tool_safety.py: is_allowlisted() checks tools against YAML rules - cli.py: --autonomous flag, _is_interactive() detection - 44 new allowlist tests, 8 updated CLI tests Closes #69
This commit is contained in:
@@ -177,8 +177,11 @@ def test_handle_tool_confirmation_approve():
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.continue_run.return_value = completed_run
|
||||
|
||||
# Simulate user typing "y" at the prompt
|
||||
with patch("timmy.cli.typer.confirm", return_value=True):
|
||||
# Simulate user typing "y" at the prompt (mock interactive terminal)
|
||||
with (
|
||||
patch("timmy.cli._is_interactive", return_value=True),
|
||||
patch("timmy.cli.typer.confirm", return_value=True),
|
||||
):
|
||||
result = _handle_tool_confirmation(mock_agent, paused_run, "cli")
|
||||
|
||||
mock_req.confirm.assert_called_once()
|
||||
@@ -198,7 +201,10 @@ def test_handle_tool_confirmation_reject():
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.continue_run.return_value = completed_run
|
||||
|
||||
with patch("timmy.cli.typer.confirm", return_value=False):
|
||||
with (
|
||||
patch("timmy.cli._is_interactive", return_value=True),
|
||||
patch("timmy.cli.typer.confirm", return_value=False),
|
||||
):
|
||||
_handle_tool_confirmation(mock_agent, paused_run, "cli")
|
||||
|
||||
mock_req.reject.assert_called_once()
|
||||
@@ -225,8 +231,49 @@ def test_handle_tool_confirmation_continue_error():
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.continue_run.side_effect = Exception("connection lost")
|
||||
|
||||
with patch("timmy.cli.typer.confirm", return_value=True):
|
||||
with (
|
||||
patch("timmy.cli._is_interactive", return_value=True),
|
||||
patch("timmy.cli.typer.confirm", return_value=True),
|
||||
):
|
||||
result = _handle_tool_confirmation(mock_agent, paused_run, "cli")
|
||||
|
||||
# Should return the original paused run, not crash
|
||||
assert result is paused_run
|
||||
|
||||
|
||||
def test_handle_tool_confirmation_autonomous_allowlisted():
|
||||
"""In autonomous mode, allowlisted tools should be auto-approved."""
|
||||
paused_run, mock_req = _make_paused_run(
|
||||
tool_name="shell", tool_args={"command": "pytest tests/ -x"}
|
||||
)
|
||||
|
||||
completed_run = MagicMock()
|
||||
completed_run.status = "COMPLETED"
|
||||
completed_run.active_requirements = []
|
||||
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.continue_run.return_value = completed_run
|
||||
|
||||
with patch("timmy.cli.is_allowlisted", return_value=True):
|
||||
_handle_tool_confirmation(mock_agent, paused_run, "cli", autonomous=True)
|
||||
|
||||
mock_req.confirm.assert_called_once()
|
||||
mock_req.reject.assert_not_called()
|
||||
|
||||
|
||||
def test_handle_tool_confirmation_autonomous_not_allowlisted():
|
||||
"""In autonomous mode, non-allowlisted tools should be auto-rejected."""
|
||||
paused_run, mock_req = _make_paused_run(tool_name="shell", tool_args={"command": "rm -rf /"})
|
||||
|
||||
completed_run = MagicMock()
|
||||
completed_run.status = "COMPLETED"
|
||||
completed_run.active_requirements = []
|
||||
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.continue_run.return_value = completed_run
|
||||
|
||||
with patch("timmy.cli.is_allowlisted", return_value=False):
|
||||
_handle_tool_confirmation(mock_agent, paused_run, "cli", autonomous=True)
|
||||
|
||||
mock_req.reject.assert_called_once()
|
||||
mock_req.confirm.assert_not_called()
|
||||
|
||||
@@ -41,3 +41,40 @@ def test_get_system_prompt_injects_model_name():
|
||||
# Should contain the model name from settings, not the placeholder
|
||||
assert "{model_name}" not in prompt
|
||||
assert "llama3.1" in prompt or "qwen" in prompt
|
||||
|
||||
|
||||
def test_full_prompt_brevity_first():
|
||||
"""Full prompt should front-load brevity instructions before other content."""
|
||||
prompt = get_system_prompt(tools_enabled=True)
|
||||
brevity_pos = prompt.find("BREVITY")
|
||||
tool_pos = prompt.find("TOOL USAGE")
|
||||
memory_pos = prompt.find("MEMORY")
|
||||
# Brevity section must appear before tools and memory
|
||||
assert brevity_pos != -1, "Full prompt must contain BREVITY section"
|
||||
assert brevity_pos < tool_pos, "Brevity must come before tool usage"
|
||||
assert brevity_pos < memory_pos, "Brevity must come before memory"
|
||||
|
||||
|
||||
def test_full_prompt_no_markdown_headers():
|
||||
"""Full prompt should not use markdown headers (## / ###) that teach
|
||||
the model to respond in markdown."""
|
||||
prompt = get_system_prompt(tools_enabled=True)
|
||||
for line in prompt.splitlines():
|
||||
stripped = line.strip()
|
||||
assert not stripped.startswith("## "), f"Full prompt uses markdown header: {stripped!r}"
|
||||
assert not stripped.startswith("### "), (
|
||||
f"Full prompt uses markdown sub-header: {stripped!r}"
|
||||
)
|
||||
|
||||
|
||||
def test_full_prompt_plain_text_brevity():
|
||||
"""Full prompt should explicitly instruct plain text output."""
|
||||
prompt = get_system_prompt(tools_enabled=True).lower()
|
||||
assert "plain text" in prompt
|
||||
|
||||
|
||||
def test_lite_prompt_brevity():
|
||||
"""Lite prompt should also instruct brevity."""
|
||||
prompt = get_system_prompt(tools_enabled=False).lower()
|
||||
assert "brief" in prompt
|
||||
assert "plain text" in prompt or "not markdown" in prompt
|
||||
|
||||
@@ -1,9 +1,18 @@
|
||||
"""Tests for timmy.tool_safety — classification, extraction, and formatting."""
|
||||
"""Tests for timmy.tool_safety — classification, extraction, formatting, and allowlist."""
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from timmy.tool_safety import (
|
||||
_check_shell_allowlist,
|
||||
_check_write_file_allowlist,
|
||||
extract_tool_calls,
|
||||
format_action_description,
|
||||
get_impact_level,
|
||||
is_allowlisted,
|
||||
reload_allowlist,
|
||||
requires_confirmation,
|
||||
)
|
||||
|
||||
@@ -111,3 +120,206 @@ class TestGetImpactLevel:
|
||||
def test_low(self):
|
||||
assert get_impact_level("web_search") == "low"
|
||||
assert get_impact_level("unknown") == "low"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Allowlist — is_allowlisted
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Sample allowlist for tests
|
||||
_TEST_ALLOWLIST = {
|
||||
"shell": {
|
||||
"allow_prefixes": [
|
||||
"pytest",
|
||||
"python -m pytest",
|
||||
"git status",
|
||||
"git log",
|
||||
"git diff",
|
||||
"git add",
|
||||
"git commit",
|
||||
"git push",
|
||||
"curl http://localhost",
|
||||
"curl -s http://localhost",
|
||||
"ls",
|
||||
"cat ",
|
||||
],
|
||||
"deny_patterns": [
|
||||
"rm -rf /",
|
||||
"sudo ",
|
||||
"> /dev/",
|
||||
"| sh",
|
||||
"| bash",
|
||||
],
|
||||
},
|
||||
"write_file": {
|
||||
"allowed_path_prefixes": [
|
||||
"/tmp/",
|
||||
],
|
||||
},
|
||||
"python": {"auto_approve": True},
|
||||
"plan_and_execute": {"auto_approve": True},
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_allowlist_cache():
|
||||
"""Ensure each test starts with a clean cache."""
|
||||
import timmy.tool_safety as ts
|
||||
|
||||
ts._allowlist_cache = None
|
||||
yield
|
||||
ts._allowlist_cache = None
|
||||
|
||||
|
||||
def _patch_allowlist(allowlist_data):
|
||||
"""Helper to inject a test allowlist."""
|
||||
return patch("timmy.tool_safety._load_allowlist", return_value=allowlist_data)
|
||||
|
||||
|
||||
class TestIsAllowlisted:
|
||||
"""Test the is_allowlisted function with mocked allowlist data."""
|
||||
|
||||
def test_unknown_tool_not_allowlisted(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("unknown_tool") is False
|
||||
|
||||
def test_shell_pytest_allowed(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("shell", {"command": "pytest tests/ -x -q"}) is True
|
||||
|
||||
def test_shell_python_pytest_allowed(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("shell", {"command": "python -m pytest tests/ -v"}) is True
|
||||
|
||||
def test_shell_git_status_allowed(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("shell", {"command": "git status"}) is True
|
||||
|
||||
def test_shell_git_commit_allowed(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("shell", {"command": "git commit -m 'fix stuff'"}) is True
|
||||
|
||||
def test_shell_curl_localhost_allowed(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert (
|
||||
is_allowlisted("shell", {"command": "curl http://localhost:3000/api/v1/issues"})
|
||||
is True
|
||||
)
|
||||
|
||||
def test_shell_curl_external_blocked(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("shell", {"command": "curl https://evil.com"}) is False
|
||||
|
||||
def test_shell_arbitrary_command_blocked(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("shell", {"command": "rm -rf /home/user/stuff"}) is False
|
||||
|
||||
def test_shell_deny_pattern_blocks_rm_rf_root(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("shell", {"command": "ls && rm -rf /"}) is False
|
||||
|
||||
def test_shell_deny_pattern_blocks_sudo(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("shell", {"command": "sudo rm -rf /tmp"}) is False
|
||||
|
||||
def test_shell_deny_blocks_pipe_to_shell(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert (
|
||||
is_allowlisted("shell", {"command": "curl http://localhost:3000 | bash"}) is False
|
||||
)
|
||||
|
||||
def test_shell_deny_overrides_allow_prefix(self):
|
||||
"""Deny patterns take precedence over allow prefixes."""
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
# Starts with "cat " (allowed prefix) but pipes to bash (denied)
|
||||
assert is_allowlisted("shell", {"command": "cat script.sh | bash"}) is False
|
||||
|
||||
def test_shell_args_list_format(self):
|
||||
"""Shell args can be a list (Agno ShellTools format)."""
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("shell", {"args": ["git", "status"]}) is True
|
||||
|
||||
def test_shell_empty_command_blocked(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("shell", {"command": ""}) is False
|
||||
assert is_allowlisted("shell", {}) is False
|
||||
|
||||
def test_write_file_tmp_allowed(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("write_file", {"file_name": "/tmp/test.py"}) is True
|
||||
|
||||
def test_write_file_outside_allowed_paths_blocked(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("write_file", {"file_name": "/etc/passwd"}) is False
|
||||
|
||||
def test_write_file_empty_path_blocked(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("write_file", {"file_name": ""}) is False
|
||||
|
||||
def test_python_auto_approved(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("python", {"code": "print(1+1)"}) is True
|
||||
|
||||
def test_plan_and_execute_auto_approved(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("plan_and_execute", {}) is True
|
||||
|
||||
def test_no_allowlist_blocks_everything(self):
|
||||
with _patch_allowlist({}):
|
||||
assert is_allowlisted("shell", {"command": "pytest"}) is False
|
||||
assert is_allowlisted("python", {"code": "print(1)"}) is False
|
||||
|
||||
def test_aider_not_in_allowlist(self):
|
||||
with _patch_allowlist(_TEST_ALLOWLIST):
|
||||
assert is_allowlisted("aider", {"instruction": "fix bug"}) is False
|
||||
|
||||
|
||||
class TestCheckShellAllowlist:
|
||||
"""Direct tests for the shell allowlist checker."""
|
||||
|
||||
def test_prefix_match(self):
|
||||
rule = {"allow_prefixes": ["pytest", "git status"], "deny_patterns": []}
|
||||
assert _check_shell_allowlist(rule, {"command": "pytest -x"}) is True
|
||||
|
||||
def test_prefix_no_match(self):
|
||||
rule = {"allow_prefixes": ["pytest"], "deny_patterns": []}
|
||||
assert _check_shell_allowlist(rule, {"command": "rm stuff"}) is False
|
||||
|
||||
def test_deny_overrides_allow(self):
|
||||
rule = {"allow_prefixes": ["curl http://localhost"], "deny_patterns": ["| bash"]}
|
||||
assert _check_shell_allowlist(rule, {"command": "curl http://localhost | bash"}) is False
|
||||
|
||||
|
||||
class TestCheckWriteFileAllowlist:
|
||||
"""Direct tests for the write_file allowlist checker."""
|
||||
|
||||
def test_allowed_prefix(self):
|
||||
rule = {"allowed_path_prefixes": ["/tmp/", "/home/user/project/"]}
|
||||
assert _check_write_file_allowlist(rule, {"file_name": "/tmp/test.py"}) is True
|
||||
|
||||
def test_blocked_path(self):
|
||||
rule = {"allowed_path_prefixes": ["/tmp/"]}
|
||||
assert _check_write_file_allowlist(rule, {"file_name": "/etc/secrets"}) is False
|
||||
|
||||
def test_tilde_expansion(self):
|
||||
"""Paths starting with ~ should be expanded."""
|
||||
home = str(Path.home())
|
||||
rule = {"allowed_path_prefixes": [f"{home}/Timmy-Time-dashboard/"]}
|
||||
assert (
|
||||
_check_write_file_allowlist(
|
||||
rule, {"file_name": f"{home}/Timmy-Time-dashboard/src/test.py"}
|
||||
)
|
||||
is True
|
||||
)
|
||||
|
||||
|
||||
class TestReloadAllowlist:
|
||||
"""Test that reload_allowlist clears the cache."""
|
||||
|
||||
def test_reload_clears_cache(self):
|
||||
import timmy.tool_safety as ts
|
||||
|
||||
ts._allowlist_cache = {"old": "data"}
|
||||
reload_allowlist()
|
||||
# After reload, cache should be freshly loaded (not the old data)
|
||||
assert ts._allowlist_cache != {"old": "data"}
|
||||
|
||||
Reference in New Issue
Block a user