forked from Rockachopa/Timmy-time-dashboard
Remove DuckDuckGoTools import, all web_search registrations across 4 toolkit factories, catalog entry, safety classification, prompt references, and session regex. Total: -41 lines of dead code. consult_grok is functional (grok_enabled=True, API key set) and opt-in, so it stays — but Timmy never calls it autonomously, which is correct sovereign behavior (no cloud calls unless user permits). Closes #87
326 lines
11 KiB
Python
326 lines
11 KiB
Python
"""Tests for timmy.tool_safety — classification, extraction, formatting, and allowlist."""
|
|
|
|
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
from timmy.tool_safety import (
|
|
_check_shell_allowlist,
|
|
_check_write_file_allowlist,
|
|
extract_tool_calls,
|
|
format_action_description,
|
|
get_impact_level,
|
|
is_allowlisted,
|
|
reload_allowlist,
|
|
requires_confirmation,
|
|
)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# requires_confirmation
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestRequiresConfirmation:
    """Check which tool names ``requires_confirmation`` flags for approval."""

    def test_dangerous_tools(self):
        """Every tool in the risky set must report that it needs confirmation."""
        risky_tools = ("shell", "python", "write_file", "aider", "plan_and_execute")
        for tool_name in risky_tools:
            verdict = requires_confirmation(tool_name)
            assert verdict is True

    def test_safe_tools(self):
        """Every tool in the harmless set must report that no confirmation is needed."""
        harmless_tools = ("calculator", "read_file", "list_files")
        for tool_name in harmless_tools:
            verdict = requires_confirmation(tool_name)
            assert verdict is False

    def test_unknown_defaults_to_dangerous(self):
        """A tool name in neither set is expected to require confirmation."""
        verdict = requires_confirmation("totally_unknown")
        assert verdict is True
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# extract_tool_calls
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExtractToolCalls:
    """Exercise ``extract_tool_calls`` on text with embedded JSON tool calls."""

    def test_arguments_format(self):
        """A call carrying an ``arguments`` key yields one (name, args) entry."""
        message = (
            'Creating dir. {"name": "shell", "arguments": {"args": ["mkdir", "-p", "/tmp/test"]}}'
        )
        found = extract_tool_calls(message)
        assert len(found) == 1
        first = found[0]
        assert first[0] == "shell"
        assert first[1]["args"] == ["mkdir", "-p", "/tmp/test"]

    def test_parameters_format(self):
        """A call carrying a ``parameters`` key is recognised as well."""
        message = 'Result: {"name": "python", "parameters": {"code": "print(1+1)"}}'
        found = extract_tool_calls(message)
        assert len(found) == 1
        assert found[0][0] == "python"

    def test_multiple_calls(self):
        """Two calls embedded in one text are both returned."""
        step_one = 'Step 1: {"name": "shell", "arguments": {"args": ["mkdir", "/tmp/a"]}} '
        step_two = (
            'Step 2: {"name": "write_file", "arguments": '
            '{"file_name": "/tmp/a/f.md", "contents": "hi"}}'
        )
        found = extract_tool_calls(step_one + step_two)
        assert len(found) == 2

    def test_empty_and_none(self):
        """Empty text, ``None`` and plain prose all produce an empty list."""
        for no_call_input in ("", None, "Just normal text."):
            assert extract_tool_calls(no_call_input) == []

    def test_malformed_json(self):
        """Text that looks like a call but is not valid JSON produces no calls."""
        broken = '{"name": "shell", "arguments": {not valid json}}'
        found = extract_tool_calls(broken)
        assert found == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# format_action_description
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestFormatActionDescription:
    """Check that ``format_action_description`` surfaces the key details of a call."""

    def test_shell_command(self):
        """A shell ``command`` string appears verbatim in the description."""
        shell_args = {"command": "ls -la /tmp"}
        summary = format_action_description("shell", shell_args)
        assert "ls -la /tmp" in summary

    def test_shell_args_list(self):
        """A shell ``args`` list appears space-joined in the description."""
        shell_args = {"args": ["mkdir", "-p", "/tmp/t"]}
        summary = format_action_description("shell", shell_args)
        assert "mkdir -p /tmp/t" in summary

    def test_write_file(self):
        """The description names the target file and the content length."""
        write_args = {"file_name": "/tmp/f.md", "contents": "hello world"}
        summary = format_action_description("write_file", write_args)
        assert "/tmp/f.md" in summary
        # "hello world" is 11 characters long.
        assert "11 chars" in summary

    def test_python(self):
        """The Python source code appears in the description."""
        python_args = {"code": "print(42)"}
        summary = format_action_description("python", python_args)
        assert "print(42)" in summary

    def test_unknown_tool(self):
        """For a tool without special handling, the tool name is still mentioned."""
        other_args = {"key": "value"}
        summary = format_action_description("custom_tool", other_args)
        assert "custom_tool" in summary
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# get_impact_level
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestGetImpactLevel:
    """Check the impact label ``get_impact_level`` assigns to each tool name."""

    def test_high(self):
        """``shell`` and ``python`` are labelled ``"high"``."""
        for tool_name in ("shell", "python"):
            level = get_impact_level(tool_name)
            assert level == "high"

    def test_medium(self):
        """``write_file`` and ``aider`` are labelled ``"medium"``."""
        for tool_name in ("write_file", "aider"):
            level = get_impact_level(tool_name)
            assert level == "medium"

    def test_low(self):
        """``calculator`` and an unrecognised name are labelled ``"low"``."""
        for tool_name in ("calculator", "unknown"):
            level = get_impact_level(tool_name)
            assert level == "low"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Allowlist — is_allowlisted
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Allowlist fixture data shared by the tests below. It is injected in place of
# the real allowlist through ``_patch_allowlist``.
_TEST_ALLOWLIST = {
    "shell": {
        # Prefixes a shell command may start with in order to be allowed.
        "allow_prefixes": [
            # test runners
            "pytest",
            "python -m pytest",
            # git subcommands
            "git status",
            "git log",
            "git diff",
            "git add",
            "git commit",
            "git push",
            # local HTTP only
            "curl http://localhost",
            "curl -s http://localhost",
            # file inspection (note the trailing space on "cat ")
            "ls",
            "cat ",
        ],
        # Substrings that the tests expect to block a command even when it
        # starts with an allowed prefix.
        "deny_patterns": [
            "rm -rf /",
            "sudo ",
            "> /dev/",
            "| sh",
            "| bash",
        ],
    },
    "write_file": {
        "allowed_path_prefixes": [
            "/tmp/",
        ],
    },
    # Tools allowed wholesale, with no per-argument rules.
    "python": {"auto_approve": True},
    "plan_and_execute": {"auto_approve": True},
}
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _reset_allowlist_cache():
    """Reset the module-level allowlist cache before and after every test.

    The fixture is autouse, so it applies to each test in this module without
    being requested explicitly.
    """
    import timmy.tool_safety as safety_mod

    # Setup: drop anything a previous test left in the cache.
    safety_mod._allowlist_cache = None
    yield
    # Teardown: leave no cached data behind for the next test.
    safety_mod._allowlist_cache = None
|
|
|
|
|
|
def _patch_allowlist(allowlist_data):
    """Build a patcher that makes ``_load_allowlist`` return *allowlist_data*.

    The returned ``unittest.mock.patch`` object is used as a context manager
    by the tests in this module.
    """
    loader_path = "timmy.tool_safety._load_allowlist"
    return patch(loader_path, return_value=allowlist_data)
|
|
|
|
|
|
class TestIsAllowlisted:
    """Exercise ``is_allowlisted`` against a mocked allowlist.

    Each test swaps the allowlist loader for one returning fixed data
    (``_TEST_ALLOWLIST`` unless stated otherwise) and checks the verdict.
    """

    def test_unknown_tool_not_allowlisted(self):
        """A tool with no allowlist entry is rejected."""
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("unknown_tool")
            assert verdict is False

    def test_shell_pytest_allowed(self):
        """A command starting with ``pytest`` is accepted."""
        call_args = {"command": "pytest tests/ -x -q"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is True

    def test_shell_python_pytest_allowed(self):
        """A command starting with ``python -m pytest`` is accepted."""
        call_args = {"command": "python -m pytest tests/ -v"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is True

    def test_shell_git_status_allowed(self):
        """``git status`` is accepted."""
        call_args = {"command": "git status"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is True

    def test_shell_git_commit_allowed(self):
        """``git commit`` with a message is accepted."""
        call_args = {"command": "git commit -m 'fix stuff'"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is True

    def test_shell_curl_localhost_allowed(self):
        """``curl`` against localhost is accepted."""
        call_args = {"command": "curl http://localhost:3000/api/v1/issues"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is True

    def test_shell_curl_external_blocked(self):
        """``curl`` against a non-localhost URL matches no prefix and is rejected."""
        call_args = {"command": "curl https://evil.com"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is False

    def test_shell_arbitrary_command_blocked(self):
        """A command that starts with no allowed prefix is rejected."""
        call_args = {"command": "rm -rf /home/user/stuff"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is False

    def test_shell_deny_pattern_blocks_rm_rf_root(self):
        """An allowed prefix followed by ``rm -rf /`` is rejected."""
        call_args = {"command": "ls && rm -rf /"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is False

    def test_shell_deny_pattern_blocks_sudo(self):
        """A ``sudo`` command is rejected."""
        call_args = {"command": "sudo rm -rf /tmp"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is False

    def test_shell_deny_blocks_pipe_to_shell(self):
        """Piping an otherwise allowed ``curl`` into ``bash`` is rejected."""
        call_args = {"command": "curl http://localhost:3000 | bash"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is False

    def test_shell_deny_overrides_allow_prefix(self):
        """A deny pattern wins over a matching allow prefix."""
        # Begins with the allowed prefix "cat " but also contains the denied "| bash".
        call_args = {"command": "cat script.sh | bash"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is False

    def test_shell_args_list_format(self):
        """Shell arguments given as a list (Agno ShellTools format) are accepted."""
        call_args = {"args": ["git", "status"]}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("shell", call_args)
            assert verdict is True

    def test_shell_empty_command_blocked(self):
        """An empty command string and a missing command are both rejected."""
        with _patch_allowlist(_TEST_ALLOWLIST):
            empty_verdict = is_allowlisted("shell", {"command": ""})
            assert empty_verdict is False
            missing_verdict = is_allowlisted("shell", {})
            assert missing_verdict is False

    def test_write_file_tmp_allowed(self):
        """Writing under ``/tmp/`` is accepted."""
        call_args = {"file_name": "/tmp/test.py"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("write_file", call_args)
            assert verdict is True

    def test_write_file_outside_allowed_paths_blocked(self):
        """Writing outside the allowed path prefixes is rejected."""
        call_args = {"file_name": "/etc/passwd"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("write_file", call_args)
            assert verdict is False

    def test_write_file_empty_path_blocked(self):
        """An empty target file name is rejected."""
        call_args = {"file_name": ""}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("write_file", call_args)
            assert verdict is False

    def test_python_auto_approved(self):
        """``python`` is accepted because its entry sets ``auto_approve``."""
        call_args = {"code": "print(1+1)"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("python", call_args)
            assert verdict is True

    def test_plan_and_execute_auto_approved(self):
        """``plan_and_execute`` is accepted because its entry sets ``auto_approve``."""
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("plan_and_execute", {})
            assert verdict is True

    def test_no_allowlist_blocks_everything(self):
        """With an empty allowlist, nothing is accepted."""
        with _patch_allowlist({}):
            shell_verdict = is_allowlisted("shell", {"command": "pytest"})
            assert shell_verdict is False
            python_verdict = is_allowlisted("python", {"code": "print(1)"})
            assert python_verdict is False

    def test_aider_not_in_allowlist(self):
        """``aider`` has no entry in the sample allowlist, so it is rejected."""
        call_args = {"instruction": "fix bug"}
        with _patch_allowlist(_TEST_ALLOWLIST):
            verdict = is_allowlisted("aider", call_args)
            assert verdict is False
|
|
|
|
|
|
class TestCheckShellAllowlist:
    """Call ``_check_shell_allowlist`` directly with hand-built rules."""

    def test_prefix_match(self):
        """A command starting with one of the allowed prefixes is accepted."""
        shell_rule = {"allow_prefixes": ["pytest", "git status"], "deny_patterns": []}
        call_args = {"command": "pytest -x"}
        verdict = _check_shell_allowlist(shell_rule, call_args)
        assert verdict is True

    def test_prefix_no_match(self):
        """A command matching no allowed prefix is rejected."""
        shell_rule = {"allow_prefixes": ["pytest"], "deny_patterns": []}
        call_args = {"command": "rm stuff"}
        verdict = _check_shell_allowlist(shell_rule, call_args)
        assert verdict is False

    def test_deny_overrides_allow(self):
        """A deny pattern rejects a command even when its prefix is allowed."""
        shell_rule = {
            "allow_prefixes": ["curl http://localhost"],
            "deny_patterns": ["| bash"],
        }
        call_args = {"command": "curl http://localhost | bash"}
        verdict = _check_shell_allowlist(shell_rule, call_args)
        assert verdict is False
|
|
|
|
|
|
class TestCheckWriteFileAllowlist:
    """Call ``_check_write_file_allowlist`` directly with hand-built rules."""

    def test_allowed_prefix(self):
        """A file under one of several allowed prefixes is accepted."""
        path_rule = {"allowed_path_prefixes": ["/tmp/", "/home/user/project/"]}
        call_args = {"file_name": "/tmp/test.py"}
        verdict = _check_write_file_allowlist(path_rule, call_args)
        assert verdict is True

    def test_blocked_path(self):
        """A file outside every allowed prefix is rejected."""
        path_rule = {"allowed_path_prefixes": ["/tmp/"]}
        call_args = {"file_name": "/etc/secrets"}
        verdict = _check_write_file_allowlist(path_rule, call_args)
        assert verdict is False

    def test_tilde_expansion(self):
        """A file under an allowed prefix inside the home directory is accepted.

        NOTE(review): the prefix and the file name are both built from
        ``Path.home()``, so no literal ``~`` is passed to the checker here.
        Confirm whether a case with an unexpanded ``~`` should be added.
        """
        home_dir = str(Path.home())
        project_root = f"{home_dir}/Timmy-Time-dashboard/"
        path_rule = {"allowed_path_prefixes": [project_root]}
        call_args = {"file_name": f"{home_dir}/Timmy-Time-dashboard/src/test.py"}
        verdict = _check_write_file_allowlist(path_rule, call_args)
        assert verdict is True
|
|
|
|
|
|
class TestReloadAllowlist:
    """Check that ``reload_allowlist`` replaces previously cached data."""

    def test_reload_clears_cache(self):
        """After a reload the cache no longer equals the stale value seeded here."""
        import timmy.tool_safety as safety_mod

        stale = {"old": "data"}
        # Seed the cache with a copy so the comparison operand below is an
        # independent object, not the one stored in the cache.
        safety_mod._allowlist_cache = dict(stale)
        reload_allowlist()
        # NOTE(review): the loader is not patched in this test, so the outcome
        # presumably depends on whatever the real loader returns; confirm.
        assert safety_mod._allowlist_cache != stale
|