fix(approval): show full command in dangerous command approval (#1553)
* fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. * fix(tools): chunk long messages in send_message_tool before dispatch (#1552) Long messages sent via send_message tool or cron delivery silently failed when exceeding platform limits. Gateway adapters handle this via truncate_message(), but the standalone senders in send_message_tool bypassed that entirely. - Apply truncate_message() chunking in _send_to_platform() before dispatching to individual platform senders - Remove naive message[i:i+2000] character split in _send_discord() in favor of centralized smart splitting - Attach media files to last chunk only for Telegram - Add regression tests for chunking and media placement Cherry-picked from PR #1557 by llbn. * fix(approval): show full command in dangerous command approval (#1553) Previously the command was truncated to 80 chars in CLI (with a [v]iew full option), 500 chars in Discord embeds, and missing entirely in Telegram/Slack approval messages. Now the full command is always displayed everywhere: - CLI: removed 80-char truncation and [v]iew full menu option - Gateway (TG/Slack): approval_required message includes full command in a code block - Discord: embed shows full command up to 4096-char limit - Windows: skip SIGALRM-based test timeout (Unix-only) - Updated tests: replaced view-flow tests with direct approval tests Cherry-picked from PR #1566 by crazywriter1. --------- Co-authored-by: buray <ygd58@users.noreply.github.com> Co-authored-by: lbn <llbn@users.noreply.github.com> Co-authored-by: crazywriter1 <53251494+crazywriter1@users.noreply.github.com>
This commit is contained in:
@@ -107,7 +107,11 @@ def _ensure_current_event_loop(request):
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _enforce_test_timeout():
|
||||
"""Kill any individual test that takes longer than 30 seconds."""
|
||||
"""Kill any individual test that takes longer than 30 seconds.
|
||||
SIGALRM is Unix-only; skip on Windows."""
|
||||
if sys.platform == "win32":
|
||||
yield
|
||||
return
|
||||
old = signal.signal(signal.SIGALRM, _timeout_handler)
|
||||
signal.alarm(30)
|
||||
yield
|
||||
|
||||
@@ -385,75 +385,47 @@ class TestPatternKeyUniqueness:
|
||||
assert is_approved("legacy-find", key_delete) is True
|
||||
|
||||
|
||||
class TestViewFullCommand:
|
||||
"""Tests for the 'view full command' option in prompt_dangerous_approval."""
|
||||
class TestFullCommandAlwaysShown:
|
||||
"""The full command is always shown in the approval prompt (no truncation).
|
||||
|
||||
def test_view_then_once_fallback(self):
|
||||
"""Pressing 'v' shows the full command, then 'o' approves once."""
|
||||
Previously there was a [v]iew full option for long commands. Now the full
|
||||
command is always displayed. These tests verify the basic approval flow
|
||||
still works with long commands. (#1553)
|
||||
"""
|
||||
|
||||
def test_once_with_long_command(self):
|
||||
"""Pressing 'o' approves once even for very long commands."""
|
||||
long_cmd = "rm -rf " + "a" * 200
|
||||
inputs = iter(["v", "o"])
|
||||
with mock_patch("builtins.input", side_effect=inputs):
|
||||
result = prompt_dangerous_approval(long_cmd, "recursive delete")
|
||||
assert result == "once"
|
||||
|
||||
def test_view_then_deny_fallback(self):
|
||||
"""Pressing 'v' shows the full command, then 'd' denies."""
|
||||
long_cmd = "rm -rf " + "b" * 200
|
||||
inputs = iter(["v", "d"])
|
||||
with mock_patch("builtins.input", side_effect=inputs):
|
||||
result = prompt_dangerous_approval(long_cmd, "recursive delete")
|
||||
assert result == "deny"
|
||||
|
||||
def test_view_then_session_fallback(self):
|
||||
"""Pressing 'v' shows the full command, then 's' approves for session."""
|
||||
long_cmd = "rm -rf " + "c" * 200
|
||||
inputs = iter(["v", "s"])
|
||||
with mock_patch("builtins.input", side_effect=inputs):
|
||||
result = prompt_dangerous_approval(long_cmd, "recursive delete")
|
||||
assert result == "session"
|
||||
|
||||
def test_view_then_always_fallback(self):
|
||||
"""Pressing 'v' shows the full command, then 'a' approves always."""
|
||||
long_cmd = "rm -rf " + "d" * 200
|
||||
inputs = iter(["v", "a"])
|
||||
with mock_patch("builtins.input", side_effect=inputs):
|
||||
result = prompt_dangerous_approval(long_cmd, "recursive delete")
|
||||
assert result == "always"
|
||||
|
||||
def test_view_then_session_when_permanent_hidden(self):
|
||||
"""The view-full flow still works when allow_permanent=False."""
|
||||
long_cmd = "rm -rf " + "d" * 200
|
||||
inputs = iter(["v", "s"])
|
||||
with mock_patch("builtins.input", side_effect=inputs):
|
||||
result = prompt_dangerous_approval(
|
||||
long_cmd,
|
||||
"recursive delete",
|
||||
allow_permanent=False,
|
||||
)
|
||||
assert result == "session"
|
||||
|
||||
def test_view_not_shown_for_short_command(self):
|
||||
"""Short commands don't offer the view option; 'v' falls through to deny."""
|
||||
short_cmd = "rm -rf /tmp"
|
||||
with mock_patch("builtins.input", return_value="v"):
|
||||
result = prompt_dangerous_approval(short_cmd, "recursive delete")
|
||||
# 'v' is not a valid choice for short commands, should deny
|
||||
assert result == "deny"
|
||||
|
||||
def test_once_without_view(self):
|
||||
"""Directly pressing 'o' without viewing still works."""
|
||||
long_cmd = "rm -rf " + "e" * 200
|
||||
with mock_patch("builtins.input", return_value="o"):
|
||||
result = prompt_dangerous_approval(long_cmd, "recursive delete")
|
||||
assert result == "once"
|
||||
|
||||
def test_view_ignored_after_already_shown(self):
|
||||
"""After viewing once, 'v' on a now-untruncated display falls through to deny."""
|
||||
long_cmd = "rm -rf " + "f" * 200
|
||||
inputs = iter(["v", "v"]) # second 'v' should not match since is_truncated is False
|
||||
with mock_patch("builtins.input", side_effect=inputs):
|
||||
def test_session_with_long_command(self):
|
||||
"""Pressing 's' approves for session with long commands."""
|
||||
long_cmd = "rm -rf " + "c" * 200
|
||||
with mock_patch("builtins.input", return_value="s"):
|
||||
result = prompt_dangerous_approval(long_cmd, "recursive delete")
|
||||
# After first 'v', is_truncated becomes False, so second 'v' -> deny
|
||||
assert result == "session"
|
||||
|
||||
def test_always_with_long_command(self):
|
||||
"""Pressing 'a' approves always with long commands."""
|
||||
long_cmd = "rm -rf " + "d" * 200
|
||||
with mock_patch("builtins.input", return_value="a"):
|
||||
result = prompt_dangerous_approval(long_cmd, "recursive delete")
|
||||
assert result == "always"
|
||||
|
||||
def test_deny_with_long_command(self):
|
||||
"""Pressing 'd' denies with long commands."""
|
||||
long_cmd = "rm -rf " + "b" * 200
|
||||
with mock_patch("builtins.input", return_value="d"):
|
||||
result = prompt_dangerous_approval(long_cmd, "recursive delete")
|
||||
assert result == "deny"
|
||||
|
||||
def test_invalid_input_denies(self):
|
||||
"""Invalid input (like 'v' which no longer exists) falls through to deny."""
|
||||
short_cmd = "rm -rf /tmp"
|
||||
with mock_patch("builtins.input", return_value="v"):
|
||||
result = prompt_dangerous_approval(short_cmd, "recursive delete")
|
||||
assert result == "deny"
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user