Files
hermes-agent/tests/gateway/test_approve_deny_commands.py
Nick 9a581bba50 fix(gateway): resume agent after /approve executes blocked command
When a dangerous command was blocked and the user approved it via /approve,
the command was executed but the agent loop had already exited — the agent
never received the command output and the task died silently.

Now _handle_approve_command sends immediate feedback to the user, then
creates a synthetic continuation message with the command output and feeds
it through _handle_message so the agent picks up where it left off.

- Send command result to chat immediately via adapter.send()
- Create synthetic MessageEvent with command + output as context
- Spawn asyncio task to re-invoke agent via _handle_message
- Return None (feedback already sent directly)
- Add test for agent re-invocation after approval
- Update existing approval tests for new return behavior
2026-04-01 01:38:55 -07:00

307 lines
12 KiB
Python

"""Tests for /approve and /deny gateway commands.
Verifies that dangerous command approvals require explicit /approve or /deny
slash commands, not bare "yes"/"no" text matching.
"""
import asyncio
import time
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent
from gateway.session import SessionEntry, SessionSource, build_session_key
def _make_source() -> SessionSource:
    """Return the fixed Telegram DM source (user ``u1``, chat ``c1``) used by these tests."""
    source_fields = {
        "platform": Platform.TELEGRAM,
        "user_id": "u1",
        "chat_id": "c1",
        "user_name": "tester",
        "chat_type": "dm",
    }
    return SessionSource(**source_fields)
def _make_event(text: str) -> MessageEvent:
    """Wrap *text* in a ``MessageEvent`` (id ``m1``) originating from the shared test source."""
    origin = _make_source()
    return MessageEvent(text=text, source=origin, message_id="m1")
def _make_runner():
    """Build a ``GatewayRunner`` for tests without running its ``__init__``.

    The instance is allocated with ``object.__new__`` and only the attributes
    set below exist on it: a Telegram-only config, a mocked Telegram adapter
    whose ``send`` is an ``AsyncMock``, empty bookkeeping containers, and
    stubbed authorization / session-env hooks.
    """
    # Imported inside the helper, exactly as the original does.
    from gateway.run import GatewayRunner

    telegram_adapter = MagicMock()
    telegram_adapter.send = AsyncMock()  # awaited by the runner, so it must be async

    gateway = object.__new__(GatewayRunner)  # allocate only; __init__ is not called
    gateway.config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
    )
    gateway.adapters = {Platform.TELEGRAM: telegram_adapter}
    gateway.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
    gateway.session_store = MagicMock()

    # Plain state that starts out empty / unset.
    blank_state = {
        "_voice_mode": {},
        "_running_agents": {},
        "_pending_messages": {},
        "_pending_approvals": {},
        "_background_tasks": set(),
        "_session_db": None,
        "_reasoning_config": None,
        "_provider_routing": {},
        "_fallback_model": None,
        "_show_reasoning": False,
    }
    for attr_name, initial in blank_state.items():
        setattr(gateway, attr_name, initial)

    # Every source is treated as authorized; setting the session env is a no-op.
    gateway._is_user_authorized = lambda _source: True
    gateway._set_session_env = lambda _context: None
    return gateway
def _make_pending_approval(command="sudo rm -rf /tmp/test", pattern_key="sudo"):
    """Return a pending-approval record for *command*, stamped with the current time.

    ``pattern_keys`` is a one-element list holding *pattern_key*; the
    description is always ``"sudo command"``.
    """
    created_at = time.time()
    return dict(
        command=command,
        pattern_key=pattern_key,
        pattern_keys=[pattern_key],
        description="sudo command",
        timestamp=created_at,
    )
# ------------------------------------------------------------------
# /approve command
# ------------------------------------------------------------------
class TestApproveCommand:
    """Exercises ``GatewayRunner._handle_approve_command`` against a mocked adapter."""

    @staticmethod
    def _runner_with_pending(approval=None):
        """Return ``(runner, session_key)`` with an approval queued for the test source.

        A fresh default approval is queued unless *approval* is supplied.
        """
        if approval is None:
            approval = _make_pending_approval()
        gateway = _make_runner()
        key = gateway._session_key_for_source(_make_source())
        gateway._pending_approvals[key] = approval
        return gateway, key

    @staticmethod
    def _sent_text(gateway, index):
        """Return the second positional argument of the adapter's *index*-th ``send`` call."""
        telegram = gateway.adapters[Platform.TELEGRAM]
        return telegram.send.call_args_list[index][0][1]

    @pytest.mark.asyncio
    async def test_approve_executes_pending_command(self):
        """A bare /approve runs the queued command and reports back via the adapter."""
        gateway, key = self._runner_with_pending()
        approve_event = _make_event("/approve")

        terminal_patch = patch("tools.terminal_tool.terminal_tool", return_value="done")
        agent_patch = patch.object(
            gateway, "_handle_message", new_callable=AsyncMock, return_value="agent continued"
        )
        with terminal_patch as mock_term, agent_patch:
            outcome = await gateway._handle_approve_command(approve_event)
            # Give the event loop one turn so the background continuation task
            # can run; that is enough because every awaited mock completes
            # immediately (no real await points).
            await asyncio.sleep(0)

        # The handler returns None: feedback goes out through the adapter instead.
        assert outcome is None
        mock_term.assert_called_once_with(command="sudo rm -rf /tmp/test", force=True)
        assert key not in gateway._pending_approvals
        # The first adapter message is the immediate feedback.
        assert "Command approved and executed" in self._sent_text(gateway, 0)

    @pytest.mark.asyncio
    async def test_approve_session_remembers_pattern(self):
        """``/approve session`` records the pattern as approved for the session."""
        gateway, key = self._runner_with_pending()
        approve_event = _make_event("/approve session")

        terminal_patch = patch("tools.terminal_tool.terminal_tool", return_value="done")
        session_patch = patch("tools.approval.approve_session")
        agent_patch = patch.object(
            gateway, "_handle_message", new_callable=AsyncMock, return_value=None
        )
        with terminal_patch, session_patch as mock_session, agent_patch:
            outcome = await gateway._handle_approve_command(approve_event)
            # One loop turn lets the background continuation task run; the
            # mocks complete immediately, so a single yield suffices.
            await asyncio.sleep(0)

        assert outcome is None
        mock_session.assert_called_once_with(key, "sudo")
        # The scope of the approval is spelled out in the adapter feedback.
        assert "pattern approved for this session" in self._sent_text(gateway, 0)

    @pytest.mark.asyncio
    async def test_approve_always_approves_permanently(self):
        """``/approve always`` records the pattern as permanently approved."""
        gateway, _key = self._runner_with_pending()
        approve_event = _make_event("/approve always")

        terminal_patch = patch("tools.terminal_tool.terminal_tool", return_value="done")
        permanent_patch = patch("tools.approval.approve_permanent")
        agent_patch = patch.object(
            gateway, "_handle_message", new_callable=AsyncMock, return_value=None
        )
        with terminal_patch, permanent_patch as mock_perm, agent_patch:
            outcome = await gateway._handle_approve_command(approve_event)
            # One loop turn lets the background continuation task run; the
            # mocks complete immediately, so a single yield suffices.
            await asyncio.sleep(0)

        assert outcome is None
        mock_perm.assert_called_once_with("sudo")
        # The scope of the approval is spelled out in the adapter feedback.
        assert "pattern approved permanently" in self._sent_text(gateway, 0)

    @pytest.mark.asyncio
    async def test_approve_no_pending(self):
        """With nothing queued, /approve answers with a 'No pending command' message."""
        gateway = _make_runner()
        reply = await gateway._handle_approve_command(_make_event("/approve"))
        assert "No pending command" in reply

    @pytest.mark.asyncio
    async def test_approve_expired(self):
        """/approve on a stale approval is rejected and the approval is dropped."""
        stale = _make_pending_approval()
        stale["timestamp"] = time.time() - 600  # 10 minutes ago
        gateway, key = self._runner_with_pending(stale)

        reply = await gateway._handle_approve_command(_make_event("/approve"))

        assert "expired" in reply
        assert key not in gateway._pending_approvals

    @pytest.mark.asyncio
    async def test_approve_reinvokes_agent_with_result(self):
        """After running the command, /approve feeds its output back to the agent."""
        gateway, _key = self._runner_with_pending()
        approve_event = _make_event("/approve")
        mock_handle = AsyncMock(return_value="I continued the task.")

        terminal_patch = patch("tools.terminal_tool.terminal_tool", return_value="file deleted")
        with terminal_patch, patch.object(gateway, "_handle_message", mock_handle):
            await gateway._handle_approve_command(approve_event)
            # One loop turn lets the background continuation task run; the
            # mocks complete immediately, so a single yield suffices.
            await asyncio.sleep(0)

        # The agent is re-invoked through _handle_message with a synthetic event
        # that mentions the approval, the command and its output.
        mock_handle.assert_called_once()
        synthetic_event = mock_handle.call_args[0][0]
        assert "approved" in synthetic_event.text.lower()
        assert "file deleted" in synthetic_event.text
        assert "sudo rm -rf /tmp/test" in synthetic_event.text

        # Two adapter messages: the immediate feedback, then the agent's continuation.
        telegram = gateway.adapters[Platform.TELEGRAM]
        assert telegram.send.call_count == 2
        assert self._sent_text(gateway, 1) == "I continued the task."
# ------------------------------------------------------------------
# /deny command
# ------------------------------------------------------------------
class TestDenyCommand:
    """Exercises ``GatewayRunner._handle_deny_command``."""

    @pytest.mark.asyncio
    async def test_deny_clears_pending(self):
        """/deny confirms the denial and removes the queued approval."""
        gateway = _make_runner()
        key = gateway._session_key_for_source(_make_source())
        gateway._pending_approvals[key] = _make_pending_approval()

        reply = await gateway._handle_deny_command(_make_event("/deny"))

        assert "❌ Command denied" in reply
        assert key not in gateway._pending_approvals

    @pytest.mark.asyncio
    async def test_deny_no_pending(self):
        """With nothing queued, /deny answers with a 'No pending command' message."""
        gateway = _make_runner()
        reply = await gateway._handle_deny_command(_make_event("/deny"))
        assert "No pending command" in reply
# ------------------------------------------------------------------
# Bare "yes" must NOT trigger approval
# ------------------------------------------------------------------
class TestBareTextNoLongerApproves:
    @pytest.mark.asyncio
    async def test_yes_does_not_execute_pending_command(self):
        """A plain 'yes' message must leave a pending approval untouched.

        Regression guard for issue #1888, where bare text matching against
        'yes'/'no' could intercept unrelated user messages.
        """
        gateway = _make_runner()
        key = gateway._session_key_for_source(_make_source())
        gateway._pending_approvals[key] = _make_pending_approval()

        # The user's "yes", built as an ordinary message rather than /approve.
        # Previously such text would have executed the pending command; now it
        # is meant to fall through to normal agent processing.
        #
        # NOTE(review): this event is constructed but never dispatched to the
        # runner (running _handle_message end-to-end is not easy here), so the
        # assertion below only confirms that the fixture itself is untouched.
        bare_reply = _make_event("yes")  # noqa: F841 - intentionally unused, see note

        # Key assertion: the pending approval has NOT been consumed.
        assert key in gateway._pending_approvals
# ------------------------------------------------------------------
# Approval hint appended to response
# ------------------------------------------------------------------
class TestApprovalHint:
    def test_approval_hint_appended_to_response(self):
        """The approval hint names the command and both slash commands.

        NOTE(review): the hint is assembled inside this test, not obtained
        from the gateway, so only the expected format is checked here; the
        real text is produced by the approval collection logic at the end of
        ``_handle_message``.
        """
        command = "sudo rm -rf /tmp/dangerous"
        preview = command
        hint_parts = [
            "\n\n⚠️ **Dangerous command requires approval:**\n",
            f"```\n{preview}\n```\n",
            "Reply `/approve` to execute, `/approve session` to approve this pattern ",
            "for the session, or `/deny` to cancel.",
        ]
        hint = "".join(hint_parts)

        # Both slash commands and the command text itself must be present.
        for expected in ("/approve", "/deny", command):
            assert expected in hint