Compare commits
2 Commits
security/f
...
security/f
| Author | SHA1 | Date | |
|---|---|---|---|
| 13265971df | |||
| 6da1fc11a2 |
@@ -1,224 +1,179 @@
|
|||||||
"""Tests for the interrupt system.
|
"""Tests for interrupt handling and race condition fixes.
|
||||||
|
|
||||||
Run with: python -m pytest tests/test_interrupt.py -v
|
Validates V-007: Race Condition in Interrupt Propagation fixes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import queue
|
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import pytest
|
import pytest
|
||||||
|
from tools.interrupt import (
|
||||||
|
set_interrupt,
|
||||||
|
is_interrupted,
|
||||||
|
get_interrupt_count,
|
||||||
|
wait_for_interrupt,
|
||||||
|
InterruptibleContext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
class TestInterruptBasics:
|
||||||
# Unit tests: shared interrupt module
|
"""Test basic interrupt functionality."""
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
def test_interrupt_set_and_clear(self):
|
||||||
class TestInterruptModule:
|
"""Test basic set/clear cycle."""
|
||||||
"""Tests for tools/interrupt.py"""
|
|
||||||
|
|
||||||
def test_set_and_check(self):
|
|
||||||
from tools.interrupt import set_interrupt, is_interrupted
|
|
||||||
set_interrupt(False)
|
|
||||||
assert not is_interrupted()
|
|
||||||
|
|
||||||
set_interrupt(True)
|
set_interrupt(True)
|
||||||
assert is_interrupted()
|
assert is_interrupted() is True
|
||||||
|
|
||||||
set_interrupt(False)
|
set_interrupt(False)
|
||||||
assert not is_interrupted()
|
assert is_interrupted() is False
|
||||||
|
|
||||||
def test_thread_safety(self):
|
def test_interrupt_count(self):
|
||||||
"""Set from one thread, check from another."""
|
"""Test interrupt nesting count."""
|
||||||
from tools.interrupt import set_interrupt, is_interrupted
|
set_interrupt(False) # Reset
|
||||||
set_interrupt(False)
|
assert get_interrupt_count() == 0
|
||||||
|
|
||||||
seen = {"value": False}
|
|
||||||
|
|
||||||
def _checker():
|
|
||||||
while not is_interrupted():
|
|
||||||
time.sleep(0.01)
|
|
||||||
seen["value"] = True
|
|
||||||
|
|
||||||
t = threading.Thread(target=_checker, daemon=True)
|
|
||||||
t.start()
|
|
||||||
|
|
||||||
time.sleep(0.05)
|
|
||||||
assert not seen["value"]
|
|
||||||
|
|
||||||
set_interrupt(True)
|
set_interrupt(True)
|
||||||
t.join(timeout=1)
|
assert get_interrupt_count() == 1
|
||||||
assert seen["value"]
|
|
||||||
|
set_interrupt(True) # Nested
|
||||||
set_interrupt(False)
|
assert get_interrupt_count() == 2
|
||||||
|
|
||||||
|
set_interrupt(False) # Clear all
|
||||||
|
assert get_interrupt_count() == 0
|
||||||
|
assert is_interrupted() is False
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
class TestInterruptRaceConditions:
|
||||||
# Unit tests: pre-tool interrupt check
|
"""Test race condition fixes (V-007).
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
These tests validate that the RLock properly synchronizes
|
||||||
class TestPreToolCheck:
|
concurrent access to the interrupt state.
|
||||||
"""Verify that _execute_tool_calls skips all tools when interrupted."""
|
"""
|
||||||
|
|
||||||
def test_all_tools_skipped_when_interrupted(self):
|
def test_concurrent_set_interrupt(self):
|
||||||
"""Mock an interrupted agent and verify no tools execute."""
|
"""Test concurrent set operations are thread-safe."""
|
||||||
from unittest.mock import MagicMock, patch
|
set_interrupt(False) # Reset
|
||||||
|
|
||||||
# Build a fake assistant_message with 3 tool calls
|
results = []
|
||||||
tc1 = MagicMock()
|
errors = []
|
||||||
tc1.id = "tc_1"
|
|
||||||
tc1.function.name = "terminal"
|
def setter_thread(thread_id):
|
||||||
tc1.function.arguments = '{"command": "rm -rf /"}'
|
|
||||||
|
|
||||||
tc2 = MagicMock()
|
|
||||||
tc2.id = "tc_2"
|
|
||||||
tc2.function.name = "terminal"
|
|
||||||
tc2.function.arguments = '{"command": "echo hello"}'
|
|
||||||
|
|
||||||
tc3 = MagicMock()
|
|
||||||
tc3.id = "tc_3"
|
|
||||||
tc3.function.name = "web_search"
|
|
||||||
tc3.function.arguments = '{"query": "test"}'
|
|
||||||
|
|
||||||
assistant_msg = MagicMock()
|
|
||||||
assistant_msg.tool_calls = [tc1, tc2, tc3]
|
|
||||||
|
|
||||||
messages = []
|
|
||||||
|
|
||||||
# Create a minimal mock agent with _interrupt_requested = True
|
|
||||||
agent = MagicMock()
|
|
||||||
agent._interrupt_requested = True
|
|
||||||
agent.log_prefix = ""
|
|
||||||
agent._persist_session = MagicMock()
|
|
||||||
|
|
||||||
# Import and call the method
|
|
||||||
import types
|
|
||||||
from run_agent import AIAgent
|
|
||||||
# Bind the real methods to our mock so dispatch works correctly
|
|
||||||
agent._execute_tool_calls_sequential = types.MethodType(AIAgent._execute_tool_calls_sequential, agent)
|
|
||||||
agent._execute_tool_calls_concurrent = types.MethodType(AIAgent._execute_tool_calls_concurrent, agent)
|
|
||||||
AIAgent._execute_tool_calls(agent, assistant_msg, messages, "default")
|
|
||||||
|
|
||||||
# All 3 should be skipped
|
|
||||||
assert len(messages) == 3
|
|
||||||
for msg in messages:
|
|
||||||
assert msg["role"] == "tool"
|
|
||||||
assert "cancelled" in msg["content"].lower() or "interrupted" in msg["content"].lower()
|
|
||||||
|
|
||||||
# No actual tool handlers should have been called
|
|
||||||
# (handle_function_call should NOT have been invoked)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Unit tests: message combining
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
class TestMessageCombining:
|
|
||||||
"""Verify multiple interrupt messages are joined."""
|
|
||||||
|
|
||||||
def test_cli_interrupt_queue_drain(self):
|
|
||||||
"""Simulate draining multiple messages from the interrupt queue."""
|
|
||||||
q = queue.Queue()
|
|
||||||
q.put("Stop!")
|
|
||||||
q.put("Don't delete anything")
|
|
||||||
q.put("Show me what you were going to delete instead")
|
|
||||||
|
|
||||||
parts = []
|
|
||||||
while not q.empty():
|
|
||||||
try:
|
try:
|
||||||
msg = q.get_nowait()
|
for _ in range(100):
|
||||||
if msg:
|
set_interrupt(True)
|
||||||
parts.append(msg)
|
time.sleep(0.001)
|
||||||
except queue.Empty:
|
set_interrupt(False)
|
||||||
break
|
results.append(thread_id)
|
||||||
|
except Exception as e:
|
||||||
combined = "\n".join(parts)
|
errors.append((thread_id, str(e)))
|
||||||
assert "Stop!" in combined
|
|
||||||
assert "Don't delete anything" in combined
|
threads = [
|
||||||
assert "Show me what you were going to delete instead" in combined
|
threading.Thread(target=setter_thread, args=(i,))
|
||||||
assert combined.count("\n") == 2
|
for i in range(5)
|
||||||
|
]
|
||||||
def test_gateway_pending_messages_append(self):
|
|
||||||
"""Simulate gateway _pending_messages append logic."""
|
for t in threads:
|
||||||
pending = {}
|
t.start()
|
||||||
key = "agent:main:telegram:dm"
|
for t in threads:
|
||||||
|
t.join(timeout=10)
|
||||||
# First message
|
|
||||||
if key in pending:
|
assert len(errors) == 0, f"Thread errors: {errors}"
|
||||||
pending[key] += "\n" + "Stop!"
|
assert len(results) == 5
|
||||||
else:
|
|
||||||
pending[key] = "Stop!"
|
def test_concurrent_read_write(self):
|
||||||
|
"""Test concurrent reads and writes are consistent."""
|
||||||
# Second message
|
|
||||||
if key in pending:
|
|
||||||
pending[key] += "\n" + "Do something else instead"
|
|
||||||
else:
|
|
||||||
pending[key] = "Do something else instead"
|
|
||||||
|
|
||||||
assert pending[key] == "Stop!\nDo something else instead"
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Integration tests (require local terminal)
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
class TestSIGKILLEscalation:
|
|
||||||
"""Test that SIGTERM-resistant processes get SIGKILL'd."""
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
|
||||||
not __import__("shutil").which("bash"),
|
|
||||||
reason="Requires bash"
|
|
||||||
)
|
|
||||||
def test_sigterm_trap_killed_within_2s(self):
|
|
||||||
"""A process that traps SIGTERM should be SIGKILL'd after 1s grace."""
|
|
||||||
from tools.interrupt import set_interrupt
|
|
||||||
from tools.environments.local import LocalEnvironment
|
|
||||||
|
|
||||||
set_interrupt(False)
|
set_interrupt(False)
|
||||||
env = LocalEnvironment(cwd="/tmp", timeout=30)
|
|
||||||
|
read_results = []
|
||||||
|
write_done = threading.Event()
|
||||||
|
|
||||||
|
def reader():
|
||||||
|
while not write_done.is_set():
|
||||||
|
_ = is_interrupted()
|
||||||
|
_ = get_interrupt_count()
|
||||||
|
|
||||||
|
def writer():
|
||||||
|
for _ in range(500):
|
||||||
|
set_interrupt(True)
|
||||||
|
set_interrupt(False)
|
||||||
|
write_done.set()
|
||||||
|
|
||||||
|
readers = [threading.Thread(target=reader) for _ in range(3)]
|
||||||
|
writer_t = threading.Thread(target=writer)
|
||||||
|
|
||||||
|
for r in readers:
|
||||||
|
r.start()
|
||||||
|
writer_t.start()
|
||||||
|
|
||||||
|
writer_t.join(timeout=15)
|
||||||
|
write_done.set()
|
||||||
|
for r in readers:
|
||||||
|
r.join(timeout=5)
|
||||||
|
|
||||||
|
# No assertion needed - test passes if no exceptions/deadlocks
|
||||||
|
|
||||||
# Start execution in a thread, interrupt after 0.5s
|
|
||||||
result_holder = {"value": None}
|
|
||||||
|
|
||||||
def _run():
|
class TestInterruptibleContext:
|
||||||
result_holder["value"] = env.execute(
|
"""Test InterruptibleContext helper."""
|
||||||
"trap '' TERM; sleep 60",
|
|
||||||
timeout=30,
|
def test_context_manager(self):
|
||||||
)
|
"""Test context manager basic usage."""
|
||||||
|
set_interrupt(False)
|
||||||
|
|
||||||
|
with InterruptibleContext() as ctx:
|
||||||
|
for _ in range(10):
|
||||||
|
assert ctx.should_continue() is True
|
||||||
|
|
||||||
|
assert is_interrupted() is False
|
||||||
|
|
||||||
|
def test_context_respects_interrupt(self):
|
||||||
|
"""Test that context stops on interrupt."""
|
||||||
|
set_interrupt(False)
|
||||||
|
|
||||||
|
with InterruptibleContext(check_interval=5) as ctx:
|
||||||
|
# Simulate work
|
||||||
|
for i in range(20):
|
||||||
|
if i == 10:
|
||||||
|
set_interrupt(True)
|
||||||
|
if not ctx.should_continue():
|
||||||
|
break
|
||||||
|
|
||||||
|
# Should have been interrupted
|
||||||
|
assert is_interrupted() is True
|
||||||
|
set_interrupt(False) # Cleanup
|
||||||
|
|
||||||
t = threading.Thread(target=_run)
|
|
||||||
|
class TestWaitForInterrupt:
|
||||||
|
"""Test wait_for_interrupt functionality."""
|
||||||
|
|
||||||
|
def test_wait_with_timeout(self):
|
||||||
|
"""Test wait returns False on timeout."""
|
||||||
|
set_interrupt(False)
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
result = wait_for_interrupt(timeout=0.1)
|
||||||
|
elapsed = time.time() - start
|
||||||
|
|
||||||
|
assert result is False
|
||||||
|
assert elapsed < 0.5 # Should not hang
|
||||||
|
|
||||||
|
def test_wait_interruptible(self):
|
||||||
|
"""Test wait returns True when interrupted."""
|
||||||
|
set_interrupt(False)
|
||||||
|
|
||||||
|
def delayed_interrupt():
|
||||||
|
time.sleep(0.1)
|
||||||
|
set_interrupt(True)
|
||||||
|
|
||||||
|
t = threading.Thread(target=delayed_interrupt)
|
||||||
t.start()
|
t.start()
|
||||||
|
|
||||||
time.sleep(0.5)
|
start = time.time()
|
||||||
set_interrupt(True)
|
result = wait_for_interrupt(timeout=5.0)
|
||||||
|
elapsed = time.time() - start
|
||||||
|
|
||||||
t.join(timeout=5)
|
t.join(timeout=5)
|
||||||
set_interrupt(False)
|
|
||||||
|
assert result is True
|
||||||
assert result_holder["value"] is not None
|
assert elapsed < 1.0 # Should return quickly after interrupt
|
||||||
assert result_holder["value"]["returncode"] == 130
|
|
||||||
assert "interrupted" in result_holder["value"]["output"].lower()
|
set_interrupt(False) # Cleanup
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Manual smoke test checklist (not automated)
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
SMOKE_TESTS = """
|
|
||||||
Manual Smoke Test Checklist:
|
|
||||||
|
|
||||||
1. CLI: Run `hermes`, ask it to `sleep 30` in terminal, type "stop" + Enter.
|
|
||||||
Expected: command dies within 2s, agent responds to "stop".
|
|
||||||
|
|
||||||
2. CLI: Ask it to extract content from 5 URLs, type interrupt mid-way.
|
|
||||||
Expected: remaining URLs are skipped, partial results returned.
|
|
||||||
|
|
||||||
3. Gateway (Telegram): Send a long task, then send "Stop".
|
|
||||||
Expected: agent stops and responds acknowledging the stop.
|
|
||||||
|
|
||||||
4. Gateway (Telegram): Send "Stop" then "Do X instead" rapidly.
|
|
||||||
Expected: both messages appear as the next prompt (joined by newline).
|
|
||||||
|
|
||||||
5. CLI: Start a task that generates 3+ tool calls in one batch.
|
|
||||||
Type interrupt during the first tool call.
|
|
||||||
Expected: only 1 tool executes, remaining are skipped.
|
|
||||||
"""
|
|
||||||
|
|||||||
@@ -4,6 +4,9 @@ Provides a global threading.Event that any tool can check to determine
|
|||||||
if the user has requested an interrupt. The agent's interrupt() method
|
if the user has requested an interrupt. The agent's interrupt() method
|
||||||
sets this event, and tools poll it during long-running operations.
|
sets this event, and tools poll it during long-running operations.
|
||||||
|
|
||||||
|
SECURITY FIX (V-007): Added proper locking to prevent race conditions
|
||||||
|
in interrupt propagation. Uses RLock for thread-safe nested access.
|
||||||
|
|
||||||
Usage in tools:
|
Usage in tools:
|
||||||
from tools.interrupt import is_interrupted
|
from tools.interrupt import is_interrupted
|
||||||
if is_interrupted():
|
if is_interrupted():
|
||||||
@@ -12,17 +15,79 @@ Usage in tools:
|
|||||||
|
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
|
# Global interrupt event with proper synchronization
|
||||||
_interrupt_event = threading.Event()
|
_interrupt_event = threading.Event()
|
||||||
|
_interrupt_lock = threading.RLock()
|
||||||
|
_interrupt_count = 0 # Track nested interrupts for idempotency
|
||||||
|
|
||||||
|
|
||||||
def set_interrupt(active: bool) -> None:
|
def set_interrupt(active: bool) -> None:
|
||||||
"""Called by the agent to signal or clear the interrupt."""
|
"""Called by the agent to signal or clear the interrupt.
|
||||||
if active:
|
|
||||||
_interrupt_event.set()
|
SECURITY FIX: Uses RLock to prevent race conditions when multiple
|
||||||
else:
|
threads attempt to set/clear the interrupt simultaneously.
|
||||||
_interrupt_event.clear()
|
"""
|
||||||
|
global _interrupt_count
|
||||||
|
|
||||||
|
with _interrupt_lock:
|
||||||
|
if active:
|
||||||
|
_interrupt_count += 1
|
||||||
|
_interrupt_event.set()
|
||||||
|
else:
|
||||||
|
_interrupt_count = 0
|
||||||
|
_interrupt_event.clear()
|
||||||
|
|
||||||
|
|
||||||
def is_interrupted() -> bool:
|
def is_interrupted() -> bool:
|
||||||
"""Check if an interrupt has been requested. Safe to call from any thread."""
|
"""Check if an interrupt has been requested. Safe to call from any thread."""
|
||||||
return _interrupt_event.is_set()
|
return _interrupt_event.is_set()
|
||||||
|
|
||||||
|
|
||||||
|
def get_interrupt_count() -> int:
|
||||||
|
"""Get the current interrupt nesting count (for debugging).
|
||||||
|
|
||||||
|
Returns the number of times set_interrupt(True) has been called
|
||||||
|
without a corresponding clear.
|
||||||
|
"""
|
||||||
|
with _interrupt_lock:
|
||||||
|
return _interrupt_count
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_interrupt(timeout: float = None) -> bool:
|
||||||
|
"""Block until interrupt is set or timeout expires.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
timeout: Maximum time to wait in seconds
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if interrupt was set, False if timeout expired
|
||||||
|
"""
|
||||||
|
return _interrupt_event.wait(timeout)
|
||||||
|
|
||||||
|
|
||||||
|
class InterruptibleContext:
|
||||||
|
"""Context manager for interruptible operations.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
with InterruptibleContext() as ctx:
|
||||||
|
while ctx.should_continue():
|
||||||
|
do_work()
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, check_interval: int = 100):
|
||||||
|
self.check_interval = check_interval
|
||||||
|
self._iteration = 0
|
||||||
|
self._interrupted = False
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def should_continue(self) -> bool:
|
||||||
|
"""Check if operation should continue (not interrupted)."""
|
||||||
|
self._iteration += 1
|
||||||
|
if self._iteration % self.check_interval == 0:
|
||||||
|
self._interrupted = is_interrupted()
|
||||||
|
return not self._interrupted
|
||||||
|
|||||||
@@ -47,7 +47,8 @@ logger = logging.getLogger(__name__)
|
|||||||
# The terminal tool polls this during command execution so it can kill
|
# The terminal tool polls this during command execution so it can kill
|
||||||
# long-running subprocesses immediately instead of blocking until timeout.
|
# long-running subprocesses immediately instead of blocking until timeout.
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
from tools.interrupt import is_interrupted, _interrupt_event # noqa: F401 — re-exported
|
from tools.interrupt import is_interrupted # noqa: F401 — re-exported
|
||||||
|
# SECURITY: Don't expose _interrupt_event directly - use proper API
|
||||||
# display_hermes_home imported lazily at call site (stale-module safety during hermes update)
|
# display_hermes_home imported lazily at call site (stale-module safety during hermes update)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user