Files
hermes-agent/tests/tools/test_interrupt.py

229 lines
7.5 KiB
Python
Raw Permalink Normal View History

"""Tests for the interrupt system.
Run with: python -m pytest tests/test_interrupt.py -v
"""
import queue
import threading
import time
import pytest
# ---------------------------------------------------------------------------
# Unit tests: shared interrupt module
# ---------------------------------------------------------------------------
class TestInterruptModule:
"""Tests for tools/interrupt.py"""
def test_set_and_check(self):
from tools.interrupt import set_interrupt, is_interrupted
set_interrupt(False)
assert not is_interrupted()
set_interrupt(True)
assert is_interrupted()
set_interrupt(False)
assert not is_interrupted()
def test_thread_safety(self):
fix(tests): fix 78 CI test failures and remove dead test (#9036) Production fixes: - voice_mode.py: add is_recording property to AudioRecorder (parity with TermuxAudioRecorder) - cronjob_tools.py: add sms example to deliver description Test fixes: - test_real_interrupt_subagent: add missing _execution_thread_id (fixes 19 cascading failures from leaked _build_system_prompt patch) - test_anthropic_error_handling: add _FakeMessages, override _interruptible_streaming_api_call (6 fixes) - test_ctx_halving_fix: add missing request_overrides attribute (4 fixes) - test_context_token_tracking: set _disable_streaming=True for non-streaming test path (4 fixes) - test_dict_tool_call_args: set _disable_streaming=True (1 fix) - test_provider_parity: add model='gpt-4o' for AIGateway tests to meet 64K minimum context (4 fixes) - test_session_race_guard: add user_id to SessionSource (5 fixes) - test_restart_drain/helpers: add user_id to SessionSource (2 fixes) - test_telegram_photo_interrupts: add user_id to SessionSource - test_interrupt: target thread_id for per-thread interrupt system (2 fixes) - test_zombie_process_cleanup: rewrite with object.__new__ for refactored GatewayRunner.stop() (1 fix) - test_browser_camofox_state: update config version 15->17 (1 fix) - test_trajectory_compressor_async: widen lookback window 10->20 for line-shifted AsyncOpenAI (1 fix) - test_voice_mode: fixed by production is_recording addition (5 fixes) - test_voice_cli_integration: add _attached_images to CLI stub (2 fixes) - test_hermes_logging: explicit propagation/level reset for cross-test pollution defense (1 fix) - test_run_agent: add base_url for OpenRouter detection tests (2 fixes) Deleted: - test_inline_think_blocks_reasoning_only_accepted: tested unimplemented inline <think> handling
2026-04-13 10:50:24 -07:00
"""Set from one thread targeting another thread's ident."""
from tools.interrupt import set_interrupt, is_interrupted, _interrupted_threads, _lock
set_interrupt(False)
# Clear any stale thread idents left by prior tests in this worker.
with _lock:
_interrupted_threads.clear()
seen = {"value": False}
def _checker():
while not is_interrupted():
time.sleep(0.01)
seen["value"] = True
t = threading.Thread(target=_checker, daemon=True)
t.start()
time.sleep(0.05)
assert not seen["value"]
fix(tests): fix 78 CI test failures and remove dead test (#9036) Production fixes: - voice_mode.py: add is_recording property to AudioRecorder (parity with TermuxAudioRecorder) - cronjob_tools.py: add sms example to deliver description Test fixes: - test_real_interrupt_subagent: add missing _execution_thread_id (fixes 19 cascading failures from leaked _build_system_prompt patch) - test_anthropic_error_handling: add _FakeMessages, override _interruptible_streaming_api_call (6 fixes) - test_ctx_halving_fix: add missing request_overrides attribute (4 fixes) - test_context_token_tracking: set _disable_streaming=True for non-streaming test path (4 fixes) - test_dict_tool_call_args: set _disable_streaming=True (1 fix) - test_provider_parity: add model='gpt-4o' for AIGateway tests to meet 64K minimum context (4 fixes) - test_session_race_guard: add user_id to SessionSource (5 fixes) - test_restart_drain/helpers: add user_id to SessionSource (2 fixes) - test_telegram_photo_interrupts: add user_id to SessionSource - test_interrupt: target thread_id for per-thread interrupt system (2 fixes) - test_zombie_process_cleanup: rewrite with object.__new__ for refactored GatewayRunner.stop() (1 fix) - test_browser_camofox_state: update config version 15->17 (1 fix) - test_trajectory_compressor_async: widen lookback window 10->20 for line-shifted AsyncOpenAI (1 fix) - test_voice_mode: fixed by production is_recording addition (5 fixes) - test_voice_cli_integration: add _attached_images to CLI stub (2 fixes) - test_hermes_logging: explicit propagation/level reset for cross-test pollution defense (1 fix) - test_run_agent: add base_url for OpenRouter detection tests (2 fixes) Deleted: - test_inline_think_blocks_reasoning_only_accepted: tested unimplemented inline <think> handling
2026-04-13 10:50:24 -07:00
# Target the checker thread's ident so it sees the interrupt
set_interrupt(True, thread_id=t.ident)
t.join(timeout=1)
assert seen["value"]
fix(tests): fix 78 CI test failures and remove dead test (#9036) Production fixes: - voice_mode.py: add is_recording property to AudioRecorder (parity with TermuxAudioRecorder) - cronjob_tools.py: add sms example to deliver description Test fixes: - test_real_interrupt_subagent: add missing _execution_thread_id (fixes 19 cascading failures from leaked _build_system_prompt patch) - test_anthropic_error_handling: add _FakeMessages, override _interruptible_streaming_api_call (6 fixes) - test_ctx_halving_fix: add missing request_overrides attribute (4 fixes) - test_context_token_tracking: set _disable_streaming=True for non-streaming test path (4 fixes) - test_dict_tool_call_args: set _disable_streaming=True (1 fix) - test_provider_parity: add model='gpt-4o' for AIGateway tests to meet 64K minimum context (4 fixes) - test_session_race_guard: add user_id to SessionSource (5 fixes) - test_restart_drain/helpers: add user_id to SessionSource (2 fixes) - test_telegram_photo_interrupts: add user_id to SessionSource - test_interrupt: target thread_id for per-thread interrupt system (2 fixes) - test_zombie_process_cleanup: rewrite with object.__new__ for refactored GatewayRunner.stop() (1 fix) - test_browser_camofox_state: update config version 15->17 (1 fix) - test_trajectory_compressor_async: widen lookback window 10->20 for line-shifted AsyncOpenAI (1 fix) - test_voice_mode: fixed by production is_recording addition (5 fixes) - test_voice_cli_integration: add _attached_images to CLI stub (2 fixes) - test_hermes_logging: explicit propagation/level reset for cross-test pollution defense (1 fix) - test_run_agent: add base_url for OpenRouter detection tests (2 fixes) Deleted: - test_inline_think_blocks_reasoning_only_accepted: tested unimplemented inline <think> handling
2026-04-13 10:50:24 -07:00
set_interrupt(False, thread_id=t.ident)
# ---------------------------------------------------------------------------
# Unit tests: pre-tool interrupt check
# ---------------------------------------------------------------------------
class TestPreToolCheck:
"""Verify that _execute_tool_calls skips all tools when interrupted."""
def test_all_tools_skipped_when_interrupted(self):
"""Mock an interrupted agent and verify no tools execute."""
from unittest.mock import MagicMock, patch
# Build a fake assistant_message with 3 tool calls
tc1 = MagicMock()
tc1.id = "tc_1"
tc1.function.name = "terminal"
tc1.function.arguments = '{"command": "rm -rf /"}'
tc2 = MagicMock()
tc2.id = "tc_2"
tc2.function.name = "terminal"
tc2.function.arguments = '{"command": "echo hello"}'
tc3 = MagicMock()
tc3.id = "tc_3"
tc3.function.name = "web_search"
tc3.function.arguments = '{"query": "test"}'
assistant_msg = MagicMock()
assistant_msg.tool_calls = [tc1, tc2, tc3]
messages = []
# Create a minimal mock agent with _interrupt_requested = True
agent = MagicMock()
agent._interrupt_requested = True
agent.log_prefix = ""
agent._persist_session = MagicMock()
# Import and call the method
import types
from run_agent import AIAgent
# Bind the real methods to our mock so dispatch works correctly
agent._execute_tool_calls_sequential = types.MethodType(AIAgent._execute_tool_calls_sequential, agent)
agent._execute_tool_calls_concurrent = types.MethodType(AIAgent._execute_tool_calls_concurrent, agent)
AIAgent._execute_tool_calls(agent, assistant_msg, messages, "default")
# All 3 should be skipped
assert len(messages) == 3
for msg in messages:
assert msg["role"] == "tool"
assert "cancelled" in msg["content"].lower() or "interrupted" in msg["content"].lower()
# No actual tool handlers should have been called
# (handle_function_call should NOT have been invoked)
# ---------------------------------------------------------------------------
# Unit tests: message combining
# ---------------------------------------------------------------------------
class TestMessageCombining:
"""Verify multiple interrupt messages are joined."""
def test_cli_interrupt_queue_drain(self):
"""Simulate draining multiple messages from the interrupt queue."""
q = queue.Queue()
q.put("Stop!")
q.put("Don't delete anything")
q.put("Show me what you were going to delete instead")
parts = []
while not q.empty():
try:
msg = q.get_nowait()
if msg:
parts.append(msg)
except queue.Empty:
break
combined = "\n".join(parts)
assert "Stop!" in combined
assert "Don't delete anything" in combined
assert "Show me what you were going to delete instead" in combined
assert combined.count("\n") == 2
def test_gateway_pending_messages_append(self):
"""Simulate gateway _pending_messages append logic."""
pending = {}
key = "agent:main:telegram:dm"
# First message
if key in pending:
pending[key] += "\n" + "Stop!"
else:
pending[key] = "Stop!"
# Second message
if key in pending:
pending[key] += "\n" + "Do something else instead"
else:
pending[key] = "Do something else instead"
assert pending[key] == "Stop!\nDo something else instead"
# ---------------------------------------------------------------------------
# Integration tests (require local terminal)
# ---------------------------------------------------------------------------
class TestSIGKILLEscalation:
"""Test that SIGTERM-resistant processes get SIGKILL'd."""
@pytest.mark.skipif(
not __import__("shutil").which("bash"),
reason="Requires bash"
)
def test_sigterm_trap_killed_within_2s(self):
"""A process that traps SIGTERM should be SIGKILL'd after 1s grace."""
from tools.interrupt import set_interrupt
from tools.environments.local import LocalEnvironment
set_interrupt(False)
env = LocalEnvironment(cwd="/tmp", timeout=30)
# Start execution in a thread, interrupt after 0.5s
result_holder = {"value": None}
def _run():
result_holder["value"] = env.execute(
"trap '' TERM; sleep 60",
timeout=30,
)
t = threading.Thread(target=_run)
t.start()
time.sleep(0.5)
fix(tests): fix 78 CI test failures and remove dead test (#9036) Production fixes: - voice_mode.py: add is_recording property to AudioRecorder (parity with TermuxAudioRecorder) - cronjob_tools.py: add sms example to deliver description Test fixes: - test_real_interrupt_subagent: add missing _execution_thread_id (fixes 19 cascading failures from leaked _build_system_prompt patch) - test_anthropic_error_handling: add _FakeMessages, override _interruptible_streaming_api_call (6 fixes) - test_ctx_halving_fix: add missing request_overrides attribute (4 fixes) - test_context_token_tracking: set _disable_streaming=True for non-streaming test path (4 fixes) - test_dict_tool_call_args: set _disable_streaming=True (1 fix) - test_provider_parity: add model='gpt-4o' for AIGateway tests to meet 64K minimum context (4 fixes) - test_session_race_guard: add user_id to SessionSource (5 fixes) - test_restart_drain/helpers: add user_id to SessionSource (2 fixes) - test_telegram_photo_interrupts: add user_id to SessionSource - test_interrupt: target thread_id for per-thread interrupt system (2 fixes) - test_zombie_process_cleanup: rewrite with object.__new__ for refactored GatewayRunner.stop() (1 fix) - test_browser_camofox_state: update config version 15->17 (1 fix) - test_trajectory_compressor_async: widen lookback window 10->20 for line-shifted AsyncOpenAI (1 fix) - test_voice_mode: fixed by production is_recording addition (5 fixes) - test_voice_cli_integration: add _attached_images to CLI stub (2 fixes) - test_hermes_logging: explicit propagation/level reset for cross-test pollution defense (1 fix) - test_run_agent: add base_url for OpenRouter detection tests (2 fixes) Deleted: - test_inline_think_blocks_reasoning_only_accepted: tested unimplemented inline <think> handling
2026-04-13 10:50:24 -07:00
set_interrupt(True, thread_id=t.ident)
t.join(timeout=5)
fix(tests): fix 78 CI test failures and remove dead test (#9036) Production fixes: - voice_mode.py: add is_recording property to AudioRecorder (parity with TermuxAudioRecorder) - cronjob_tools.py: add sms example to deliver description Test fixes: - test_real_interrupt_subagent: add missing _execution_thread_id (fixes 19 cascading failures from leaked _build_system_prompt patch) - test_anthropic_error_handling: add _FakeMessages, override _interruptible_streaming_api_call (6 fixes) - test_ctx_halving_fix: add missing request_overrides attribute (4 fixes) - test_context_token_tracking: set _disable_streaming=True for non-streaming test path (4 fixes) - test_dict_tool_call_args: set _disable_streaming=True (1 fix) - test_provider_parity: add model='gpt-4o' for AIGateway tests to meet 64K minimum context (4 fixes) - test_session_race_guard: add user_id to SessionSource (5 fixes) - test_restart_drain/helpers: add user_id to SessionSource (2 fixes) - test_telegram_photo_interrupts: add user_id to SessionSource - test_interrupt: target thread_id for per-thread interrupt system (2 fixes) - test_zombie_process_cleanup: rewrite with object.__new__ for refactored GatewayRunner.stop() (1 fix) - test_browser_camofox_state: update config version 15->17 (1 fix) - test_trajectory_compressor_async: widen lookback window 10->20 for line-shifted AsyncOpenAI (1 fix) - test_voice_mode: fixed by production is_recording addition (5 fixes) - test_voice_cli_integration: add _attached_images to CLI stub (2 fixes) - test_hermes_logging: explicit propagation/level reset for cross-test pollution defense (1 fix) - test_run_agent: add base_url for OpenRouter detection tests (2 fixes) Deleted: - test_inline_think_blocks_reasoning_only_accepted: tested unimplemented inline <think> handling
2026-04-13 10:50:24 -07:00
set_interrupt(False, thread_id=t.ident)
assert result_holder["value"] is not None
assert result_holder["value"]["returncode"] == 130
assert "interrupted" in result_holder["value"]["output"].lower()
# ---------------------------------------------------------------------------
# Manual smoke test checklist (not automated)
# ---------------------------------------------------------------------------
SMOKE_TESTS = """
Manual Smoke Test Checklist:
1. CLI: Run `hermes`, ask it to `sleep 30` in terminal, type "stop" + Enter.
Expected: command dies within 2s, agent responds to "stop".
2. CLI: Ask it to extract content from 5 URLs, type interrupt mid-way.
Expected: remaining URLs are skipped, partial results returned.
3. Gateway (Telegram): Send a long task, then send "Stop".
Expected: agent stops and responds acknowledging the stop.
4. Gateway (Telegram): Send "Stop" then "Do X instead" rapidly.
Expected: both messages appear as the next prompt (joined by newline).
5. CLI: Start a task that generates 3+ tool calls in one batch.
Type interrupt during the first tool call.
Expected: only 1 tool executes, remaining are skipped.
"""