Files
hermes-agent/tests/run_agent/test_real_interrupt_subagent.py
Siddharth Balyan f3006ebef9 refactor(tests): re-architect tests + fix CI failures (#5946)
* refactor: re-architect tests to mirror the codebase

* Update tests.yml

* fix: add missing tool_error imports after registry refactor

* fix(tests): replace patch.dict with monkeypatch to prevent env var leaks under xdist

patch.dict(os.environ) can leak TERMINAL_ENV across xdist workers,
causing test_code_execution tests to hit the Modal remote path.

* fix(tests): fix update_check and telegram xdist failures

- test_update_check: replace patch("hermes_cli.banner.os.getenv") with
  monkeypatch.setenv("HERMES_HOME") — banner.py no longer imports os
  directly, it uses get_hermes_home() from hermes_constants.

- test_telegram_conflict/approval_buttons: provide real exception classes
  for telegram.error mock (NetworkError, TimedOut, BadRequest) so the
  except clause in connect() doesn't fail with "catching classes that do
  not inherit from BaseException" when xdist pollutes sys.modules.

* fix(tests): accept unavailable_models kwarg in _prompt_model_selection mock
2026-04-07 17:19:07 -07:00

187 lines
7.4 KiB
Python

"""Test real interrupt propagation through delegate_task with actual AIAgent.
This uses a real AIAgent with mocked HTTP responses to test the complete
interrupt flow through _run_single_child → child.run_conversation().
"""
import json
import os
import threading
import time
import unittest
from unittest.mock import MagicMock, patch, PropertyMock
from tools.interrupt import set_interrupt, is_interrupted
def _make_slow_api_response(delay=5.0):
"""Create a mock that simulates a slow API response (like a real LLM call)."""
def slow_create(**kwargs):
# Simulate a slow API call
time.sleep(delay)
# Return a simple text response (no tool calls)
resp = MagicMock()
resp.choices = [MagicMock()]
resp.choices[0].message = MagicMock()
resp.choices[0].message.content = "Done"
resp.choices[0].message.tool_calls = None
resp.choices[0].message.refusal = None
resp.choices[0].finish_reason = "stop"
resp.usage = MagicMock()
resp.usage.prompt_tokens = 100
resp.usage.completion_tokens = 10
resp.usage.total_tokens = 110
resp.usage.prompt_tokens_details = None
return resp
return slow_create
class TestRealSubagentInterrupt(unittest.TestCase):
"""Test interrupt with real AIAgent child through delegate_tool."""
def setUp(self):
set_interrupt(False)
os.environ.setdefault("OPENAI_API_KEY", "test-key")
def tearDown(self):
set_interrupt(False)
def test_interrupt_child_during_api_call(self):
"""Real AIAgent child interrupted while making API call."""
from run_agent import AIAgent, IterationBudget
# Create a real parent agent (just enough to be a parent)
parent = AIAgent.__new__(AIAgent)
parent._interrupt_requested = False
parent._interrupt_message = None
parent._active_children = []
parent._active_children_lock = threading.Lock()
parent.quiet_mode = True
parent.model = "test/model"
parent.base_url = "http://localhost:1"
parent.api_key = "test"
parent.provider = "test"
parent.api_mode = "chat_completions"
parent.platform = "cli"
parent.enabled_toolsets = ["terminal", "file"]
parent.providers_allowed = None
parent.providers_ignored = None
parent.providers_order = None
parent.provider_sort = None
parent.max_tokens = None
parent.reasoning_config = None
parent.prefill_messages = None
parent._session_db = None
parent._delegate_depth = 0
parent._delegate_spinner = None
parent.tool_progress_callback = None
parent.iteration_budget = IterationBudget(max_total=100)
parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"}
from tools.delegate_tool import _run_single_child
child_started = threading.Event()
result_holder = [None]
error_holder = [None]
def run_delegate():
try:
# Patch the OpenAI client creation inside AIAgent.__init__
with patch('run_agent.OpenAI') as MockOpenAI:
mock_client = MagicMock()
# API call takes 5 seconds — should be interrupted before that
mock_client.chat.completions.create = _make_slow_api_response(delay=5.0)
mock_client.close = MagicMock()
MockOpenAI.return_value = mock_client
# Patch the instance method so it skips prompt assembly
with patch.object(AIAgent, '_build_system_prompt', return_value="You are a test agent"):
# Signal when child starts
original_run = AIAgent.run_conversation
def patched_run(self_agent, *args, **kwargs):
child_started.set()
return original_run(self_agent, *args, **kwargs)
with patch.object(AIAgent, 'run_conversation', patched_run):
# Build a real child agent (AIAgent is NOT patched here,
# only run_conversation and _build_system_prompt are)
child = AIAgent(
base_url="http://localhost:1",
api_key="test-key",
model="test/model",
provider="test",
api_mode="chat_completions",
max_iterations=5,
enabled_toolsets=["terminal"],
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
platform="cli",
)
child._delegate_depth = 1
parent._active_children.append(child)
result = _run_single_child(
task_index=0,
goal="Test task",
child=child,
parent_agent=parent,
)
result_holder[0] = result
except Exception as e:
import traceback
traceback.print_exc()
error_holder[0] = e
agent_thread = threading.Thread(target=run_delegate, daemon=True)
agent_thread.start()
# Wait for child to start run_conversation
started = child_started.wait(timeout=10)
if not started:
agent_thread.join(timeout=1)
if error_holder[0]:
raise error_holder[0]
self.fail("Child never started run_conversation")
# Give child time to enter main loop and start API call
time.sleep(0.5)
# Verify child is registered
print(f"Active children: {len(parent._active_children)}")
self.assertGreaterEqual(len(parent._active_children), 1,
"Child not registered in _active_children")
# Interrupt! (simulating what CLI does)
start = time.monotonic()
parent.interrupt("User typed a new message")
# Check propagation
child = parent._active_children[0] if parent._active_children else None
if child:
print(f"Child._interrupt_requested after parent.interrupt(): {child._interrupt_requested}")
self.assertTrue(child._interrupt_requested,
"Interrupt did not propagate to child!")
# Wait for delegate to finish (should be fast since interrupted)
agent_thread.join(timeout=5)
elapsed = time.monotonic() - start
if error_holder[0]:
raise error_holder[0]
result = result_holder[0]
self.assertIsNotNone(result, "Delegate returned no result")
print(f"Result status: {result['status']}, elapsed: {elapsed:.2f}s")
print(f"Full result: {result}")
# The child should have been interrupted, not completed the full 5s API call
self.assertLess(elapsed, 3.0,
f"Took {elapsed:.2f}s — interrupt was not detected quickly enough")
self.assertEqual(result["status"], "interrupted",
f"Expected 'interrupted', got '{result['status']}'")
if __name__ == "__main__":
unittest.main()