# tests/run_interrupt_test.py

#!/usr/bin/env python3
"""Run a real interrupt test with actual AIAgent + delegate child.

Not a pytest test — runs directly as a script for live testing.
"""

import threading
import time
import sys
import os

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from unittest.mock import MagicMock, patch
from run_agent import AIAgent, IterationBudget
from tools.delegate_tool import _run_single_child
from tools.interrupt import set_interrupt, is_interrupted

def _build_minimal_parent():
    """Return a bare parent AIAgent whose attributes are populated by hand.

    ``AIAgent.__new__`` is used so that ``AIAgent.__init__`` never runs for
    the parent; only the attributes assigned below exist on the instance
    (presumably the ones the delegate path reads from its parent — verify
    against ``tools.delegate_tool`` when adding new ones).
    """
    parent = AIAgent.__new__(AIAgent)

    # Interrupt bookkeeping.
    parent._interrupt_requested = False
    parent._interrupt_message = None
    parent._active_children = []
    parent._active_children_lock = threading.Lock()

    # Model / endpoint settings; port 1 is a dummy address, the OpenAI
    # client class is replaced by a mock before the child is run.
    parent.quiet_mode = True
    parent.model = "test/model"
    parent.base_url = "http://localhost:1"
    parent.api_key = "test"
    parent.provider = "test"
    parent.api_mode = "chat_completions"
    parent.platform = "cli"
    parent.enabled_toolsets = ["terminal", "file"]
    parent.providers_allowed = None
    parent.providers_ignored = None
    parent.providers_order = None
    parent.provider_sort = None
    parent.max_tokens = None
    parent.reasoning_config = None
    parent.prefill_messages = None

    # Delegation state.
    parent._session_db = None
    parent._delegate_depth = 0
    parent._delegate_spinner = None
    parent.tool_progress_callback = None
    parent.iteration_budget = IterationBudget(max_total=100)
    parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"}
    return parent


def _run_interrupt_check() -> int:
    """Start one delegate child, interrupt the parent, and time the shutdown.

    Returns:
        0 if the child returned a result and the worker thread was joined
        less than 2 seconds after ``parent.interrupt()``; 1 if the child
        never started, produced no result, or took longer than that.
    """
    parent = _build_minimal_parent()

    child_started = threading.Event()
    result_holder = [None]  # one-slot box the worker thread writes its result into

    def snapshot_children():
        # Copy under the lock the parent carries for this list, so the
        # worker thread cannot mutate it while we are reading it.
        with parent._active_children_lock:
            return list(parent._active_children)

    def run_delegate():
        with patch("run_agent.OpenAI") as MockOpenAI:
            mock_client = MagicMock()

            def slow_create(**kwargs):
                # Simulated API call that takes 3s — longer than the 2s
                # PASS threshold, so PASS is only possible if the interrupt
                # cuts the wait short.
                time.sleep(3)
                resp = MagicMock()
                resp.choices = [MagicMock()]
                resp.choices[0].message.content = "Done"
                resp.choices[0].message.tool_calls = None
                resp.choices[0].message.refusal = None
                resp.choices[0].finish_reason = "stop"
                resp.usage.prompt_tokens = 100
                resp.usage.completion_tokens = 10
                resp.usage.total_tokens = 110
                resp.usage.prompt_tokens_details = None
                return resp

            mock_client.chat.completions.create = slow_create
            mock_client.close = MagicMock()
            MockOpenAI.return_value = mock_client

            original_init = AIAgent.__init__

            def patched_init(self_agent, *a, **kw):
                # Signal the main thread once an AIAgent has been fully
                # constructed inside the worker.
                original_init(self_agent, *a, **kw)
                child_started.set()

            with patch.object(AIAgent, "__init__", patched_init):
                try:
                    result = _run_single_child(
                        task_index=0,
                        goal="Test slow task",
                        context=None,
                        toolsets=["terminal"],
                        model="test/model",
                        max_iterations=5,
                        parent_agent=parent,
                        task_count=1,
                        override_provider="test",
                        override_base_url="http://localhost:1",
                        override_api_key="test",
                        override_api_mode="chat_completions",
                    )
                    result_holder[0] = result
                except Exception as e:
                    # Top-level boundary of the worker thread: report and
                    # let the main thread see the missing result.
                    print(f"ERROR in delegate: {e}")
                    import traceback
                    traceback.print_exc()

    print("Starting agent thread...")
    agent_thread = threading.Thread(target=run_delegate, daemon=True)
    agent_thread.start()

    if not child_started.wait(timeout=10):
        print("ERROR: Child never started")
        return 1

    # Give the child a moment to get into its (mocked, slow) API call.
    time.sleep(0.5)

    children = snapshot_children()
    print(f"Active children: {len(children)}")
    for i, c in enumerate(children):
        print(f"  Child {i}: _interrupt_requested={c._interrupt_requested}")

    t0 = time.monotonic()
    parent.interrupt("User typed a new message")
    print("Called parent.interrupt()")

    for i, c in enumerate(snapshot_children()):
        print(f"  Child {i} after interrupt: _interrupt_requested={c._interrupt_requested}")
    print(f"Global is_interrupted: {is_interrupted()}")

    agent_thread.join(timeout=10)
    elapsed = time.monotonic() - t0
    print(f"Agent thread finished in {elapsed:.2f}s")

    result = result_holder[0]
    if not result:
        print("❌ FAIL: No result!")
        return 1

    print(f"Status: {result['status']}")
    print(f"Duration: {result['duration_seconds']}s")
    if elapsed < 2.0:
        print("✅ PASS: Interrupt detected quickly!")
        return 0

    print(f"❌ FAIL: Took {elapsed:.2f}s — interrupt was too slow or not detected")
    return 1


def main() -> int:
    """Run the live interrupt test and return a process exit code.

    The global interrupt flag is cleared before the run and again afterwards
    (in a ``finally``), so the flag is reset even if the check raises.

    Returns:
        0 when the check prints PASS, 1 on any reported failure.
    """
    set_interrupt(False)
    try:
        return _run_interrupt_check()
    finally:
        set_interrupt(False)


# Script entry point: main()'s integer return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
fix: use session_key instead of chat_id for adapter interrupt lookups * fix: use session_key instead of chat_id for adapter interrupt lookups monitor_for_interrupt() in _run_agent was using source.chat_id to query the adapter's has_pending_interrupt() and get_pending_message() methods. But the adapter stores interrupt events under build_session_key(source), which produces a different string (e.g. 'agent:main:telegram:dm' vs '123456'). This key mismatch meant the interrupt was never detected through the adapter path, which is the only active interrupt path for all adapter-based platforms (Telegram, Discord, Slack, etc.). The gateway-level interrupt path (in dispatch_message) is unreachable because the adapter intercepts the 2nd message in handle_message() before it reaches dispatch_message(). Result: sending a new message while subagents were running had no effect — the interrupt was silently lost. Fix: replace all source.chat_id references in the interrupt-related code within _run_agent() with the session_key parameter, which matches the adapter's storage keys. Also adds regression tests verifying session_key vs chat_id consistency. * debug: add file-based logging to CLI interrupt path Temporary instrumentation to diagnose why message-based interrupts don't seem to work during subagent execution. Logs to ~/.hermes/interrupt_debug.log (immune to redirect_stdout). Two log points: 1. When Enter handler puts message into _interrupt_queue 2. When chat() reads it and calls agent.interrupt() This will reveal whether the message reaches the queue and whether the interrupt is actually fired. 2026-03-12 08:35:45 -07:00			`#!/usr/bin/env python3`
			`"""Run a real interrupt test with actual AIAgent + delegate child.`

			`Not a pytest test — runs directly as a script for live testing.`
			`"""`

			`import threading`
			`import time`
			`import sys`
			`import os`

			`sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))`

			`from unittest.mock import MagicMock, patch`
			`from run_agent import AIAgent, IterationBudget`
			`from tools.delegate_tool import _run_single_child`
			`from tools.interrupt import set_interrupt, is_interrupted`

fix(honcho): isolate session routing for multi-user gateway (#1500) Salvaged from PR #1470 by adavyas. Core fix: Honcho tool calls in a multi-session gateway could route to the wrong session because honcho_tools.py relied on process-global state. Now threads session context through the call chain: AIAgent._invoke_tool() → handle_function_call() → registry.dispatch() → handler **kw → _resolve_session_context() Changes: - Add _resolve_session_context() to prefer per-call context over globals - Plumb honcho_manager + honcho_session_key through handle_function_call - Add sync_honcho=False to run_conversation() for synthetic flush turns - Pass honcho_session_key through gateway memory flush lifecycle - Harden gateway PID detection when /proc cmdline is unreadable - Make interrupt test scripts import-safe for pytest-xdist - Wrap BibTeX examples in Jekyll raw blocks for docs build - Fix thread-order-dependent assertion in client lifecycle test - Expand Honcho docs: session isolation, lifecycle, routing internals Dropped from original PR: - Indentation change in _create_request_openai_client that would move client creation inside the lock (causes unnecessary contention) Co-authored-by: adavyas <adavyas@users.noreply.github.com> 2026-03-16 00:23:47 -07:00			`def main() -> int:`
			`set_interrupt(False)`

			`# Create parent agent (minimal)`
			`parent = AIAgent.__new__(AIAgent)`
			`parent._interrupt_requested = False`
			`parent._interrupt_message = None`
			`parent._active_children = []`
fix: thread safety for concurrent subagent delegation (#1672) * fix: thread safety for concurrent subagent delegation Four thread-safety fixes that prevent crashes and data races when running multiple subagents concurrently via delegate_task: 1. Remove redirect_stdout/stderr from delegate_tool — mutating global sys.stdout races with the spinner thread when multiple children start concurrently, causing segfaults. Children already run with quiet_mode=True so the redirect was redundant. 2. Split _run_single_child into _build_child_agent (main thread) + _run_single_child (worker thread). AIAgent construction creates httpx/SSL clients which are not thread-safe to initialize concurrently. 3. Add threading.Lock to SessionDB — subagents share the parent's SessionDB and call create_session/append_message from worker threads with no synchronization. 4. Add _active_children_lock to AIAgent — interrupt() iterates _active_children while worker threads append/remove children. 5. Add _client_cache_lock to auxiliary_client — multiple subagent threads may resolve clients concurrently via call_llm(). Based on PR #1471 by peteromallet. * feat: Honcho base_url override via config.yaml + quick command alias type Two features salvaged from PR #1576: 1. Honcho base_url override: allows pointing Hermes at a remote self-hosted Honcho deployment via config.yaml: honcho: base_url: "http://192.168.x.x:8000" When set, this overrides the Honcho SDK's environment mapping (production/local), enabling LAN/VPN Honcho deployments without requiring the server to live on localhost. Uses config.yaml instead of env var (HONCHO_URL) per project convention. 2. Quick command alias type: adds a new 'alias' quick command type that rewrites to another slash command before normal dispatch: quick_commands: sc: type: alias target: /context Supports both CLI and gateway. Arguments are forwarded to the target command. Based on PR #1576 by redhelix. 
--------- Co-authored-by: peteromallet <peteromallet@users.noreply.github.com> Co-authored-by: redhelix <redhelix@users.noreply.github.com> 2026-03-17 02:53:33 -07:00			`parent._active_children_lock = threading.Lock()`
fix(honcho): isolate session routing for multi-user gateway (#1500) Salvaged from PR #1470 by adavyas. Core fix: Honcho tool calls in a multi-session gateway could route to the wrong session because honcho_tools.py relied on process-global state. Now threads session context through the call chain: AIAgent._invoke_tool() → handle_function_call() → registry.dispatch() → handler **kw → _resolve_session_context() Changes: - Add _resolve_session_context() to prefer per-call context over globals - Plumb honcho_manager + honcho_session_key through handle_function_call - Add sync_honcho=False to run_conversation() for synthetic flush turns - Pass honcho_session_key through gateway memory flush lifecycle - Harden gateway PID detection when /proc cmdline is unreadable - Make interrupt test scripts import-safe for pytest-xdist - Wrap BibTeX examples in Jekyll raw blocks for docs build - Fix thread-order-dependent assertion in client lifecycle test - Expand Honcho docs: session isolation, lifecycle, routing internals Dropped from original PR: - Indentation change in _create_request_openai_client that would move client creation inside the lock (causes unnecessary contention) Co-authored-by: adavyas <adavyas@users.noreply.github.com> 2026-03-16 00:23:47 -07:00			`parent.quiet_mode = True`
			`parent.model = "test/model"`
			`parent.base_url = "http://localhost:1"`
			`parent.api_key = "test"`
			`parent.provider = "test"`
			`parent.api_mode = "chat_completions"`
			`parent.platform = "cli"`
			`parent.enabled_toolsets = ["terminal", "file"]`
			`parent.providers_allowed = None`
			`parent.providers_ignored = None`
			`parent.providers_order = None`
			`parent.provider_sort = None`
			`parent.max_tokens = None`
			`parent.reasoning_config = None`
			`parent.prefill_messages = None`
			`parent._session_db = None`
			`parent._delegate_depth = 0`
			`parent._delegate_spinner = None`
			`parent.tool_progress_callback = None`
			`parent.iteration_budget = IterationBudget(max_total=100)`
			`parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"}`

			`child_started = threading.Event()`
			`result_holder = [None]`

			`def run_delegate():`
			`with patch("run_agent.OpenAI") as MockOpenAI:`
			`mock_client = MagicMock()`

			`def slow_create(**kwargs):`
			`time.sleep(3)`
			`resp = MagicMock()`
			`resp.choices = [MagicMock()]`
			`resp.choices[0].message.content = "Done"`
			`resp.choices[0].message.tool_calls = None`
			`resp.choices[0].message.refusal = None`
			`resp.choices[0].finish_reason = "stop"`
			`resp.usage.prompt_tokens = 100`
			`resp.usage.completion_tokens = 10`
			`resp.usage.total_tokens = 110`
			`resp.usage.prompt_tokens_details = None`
			`return resp`

			`mock_client.chat.completions.create = slow_create`
			`mock_client.close = MagicMock()`
			`MockOpenAI.return_value = mock_client`

			`original_init = AIAgent.__init__`

			`def patched_init(self_agent, *a, **kw):`
			`original_init(self_agent, *a, **kw)`
			`child_started.set()`

			`with patch.object(AIAgent, "__init__", patched_init):`
			`try:`
			`result = _run_single_child(`
			`task_index=0,`
			`goal="Test slow task",`
			`context=None,`
			`toolsets=["terminal"],`
			`model="test/model",`
			`max_iterations=5,`
			`parent_agent=parent,`
			`task_count=1,`
			`override_provider="test",`
			`override_base_url="http://localhost:1",`
			`override_api_key="test",`
			`override_api_mode="chat_completions",`
			`)`
			`result_holder[0] = result`
			`except Exception as e:`
			`print(f"ERROR in delegate: {e}")`
			`import traceback`
			`traceback.print_exc()`

			`print("Starting agent thread...")`
			`agent_thread = threading.Thread(target=run_delegate, daemon=True)`
			`agent_thread.start()`

			`started = child_started.wait(timeout=10)`
			`if not started:`
			`print("ERROR: Child never started")`
			`set_interrupt(False)`
			`return 1`

			`time.sleep(0.5)`

			`print(f"Active children: {len(parent._active_children)}")`
			`for i, c in enumerate(parent._active_children):`
			`print(f"  Child {i}: _interrupt_requested={c._interrupt_requested}")`

			`t0 = time.monotonic()`
			`parent.interrupt("User typed a new message")`
			`print("Called parent.interrupt()")`

			`for i, c in enumerate(parent._active_children):`
			`print(f"  Child {i} after interrupt: _interrupt_requested={c._interrupt_requested}")`
			`print(f"Global is_interrupted: {is_interrupted()}")`

			`agent_thread.join(timeout=10)`
			`elapsed = time.monotonic() - t0`
			`print(f"Agent thread finished in {elapsed:.2f}s")`

			`result = result_holder[0]`
			`if result:`
			`print(f"Status: {result['status']}")`
			`print(f"Duration: {result['duration_seconds']}s")`
			`if elapsed < 2.0:`
			`print("✅ PASS: Interrupt detected quickly!")`
			`else:`
			`print(f"❌ FAIL: Took {elapsed:.2f}s — interrupt was too slow or not detected")`
fix: use session_key instead of chat_id for adapter interrupt lookups * fix: use session_key instead of chat_id for adapter interrupt lookups monitor_for_interrupt() in _run_agent was using source.chat_id to query the adapter's has_pending_interrupt() and get_pending_message() methods. But the adapter stores interrupt events under build_session_key(source), which produces a different string (e.g. 'agent:main:telegram:dm' vs '123456'). This key mismatch meant the interrupt was never detected through the adapter path, which is the only active interrupt path for all adapter-based platforms (Telegram, Discord, Slack, etc.). The gateway-level interrupt path (in dispatch_message) is unreachable because the adapter intercepts the 2nd message in handle_message() before it reaches dispatch_message(). Result: sending a new message while subagents were running had no effect — the interrupt was silently lost. Fix: replace all source.chat_id references in the interrupt-related code within _run_agent() with the session_key parameter, which matches the adapter's storage keys. Also adds regression tests verifying session_key vs chat_id consistency. * debug: add file-based logging to CLI interrupt path Temporary instrumentation to diagnose why message-based interrupts don't seem to work during subagent execution. Logs to ~/.hermes/interrupt_debug.log (immune to redirect_stdout). Two log points: 1. When Enter handler puts message into _interrupt_queue 2. When chat() reads it and calls agent.interrupt() This will reveal whether the message reaches the queue and whether the interrupt is actually fired. 2026-03-12 08:35:45 -07:00			`else:`
fix(honcho): isolate session routing for multi-user gateway (#1500) Salvaged from PR #1470 by adavyas. Core fix: Honcho tool calls in a multi-session gateway could route to the wrong session because honcho_tools.py relied on process-global state. Now threads session context through the call chain: AIAgent._invoke_tool() → handle_function_call() → registry.dispatch() → handler **kw → _resolve_session_context() Changes: - Add _resolve_session_context() to prefer per-call context over globals - Plumb honcho_manager + honcho_session_key through handle_function_call - Add sync_honcho=False to run_conversation() for synthetic flush turns - Pass honcho_session_key through gateway memory flush lifecycle - Harden gateway PID detection when /proc cmdline is unreadable - Make interrupt test scripts import-safe for pytest-xdist - Wrap BibTeX examples in Jekyll raw blocks for docs build - Fix thread-order-dependent assertion in client lifecycle test - Expand Honcho docs: session isolation, lifecycle, routing internals Dropped from original PR: - Indentation change in _create_request_openai_client that would move client creation inside the lock (causes unnecessary contention) Co-authored-by: adavyas <adavyas@users.noreply.github.com> 2026-03-16 00:23:47 -07:00			`print("❌ FAIL: No result!")`
fix: use session_key instead of chat_id for adapter interrupt lookups * fix: use session_key instead of chat_id for adapter interrupt lookups monitor_for_interrupt() in _run_agent was using source.chat_id to query the adapter's has_pending_interrupt() and get_pending_message() methods. But the adapter stores interrupt events under build_session_key(source), which produces a different string (e.g. 'agent:main:telegram:dm' vs '123456'). This key mismatch meant the interrupt was never detected through the adapter path, which is the only active interrupt path for all adapter-based platforms (Telegram, Discord, Slack, etc.). The gateway-level interrupt path (in dispatch_message) is unreachable because the adapter intercepts the 2nd message in handle_message() before it reaches dispatch_message(). Result: sending a new message while subagents were running had no effect — the interrupt was silently lost. Fix: replace all source.chat_id references in the interrupt-related code within _run_agent() with the session_key parameter, which matches the adapter's storage keys. Also adds regression tests verifying session_key vs chat_id consistency. * debug: add file-based logging to CLI interrupt path Temporary instrumentation to diagnose why message-based interrupts don't seem to work during subagent execution. Logs to ~/.hermes/interrupt_debug.log (immune to redirect_stdout). Two log points: 1. When Enter handler puts message into _interrupt_queue 2. When chat() reads it and calls agent.interrupt() This will reveal whether the message reaches the queue and whether the interrupt is actually fired. 2026-03-12 08:35:45 -07:00
fix(honcho): isolate session routing for multi-user gateway (#1500) Salvaged from PR #1470 by adavyas. Core fix: Honcho tool calls in a multi-session gateway could route to the wrong session because honcho_tools.py relied on process-global state. Now threads session context through the call chain: AIAgent._invoke_tool() → handle_function_call() → registry.dispatch() → handler **kw → _resolve_session_context() Changes: - Add _resolve_session_context() to prefer per-call context over globals - Plumb honcho_manager + honcho_session_key through handle_function_call - Add sync_honcho=False to run_conversation() for synthetic flush turns - Pass honcho_session_key through gateway memory flush lifecycle - Harden gateway PID detection when /proc cmdline is unreadable - Make interrupt test scripts import-safe for pytest-xdist - Wrap BibTeX examples in Jekyll raw blocks for docs build - Fix thread-order-dependent assertion in client lifecycle test - Expand Honcho docs: session isolation, lifecycle, routing internals Dropped from original PR: - Indentation change in _create_request_openai_client that would move client creation inside the lock (causes unnecessary contention) Co-authored-by: adavyas <adavyas@users.noreply.github.com> 2026-03-16 00:23:47 -07:00			`set_interrupt(False)`
			`return 0`


			`if __name__ == "__main__":`
			`sys.exit(main())`