fix(gateway): silence background agent terminal output (#3297)

* fix(gateway): silence flush agent terminal output quiet_mode=True only suppresses AIAgent init messages. Tool call output still leaks to the terminal through _safe_print → _print_fn during session reset/expiry. Since #2670 injected live memory state into the flush prompt, the flush agent now reliably calls memory tools — making the output leak noticeable for the first time. Set _print_fn to a no-op so the background flush is fully silent. * test(gateway): add test for flush agent terminal silence + fix dotenv mock - Add TestFlushAgentSilenced: verifies _print_fn is set to a no-op on the flush agent so tool output never leaks to the terminal - Fix pre-existing test failures: replace patch('run_agent.AIAgent') with sys.modules mock to avoid importing run_agent (requires openai) - Add autouse _mock_dotenv fixture so all tests in this file run without the dotenv package installed * fix(display): route KawaiiSpinner output through print_fn to fully silence flush agent The previous fix set tmp_agent._print_fn = no-op on the flush agent but spinner output and quiet-mode cute messages bypassed _print_fn entirely: - KawaiiSpinner captured sys.stdout at __init__ and wrote directly to it - quiet-mode tool results used builtin print() instead of _safe_print() Add optional print_fn parameter to KawaiiSpinner.__init__; _write routes through it when set. Pass self._print_fn to all spinner construction sites in run_agent.py and change the quiet-mode cute message print to _safe_print. The existing gateway fix (tmp_agent._print_fn = lambda) now propagates correctly through both paths. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(gateway): silence hygiene and compression background agents Two more background AIAgent instances in the gateway were created with quiet_mode=True but without _print_fn = no-op, causing tool output to leak to the terminal: - _hyg_agent (in-turn hygiene memory agent) - tmp_agent (_compress_context path) Apply the same _print_fn no-op pattern used for the flush agent. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * chore(display): remove unused _last_flush_time from KawaiiSpinner Attribute was set but never read; upstream already removed it. Leftover from conflict resolution during rebase onto upstream/main. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Dilee <uzmpsk.dilekakbas@gmail.com> Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 17:40:31 -07:00
parent 08fa326bb0
commit 0375b2a0d7
4 changed files with 116 additions and 40 deletions
--- a/agent/display.py
+++ b/agent/display.py
@@ -231,7 +231,7 @@ class KawaiiSpinner:
        "analyzing", "computing", "synthesizing", "formulating", "brainstorming",
    ]

-    def __init__(self, message: str = "", spinner_type: str = 'dots'):
+    def __init__(self, message: str = "", spinner_type: str = 'dots', print_fn=None):
        self.message = message
        self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
        self.running = False
@@ -239,12 +239,26 @@ class KawaiiSpinner:
        self.frame_idx = 0
        self.start_time = None
        self.last_line_len = 0
+        # Optional callable to route all output through (e.g. a no-op for silent
+        # background agents).  When set, bypasses self._out entirely so that
+        # agents with _print_fn overridden remain fully silent.
+        self._print_fn = print_fn
        # Capture stdout NOW, before any redirect_stdout(devnull) from
        # child agents can replace sys.stdout with a black hole.
        self._out = sys.stdout

    def _write(self, text: str, end: str = '\n', flush: bool = False):
-        """Write to the stdout captured at spinner creation time."""
+        """Write to the stdout captured at spinner creation time.
+
+        If a print_fn was supplied at construction, all output is routed through
+        it instead — allowing callers to silence the spinner with a no-op lambda.
+        """
+        if self._print_fn is not None:
+            try:
+                self._print_fn(text)
+            except Exception:
+                pass
+            return
        try:
            self._out.write(text + end)
            if flush:
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -573,6 +573,10 @@ class GatewayRunner:
                session_id=old_session_id,
                honcho_session_key=honcho_session_key,
            )
+            # Fully silence the flush agent — quiet_mode only suppresses init
+            # messages; tool call output still leaks to the terminal through
+            # _safe_print → _print_fn.  Set a no-op to prevent that.
+            tmp_agent._print_fn = lambda *a, **kw: None

            # Build conversation history from transcript
            msgs = [
@@ -2175,6 +2179,7 @@ class GatewayRunner:
                                    enabled_toolsets=["memory"],
                                    session_id=session_entry.session_id,
                                )
+                                _hyg_agent._print_fn = lambda *a, **kw: None

                                loop = asyncio.get_event_loop()
                                _compressed, _ = await loop.run_in_executor(
@@ -3885,6 +3890,7 @@ class GatewayRunner:
                enabled_toolsets=["memory"],
                session_id=session_entry.session_id,
            )
+            tmp_agent._print_fn = lambda *a, **kw: None

            loop = asyncio.get_event_loop()
            compressed, _ = await loop.run_in_executor(
--- a/run_agent.py
+++ b/run_agent.py
@@ -5087,7 +5087,7 @@ class AIAgent:
        spinner = None
        if self.quiet_mode and not self.tool_progress_callback:
            face = random.choice(KawaiiSpinner.KAWAII_WAITING)
-            spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots')
+            spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=self._print_fn)
            spinner.start()

        try:
@@ -5128,7 +5128,7 @@ class AIAgent:
            # Print cute message per tool
            if self.quiet_mode:
                cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
-                print(f"  {cute_msg}")
+                self._safe_print(f"  {cute_msg}")
            elif not self.quiet_mode:
                if self.verbose_logging:
                    print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s")
@@ -5313,7 +5313,7 @@ class AIAgent:
                spinner = None
                if self.quiet_mode and not self.tool_progress_callback:
                    face = random.choice(KawaiiSpinner.KAWAII_WAITING)
-                    spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots')
+                    spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=self._print_fn)
                    spinner.start()
                self._delegate_spinner = spinner
                _delegate_result = None
@@ -5343,7 +5343,7 @@ class AIAgent:
                    preview = _build_tool_preview(function_name, function_args) or function_name
                    if len(preview) > 30:
                        preview = preview[:27] + "..."
-                    spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots')
+                    spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
                    spinner.start()
                _spinner_result = None
                try:
@@ -6026,7 +6026,7 @@ class AIAgent:
                    # Raw KawaiiSpinner only when no streaming consumers
                    # (would conflict with streamed token output)
                    spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
-                    thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
+                    thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type, print_fn=self._print_fn)
                    thinking_spinner.start()
            
            # Log request details if verbose
--- a/tests/gateway/test_flush_memory_stale_guard.py
+++ b/tests/gateway/test_flush_memory_stale_guard.py
@@ -7,11 +7,21 @@ Verifies that:
 3. The flush still works normally when memory files don't exist
 """

+import sys
+import types
 import pytest
 from pathlib import Path
 from unittest.mock import MagicMock, patch, call


+@pytest.fixture(autouse=True)
+def _mock_dotenv(monkeypatch):
+    """gateway.run imports dotenv at module level; stub it so tests run without the package."""
+    fake = types.ModuleType("dotenv")
+    fake.load_dotenv = lambda *a, **kw: None
+    monkeypatch.setitem(sys.modules, "dotenv", fake)
+
+
 def _make_runner():
    from gateway.run import GatewayRunner

@@ -57,105 +67,151 @@ class TestCronSessionBypass:
        runner.session_store.load_transcript.assert_called_once_with("session_abc123")


+def _make_flush_context(monkeypatch, memory_dir=None):
+    """Return (runner, tmp_agent, fake_run_agent) with run_agent mocked in sys.modules."""
+    tmp_agent = MagicMock()
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = MagicMock(return_value=tmp_agent)
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+    runner = _make_runner()
+    runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
+    return runner, tmp_agent, memory_dir
+
+
 class TestMemoryInjection:
    """The flush prompt should include current memory state from disk."""

-    def test_memory_content_injected_into_flush_prompt(self, tmp_path):
+    def test_memory_content_injected_into_flush_prompt(self, tmp_path, monkeypatch):
        """When memory files exist, their content appears in the flush prompt."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-
-        tmp_agent = MagicMock()
        memory_dir = tmp_path / "memories"
        memory_dir.mkdir()
        (memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode")
        (memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST")

+        runner, tmp_agent, _ = _make_flush_context(monkeypatch, memory_dir)
+
        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch("run_agent.AIAgent", return_value=tmp_agent),
-            # Intercept `from tools.memory_tool import MEMORY_DIR` inside the function
            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=memory_dir)}),
        ):
            runner._flush_memories_for_session("session_123")

        tmp_agent.run_conversation.assert_called_once()
-        call_kwargs = tmp_agent.run_conversation.call_args.kwargs
-        flush_prompt = call_kwargs.get("user_message", "")
-        
-        # Verify both memory sections appear in the prompt
+        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
+
        assert "Agent knows Python" in flush_prompt
        assert "User prefers dark mode" in flush_prompt
        assert "Name: Alice" in flush_prompt
        assert "Timezone: PST" in flush_prompt
-        # Verify the stale-overwrite warning is present
        assert "Do NOT overwrite or remove entries" in flush_prompt
        assert "current live state of memory" in flush_prompt

-    def test_flush_works_without_memory_files(self, tmp_path):
+    def test_flush_works_without_memory_files(self, tmp_path, monkeypatch):
        """When no memory files exist, flush still runs without the guard."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-
-        tmp_agent = MagicMock()
        empty_dir = tmp_path / "no_memories"
        empty_dir.mkdir()

+        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
+
        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch("run_agent.AIAgent", return_value=tmp_agent),
            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=empty_dir)}),
        ):
            runner._flush_memories_for_session("session_456")

-        # Should still run, just without the memory guard section
        tmp_agent.run_conversation.assert_called_once()
        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
        assert "Do NOT overwrite or remove entries" not in flush_prompt
        assert "Review the conversation above" in flush_prompt

-    def test_empty_memory_files_no_injection(self, tmp_path):
+    def test_empty_memory_files_no_injection(self, tmp_path, monkeypatch):
        """Empty memory files should not trigger the guard section."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-
-        tmp_agent = MagicMock()
        memory_dir = tmp_path / "memories"
        memory_dir.mkdir()
        (memory_dir / "MEMORY.md").write_text("")
        (memory_dir / "USER.md").write_text("  \n  ")  # whitespace only

+        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
+
        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch("run_agent.AIAgent", return_value=tmp_agent),
            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=memory_dir)}),
        ):
            runner._flush_memories_for_session("session_789")

        tmp_agent.run_conversation.assert_called_once()
        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-        # No memory content → no guard section
        assert "current live state of memory" not in flush_prompt


+class TestFlushAgentSilenced:
+    """The flush agent must not produce any terminal output."""
+
+    def test_print_fn_set_to_noop(self, tmp_path, monkeypatch):
+        """_print_fn on the flush agent must be a no-op so tool output never leaks."""
+        runner = _make_runner()
+        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
+
+        captured_agent = {}
+
+        def _fake_ai_agent(*args, **kwargs):
+            agent = MagicMock()
+            captured_agent["instance"] = agent
+            return agent
+
+        fake_run_agent = types.ModuleType("run_agent")
+        fake_run_agent.AIAgent = _fake_ai_agent
+        monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+        with (
+            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
+            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=tmp_path)}),
+        ):
+            runner._flush_memories_for_session("session_silent")
+
+        agent = captured_agent["instance"]
+        assert agent._print_fn is not None, "_print_fn should be overridden to suppress output"
+        # Confirm it is callable and produces no output (no exception)
+        agent._print_fn("should be silenced")
+
+    def test_kawaii_spinner_respects_print_fn(self):
+        """KawaiiSpinner must route all output through print_fn when supplied."""
+        from agent.display import KawaiiSpinner
+
+        written = []
+        spinner = KawaiiSpinner("test", print_fn=lambda *a, **kw: written.append(a))
+        spinner._write("hello")
+        assert written == [("hello",)], "spinner should route through print_fn"
+
+        # A no-op print_fn must produce no output to stdout
+        import io, sys
+        buf = io.StringIO()
+        old_stdout = sys.stdout
+        sys.stdout = buf
+        try:
+            silent_spinner = KawaiiSpinner("silent", print_fn=lambda *a, **kw: None)
+            silent_spinner._write("should not appear")
+            silent_spinner.stop("done")
+        finally:
+            sys.stdout = old_stdout
+        assert buf.getvalue() == "", "no-op print_fn spinner must not write to stdout"
+
+
 class TestFlushPromptStructure:
    """Verify the flush prompt retains its core instructions."""

-    def test_core_instructions_present(self):
+    def test_core_instructions_present(self, monkeypatch):
        """The flush prompt should still contain the original guidance."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-
-        tmp_agent = MagicMock()
+        runner, tmp_agent, _ = _make_flush_context(monkeypatch)

        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch("run_agent.AIAgent", return_value=tmp_agent),
-            # Make the import fail gracefully so we test without memory files
            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=Path("/nonexistent"))}),
        ):
            runner._flush_memories_for_session("session_struct")