fix: guard init-time stdio writes

This commit is contained in:
teknium1
2026-03-14 02:19:46 -07:00
parent 163fa4a9d1
commit 936040d8f7
2 changed files with 56 additions and 12 deletions

View File

@@ -110,18 +110,17 @@ HONCHO_TOOL_NAMES = {
class _SafeWriter:
"""Transparent stdout wrapper that catches OSError from broken pipes.
"""Transparent stdio wrapper that catches OSError from broken pipes.
When hermes-agent runs as a systemd service, Docker container, or headless
daemon, the stdout pipe can become unavailable (idle timeout, buffer
daemon, the stdout/stderr pipe can become unavailable (idle timeout, buffer
exhaustion, socket reset). Any print() call then raises
``OSError: [Errno 5] Input/output error``, which can crash
run_conversation() — especially via double-fault when the except handler
``OSError: [Errno 5] Input/output error``, which can crash agent setup or
run_conversation() — especially via double-fault when an except handler
also tries to print.
This wrapper delegates all writes to the underlying stream and silently
catches OSError. It is installed once at the start of run_conversation()
and is transparent when stdout is healthy (zero overhead on the happy path).
catches OSError. It is transparent when the wrapped stream is healthy.
"""
__slots__ = ("_inner",)
@@ -154,6 +153,14 @@ class _SafeWriter:
return getattr(self._inner, name)
def _install_safe_stdio() -> None:
    """Route ``sys.stdout`` and ``sys.stderr`` through ``_SafeWriter``.

    A stream that is missing, set to ``None``, or already a ``_SafeWriter``
    is left untouched, so calling this function repeatedly never stacks
    wrappers on top of each other.
    """
    for attr in ("stdout", "stderr"):
        current = getattr(sys, attr, None)
        # Nothing to wrap, or already wrapped by an earlier call.
        if current is None or isinstance(current, _SafeWriter):
            continue
        setattr(sys, attr, _SafeWriter(current))
class IterationBudget:
"""Thread-safe shared iteration counter for parent and child agents.
@@ -324,6 +331,8 @@ class AIAgent:
honcho_manager: Optional shared HonchoSessionManager owned by the caller.
honcho_config: Optional HonchoClientConfig corresponding to honcho_manager.
"""
_install_safe_stdio()
self.model = model
self.max_iterations = max_iterations
# Shared iteration budget — parent creates, children inherit.
@@ -3868,10 +3877,9 @@ class AIAgent:
Returns:
Dict: Complete conversation result with final response and message history
"""
# Guard stdout against OSError from broken pipes (systemd/headless/daemon).
# Installed once, transparent when stdout is healthy, prevents crash on write.
if not isinstance(sys.stdout, _SafeWriter):
sys.stdout = _SafeWriter(sys.stdout)
# Guard stdio against OSError from broken pipes (systemd/headless/daemon).
# Installed once, transparent when streams are healthy, prevents crash on write.
_install_safe_stdio()
# Generate unique task_id if not provided to isolate VMs between concurrent tasks
effective_task_id = task_id or str(uuid.uuid4())

View File

@@ -1800,12 +1800,13 @@ class TestSafeWriter:
sys.stdout = original
def test_installed_in_run_conversation(self, agent):
"""run_conversation installs _SafeWriter on sys.stdout."""
"""run_conversation installs _SafeWriter on stdio."""
import sys
from run_agent import _SafeWriter
resp = _mock_response(content="Done", finish_reason="stop")
agent.client.chat.completions.create.return_value = resp
original = sys.stdout
original_stdout = sys.stdout
original_stderr = sys.stderr
try:
with (
patch.object(agent, "_persist_session"),
@@ -1814,6 +1815,41 @@ class TestSafeWriter:
):
agent.run_conversation("test")
assert isinstance(sys.stdout, _SafeWriter)
assert isinstance(sys.stderr, _SafeWriter)
finally:
sys.stdout = original_stdout
sys.stderr = original_stderr
def test_installed_before_init_time_honcho_error_prints(self):
    """AIAgent.__init__ wraps stdout before Honcho fallback prints can fire.

    stdout is replaced with a mock whose ``write``/``flush`` raise
    ``OSError(5)``, and Honcho client creation is forced to fail with
    ``RuntimeError``. Construction must still succeed, leave
    ``sys.stdout`` wrapped in ``_SafeWriter``, and leave ``agent._honcho``
    as ``None``.
    """
    import sys
    from run_agent import _SafeWriter

    broken = MagicMock()
    broken.write.side_effect = OSError(5, "Input/output error")
    broken.flush.side_effect = OSError(5, "Input/output error")

    # The stdio guard rebinds sys.stderr as well as sys.stdout, so both
    # must be saved and restored or the wrapper leaks into later tests.
    original_stdout = sys.stdout
    original_stderr = sys.stderr
    sys.stdout = broken
    try:
        hcfg = HonchoClientConfig(enabled=True, api_key="test-honcho-key")
        with (
            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
            patch("run_agent.check_toolset_requirements", return_value={}),
            patch("run_agent.OpenAI"),
            patch("hermes_cli.config.load_config", return_value={"memory": {}}),
            patch("honcho_integration.client.HonchoClientConfig.from_global_config", return_value=hcfg),
            # Force the Honcho setup path to fail during __init__.
            patch("honcho_integration.client.get_honcho_client", side_effect=RuntimeError("boom")),
        ):
            agent = AIAgent(
                api_key="test-k...7890",
                quiet_mode=True,
                skip_context_files=True,
                skip_memory=False,
            )

        assert isinstance(sys.stdout, _SafeWriter)
        assert agent._honcho is None
    finally:
        sys.stdout = original_stdout
        sys.stderr = original_stderr