fix(core): guard print() calls in run_conversation() against OSError

In headless environments (systemd, Docker, nohup) stdout can become
unavailable mid-session. Raw print() raises OSError which crashes
cron jobs — agent finishes work but delivery never happens because
the error handler's own print() also raises OSError.

Fix:
- Add _safe_print() static method that wraps print() with try/except
  OSError — silently drops output when stdout is broken
- Make _vprint() use _safe_print() — protects all calls through the
  verbose print path
- Convert raw print() calls in run_conversation() hot path to use
  _safe_print(): starting conversation, interrupt, budget exhausted,
  preflight compression, context cache, conversation completed
- Error handler print (the cascading crash point) gets explicit
  try/except with logger.error() fallback so diagnostics aren't lost

Fixes #845
Closes #1358 (superseded — that PR was 323 commits behind and contained a bug)
This commit is contained in:
Teknium
2026-03-17 02:41:01 -07:00
committed by GitHub
parent 342a0ad372
commit a3ac142c83

View File

@@ -856,6 +856,19 @@ class AIAgent:
else:
print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
@staticmethod
def _safe_print(*args, **kwargs):
"""Print that silently handles broken pipes / closed stdout.
In headless environments (systemd, Docker, nohup) stdout may become
unavailable mid-session. A raw ``print()`` raises ``OSError`` which
can crash cron jobs and lose completed work.
"""
try:
print(*args, **kwargs)
except OSError:
pass
def _vprint(self, *args, force: bool = False, **kwargs):
"""Verbose print — suppressed when streaming TTS is active.
@@ -864,7 +877,7 @@ class AIAgent:
"""
if not force and self._has_stream_consumers():
return
print(*args, **kwargs)
self._safe_print(*args, **kwargs)
def _max_tokens_param(self, value: int) -> dict:
"""Return the correct max tokens kwarg for the current provider.
@@ -4752,7 +4765,7 @@ class AIAgent:
self._persist_user_message_idx = current_turn_user_idx
if not self.quiet_mode:
print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
self._safe_print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
# ── System prompt (cached per session for prefix caching) ──
# Built once on first call, reused for all subsequent calls.
@@ -4822,7 +4835,7 @@ class AIAgent:
f"{self.context_compressor.context_length:,}",
)
if not self.quiet_mode:
print(
self._safe_print(
f"📦 Preflight compression: ~{_preflight_tokens:,} tokens "
f">= {self.context_compressor.threshold_tokens:,} threshold"
)
@@ -4862,13 +4875,13 @@ class AIAgent:
if self._interrupt_requested:
interrupted = True
if not self.quiet_mode:
print(f"\n⚡ Breaking out of tool loop due to interrupt...")
self._safe_print(f"\n⚡ Breaking out of tool loop due to interrupt...")
break
api_call_count += 1
if not self.iteration_budget.consume():
if not self.quiet_mode:
print(f"\n⚠️ Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
self._safe_print(f"\n⚠️ Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
break
# Fire step_callback for gateway hooks (agent:step event)
@@ -5287,7 +5300,7 @@ class AIAgent:
if self.context_compressor._context_probed:
ctx = self.context_compressor.context_length
save_context_length(self.model, self.base_url, ctx)
print(f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}")
self._safe_print(f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}")
self.context_compressor._context_probed = False
self.session_prompt_tokens += prompt_tokens
@@ -6129,12 +6142,15 @@ class AIAgent:
messages.append(final_msg)
if not self.quiet_mode:
print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
self._safe_print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
break
except Exception as e:
error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
print(f"{error_msg}")
try:
print(f"{error_msg}")
except OSError:
logger.error(error_msg)
if self.verbose_logging:
logging.exception("Detailed error information:")