From fb46a90098e0fad3cc8e3c44193f96ad3ce9bb91 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:02:23 -0700 Subject: [PATCH] fix: increase API timeout default from 900s to 1800s for slow-thinking models (#3431) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Models like GLM-5/5.1 can think for 15+ minutes. The previous 900s (15 min) default for HERMES_API_TIMEOUT killed legitimate requests. Raised to 1800s (30 min) in both places that read the env var: - _build_api_kwargs() timeout (non-streaming total timeout) - _call_chat_completions() write timeout (streaming connection) The streaming per-chunk read timeout (60s) and stale stream detector (180-300s) are unchanged — those are appropriate for inter-chunk timing. --- run_agent.py | 4 ++-- tests/test_run_agent.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/run_agent.py b/run_agent.py index 457479f6e..8a3fcf614 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3772,7 +3772,7 @@ class AIAgent: def _call_chat_completions(): """Stream a chat completions response.""" import httpx as _httpx - _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 900.0)) + _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0)) _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 60.0)) stream_kwargs = { **api_kwargs, @@ -4497,7 +4497,7 @@ class AIAgent: "model": self.model, "messages": sanitized_messages, "tools": self.tools if self.tools else None, - "timeout": float(os.getenv("HERMES_API_TIMEOUT", 900.0)), + "timeout": float(os.getenv("HERMES_API_TIMEOUT", 1800.0)), } if self.max_tokens is not None: diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 1d60d6db4..b6aaedf72 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -637,7 +637,7 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) assert kwargs["model"] == agent.model assert kwargs["messages"] is messages - assert kwargs["timeout"] == 900.0 + assert kwargs["timeout"] == 1800.0 def test_provider_preferences_injected(self, agent): agent.providers_allowed = ["Anthropic"]