From fb46a90098e0fad3cc8e3c44193f96ad3ce9bb91 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 27 Mar 2026 13:02:23 -0700
Subject: [PATCH] fix: increase API timeout default from 900s to 1800s for
 slow-thinking models (#3431)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Models like GLM-5/5.1 can think for 15+ minutes before responding. The
previous 900s (15 min) default for HERMES_API_TIMEOUT cut off such
legitimate requests before they could complete.

The default is raised to 1800s (30 min) in both places that read the env
var; an explicitly set HERMES_API_TIMEOUT still takes precedence:
- _build_api_kwargs() timeout (non-streaming total timeout)
- _call_chat_completions() write timeout (streaming connection)

The streaming per-chunk read timeout (60s) and stale stream detector
(180-300s) are unchanged — those are appropriate for inter-chunk timing.
---
 run_agent.py            | 4 ++--
 tests/test_run_agent.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index 457479f6e..8a3fcf614 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3772,7 +3772,7 @@ class AIAgent:
         def _call_chat_completions():
             """Stream a chat completions response."""
             import httpx as _httpx
-            _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 900.0))
+            _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 1800.0))
             _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 60.0))
             stream_kwargs = {
                 **api_kwargs,
@@ -4497,7 +4497,7 @@ class AIAgent:
             "model": self.model,
             "messages": sanitized_messages,
             "tools": self.tools if self.tools else None,
-            "timeout": float(os.getenv("HERMES_API_TIMEOUT", 900.0)),
+            "timeout": float(os.getenv("HERMES_API_TIMEOUT", 1800.0)),
         }
 
         if self.max_tokens is not None:
diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py
index 1d60d6db4..b6aaedf72 100644
--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -637,7 +637,7 @@ class TestBuildApiKwargs:
         kwargs = agent._build_api_kwargs(messages)
         assert kwargs["model"] == agent.model
         assert kwargs["messages"] is messages
-        assert kwargs["timeout"] == 900.0
+        assert kwargs["timeout"] == 1800.0
 
     def test_provider_preferences_injected(self, agent):
         agent.providers_allowed = ["Anthropic"]