fix: make vision_analyze timeout configurable via config.yaml (#2480)

Reads auxiliary.vision.timeout from config.yaml (default: 30s) and
passes it to async_call_llm. Useful for slow local vision models
that need more than 30 seconds.

Setting is in config.yaml (not .env) since it's not a secret:

  auxiliary:
    vision:
      timeout: 120

Based on PR #2306.

Co-authored-by: kshitijk4poor <kshitijk4poor@users.noreply.github.com>
This commit is contained in:
Teknium
2026-03-22 05:28:24 -07:00
committed by GitHub
parent a2276177a3
commit 6435d69a6d
3 changed files with 14 additions and 1 deletions

View File

@@ -182,6 +182,7 @@ DEFAULT_CONFIG = {
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
"base_url": "", # direct OpenAI-compatible endpoint (takes precedence over provider)
"api_key": "", # API key for base_url (falls back to OPENAI_API_KEY)
"timeout": 30, # seconds — increase for slow local vision models
},
"web_extract": {
"provider": "auto",

View File

@@ -298,12 +298,23 @@ async def vision_analyze_tool(
logger.info("Processing image with vision model...")
# Call the vision API via centralized router
# Call the vision API via centralized router.
# Read timeout from config.yaml (auxiliary.vision.timeout), default 30s.
vision_timeout = 30.0
try:
from hermes_cli.config import load_config
_cfg = load_config()
_vt = _cfg.get("auxiliary", {}).get("vision", {}).get("timeout")
if _vt is not None:
vision_timeout = float(_vt)
except Exception:
pass
call_kwargs = {
"task": "vision",
"messages": messages,
"temperature": 0.1,
"max_tokens": 2000,
"timeout": vision_timeout,
}
if model:
call_kwargs["model"] = model

View File

@@ -907,6 +907,7 @@ auxiliary:
model: "" # e.g. "openai/gpt-4o", "google/gemini-2.5-flash"
base_url: "" # Custom OpenAI-compatible endpoint (overrides provider)
api_key: "" # API key for base_url (falls back to OPENAI_API_KEY)
timeout: 30 # seconds — increase for slow local vision models
# Web page summarization + browser page text extraction
web_extract: