diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 2a0c346a5..fcd2eb12f 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1458,6 +1458,29 @@ def _resolve_task_provider_model(
     return "auto", resolved_model, None, None
 
 
+_DEFAULT_AUX_TIMEOUT = 30.0
+
+
+def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float:
+    """Read timeout from auxiliary.{task}.timeout in config, falling back to *default*."""
+    if not task:
+        return default
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+    except ImportError:
+        return default
+    aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
+    task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
+    raw = task_config.get("timeout")
+    if raw is not None:
+        try:
+            return float(raw)
+        except (ValueError, TypeError):
+            pass
+    return default
+
+
 def _build_call_kwargs(
     provider: str,
     model: str,
@@ -1515,7 +1538,7 @@ def call_llm(
     temperature: float = None,
     max_tokens: int = None,
     tools: list = None,
-    timeout: float = 30.0,
+    timeout: float = None,
     extra_body: dict = None,
 ) -> Any:
     """Centralized synchronous LLM call.
@@ -1533,7 +1556,7 @@ def call_llm(
         temperature: Sampling temperature (None = provider default).
         max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
         tools: Tool definitions (for function calling).
-        timeout: Request timeout in seconds.
+        timeout: Request timeout in seconds (None = read from auxiliary.{task}.timeout config).
         extra_body: Additional request body fields.
 
     Returns:
@@ -1598,10 +1621,12 @@ def call_llm(
             f"No LLM provider configured for task={task} provider={resolved_provider}. 
" f"Run: hermes setup") + effective_timeout = timeout if timeout is not None else _get_task_timeout(task) + kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, max_tokens=max_tokens, - tools=tools, timeout=timeout, extra_body=extra_body, + tools=tools, timeout=effective_timeout, extra_body=extra_body, base_url=resolved_base_url) # Handle max_tokens vs max_completion_tokens retry @@ -1683,7 +1708,7 @@ async def async_call_llm( temperature: float = None, max_tokens: int = None, tools: list = None, - timeout: float = 30.0, + timeout: float = None, extra_body: dict = None, ) -> Any: """Centralized asynchronous LLM call. @@ -1744,10 +1769,12 @@ async def async_call_llm( f"No LLM provider configured for task={task} provider={resolved_provider}. " f"Run: hermes setup") + effective_timeout = timeout if timeout is not None else _get_task_timeout(task) + kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, max_tokens=max_tokens, - tools=tools, timeout=timeout, extra_body=extra_body, + tools=tools, timeout=effective_timeout, extra_body=extra_body, base_url=resolved_base_url) try: diff --git a/agent/context_compressor.py b/agent/context_compressor.py index a39b19359..033c15dc9 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -347,7 +347,7 @@ Write only the summary body. 
Do not include any preamble or prefix."""
             "messages": [{"role": "user", "content": prompt}],
             "temperature": 0.3,
             "max_tokens": summary_budget * 2,
-            "timeout": 45.0,
+            # timeout resolved from auxiliary.compression.timeout config by call_llm
         }
         if self.summary_model:
             call_kwargs["model"] = self.summary_model
diff --git a/agent/title_generator.py b/agent/title_generator.py
index 9a18aab58..741fe8b09 100644
--- a/agent/title_generator.py
+++ b/agent/title_generator.py
@@ -19,7 +19,7 @@ _TITLE_PROMPT = (
 )
 
 
-def generate_title(user_message: str, assistant_response: str, timeout: float = 15.0) -> Optional[str]:
+def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
     """Generate a session title from the first exchange.
 
     Uses the auxiliary LLM client (cheapest/fastest available model).
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index dfa95aa54..e97436360 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -227,42 +227,49 @@ DEFAULT_CONFIG = {
             "model": "",
             "base_url": "",
             "api_key": "",
+            "timeout": 30,  # seconds — increase for slow local models
         },
         "compression": {
             "provider": "auto",
             "model": "",
             "base_url": "",
             "api_key": "",
+            "timeout": 120,  # seconds — compression summarises large contexts; increase for local models
         },
         "session_search": {
             "provider": "auto",
             "model": "",
             "base_url": "",
             "api_key": "",
+            "timeout": 30,
         },
         "skills_hub": {
             "provider": "auto",
             "model": "",
             "base_url": "",
             "api_key": "",
+            "timeout": 30,
         },
         "approval": {
             "provider": "auto",
             "model": "",  # fast/cheap model recommended (e.g. gemini-flash, haiku)
             "base_url": "",
             "api_key": "",
+            "timeout": 30,
         },
         "mcp": {
             "provider": "auto",
             "model": "",
             "base_url": "",
             "api_key": "",
+            "timeout": 30,
         },
         "flush_memories": {
             "provider": "auto",
             "model": "",
             "base_url": "",
             "api_key": "",
+            "timeout": 30,
         },
     },