fix: web_extract fast-fail on scrape timeout + summarizer resilience

- Firecrawl scrape: 60s timeout via asyncio.wait_for + to_thread
  (previously could hang indefinitely)
- Summarizer attempts: 6 → 2 (i.e. one retry), reads timeout from
  auxiliary.web_extract.timeout config (default 360s / 6min)
- Summarizer failure: falls back to truncated raw content (~5000 chars)
  instead of useless error message, with guidance about config/model
- Config default: auxiliary.web_extract.timeout bumped 30 → 360s
  for local model compatibility

Addresses Discord reports of agent hanging during web_extract.
This commit is contained in:
Teknium
2026-04-05 11:16:33 -07:00
parent c100ad874c
commit 20b4060dbf
2 changed files with 43 additions and 9 deletions

View File

@@ -315,7 +315,7 @@ DEFAULT_CONFIG = {
"model": "",
"base_url": "",
"api_key": "",
"timeout": 30, # seconds increase for slow local models
"timeout": 360, # seconds (6min) — per-attempt LLM summarization timeout; increase for slow local models
},
"compression": {
"provider": "auto",

View File

@@ -554,8 +554,24 @@ async def process_content_with_llm(
return processed_content
except Exception as e:
logger.debug("Error processing content with LLM: %s", e)
return f"[Failed to process content: {str(e)[:100]}. Content size: {len(content):,} chars]"
logger.warning(
"web_extract LLM summarization failed (%s). "
"Tip: increase auxiliary.web_extract.timeout in config.yaml "
"or switch to a faster auxiliary model.",
str(e)[:120],
)
# Fall back to truncated raw content instead of returning a useless
# error message. The first ~5000 chars are almost always more useful
# to the model than "[Failed to process content: ...]".
truncated = content[:MAX_OUTPUT_SIZE]
if len(content) > MAX_OUTPUT_SIZE:
truncated += (
f"\n\n[Content truncated — showing first {MAX_OUTPUT_SIZE:,} of "
f"{len(content):,} chars. LLM summarization timed out. "
f"To fix: increase auxiliary.web_extract.timeout in config.yaml, "
f"or use a faster auxiliary model. Use browser_navigate for the full page.]"
)
return truncated
async def _call_summarizer_llm(
@@ -620,8 +636,9 @@ Your goal is to preserve ALL important information while reducing length. Never
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
# Call the LLM with retry logic
max_retries = 6
# Call the LLM with retry logic — keep retries low since summarization
# is a nice-to-have; the caller falls back to truncated content on failure.
max_retries = 2
retry_delay = 2
last_error = None
@@ -640,6 +657,9 @@ Create a markdown summary that captures all key information in a well-organized,
],
"temperature": 0.1,
"max_tokens": max_tokens,
# No explicit timeout — async_call_llm reads auxiliary.web_extract.timeout
# from config (default 360s / 6min). Users with slow local models can
# increase it in config.yaml.
}
if extra_body:
call_kwargs["extra_body"] = extra_body
@@ -1264,10 +1284,24 @@ async def web_extract_tool(
try:
logger.info("Scraping: %s", url)
scrape_result = _get_firecrawl_client().scrape(
url=url,
formats=formats
)
# Run synchronous Firecrawl scrape in a thread with a
# 60s timeout so a hung fetch doesn't block the session.
try:
scrape_result = await asyncio.wait_for(
asyncio.to_thread(
_get_firecrawl_client().scrape,
url=url,
formats=formats,
),
timeout=60,
)
except asyncio.TimeoutError:
logger.warning("Firecrawl scrape timed out for %s", url)
results.append({
"url": url, "title": "", "content": "",
"error": "Scrape timed out after 60s — page may be too large or unresponsive. Try browser_navigate instead.",
})
continue
scrape_payload = _extract_scrape_payload(scrape_result)
metadata = scrape_payload.get("metadata", {})