fix: web_extract fast-fail on scrape timeout + summarizer resilience

- Firecrawl scrape: 60s timeout via asyncio.wait_for + to_thread
  (previously could hang indefinitely)
- Summarizer attempts: 6 → 2 (i.e. one retry), reads timeout from
  auxiliary.web_extract.timeout config (default 360s / 6min)
- Summarizer failure: falls back to truncated raw content (~5000 chars)
  instead of useless error message, with guidance about config/model
- Config default: auxiliary.web_extract.timeout bumped 30 → 360s
  for local model compatibility

Addresses Discord reports of agent hanging during web_extract.
This commit is contained in:
Teknium
2026-04-05 11:16:33 -07:00
parent c100ad874c
commit 20b4060dbf
2 changed files with 43 additions and 9 deletions

View File

@@ -315,7 +315,7 @@ DEFAULT_CONFIG = {
"model": "",
"base_url": "",
"api_key": "",
"timeout": 30, # seconds increase for slow local models
"timeout": 360, # seconds (6min) — per-attempt LLM summarization timeout; increase for slow local models
},
"compression": {
"provider": "auto",

View File

@@ -554,8 +554,24 @@ async def process_content_with_llm(
return processed_content
except Exception as e:
logger.debug("Error processing content with LLM: %s", e)
return f"[Failed to process content: {str(e)[:100]}. Content size: {len(content):,} chars]"
logger.warning(
"web_extract LLM summarization failed (%s). "
"Tip: increase auxiliary.web_extract.timeout in config.yaml "
"or switch to a faster auxiliary model.",
str(e)[:120],
)
# Fall back to truncated raw content instead of returning a useless
# error message. The first ~5000 chars are almost always more useful
# to the model than "[Failed to process content: ...]".
truncated = content[:MAX_OUTPUT_SIZE]
if len(content) > MAX_OUTPUT_SIZE:
truncated += (
f"\n\n[Content truncated — showing first {MAX_OUTPUT_SIZE:,} of "
f"{len(content):,} chars. LLM summarization timed out. "
f"To fix: increase auxiliary.web_extract.timeout in config.yaml, "
f"or use a faster auxiliary model. Use browser_navigate for the full page.]"
)
return truncated
async def _call_summarizer_llm(
@@ -620,8 +636,9 @@ Your goal is to preserve ALL important information while reducing length. Never
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
# Call the LLM with retry logic
max_retries = 6
# Call the LLM with retry logic — keep retries low since summarization
# is a nice-to-have; the caller falls back to truncated content on failure.
max_retries = 2
retry_delay = 2
last_error = None
@@ -640,6 +657,9 @@ Create a markdown summary that captures all key information in a well-organized,
],
"temperature": 0.1,
"max_tokens": max_tokens,
# No explicit timeout — async_call_llm reads auxiliary.web_extract.timeout
# from config (default 360s / 6min). Users with slow local models can
# increase it in config.yaml.
}
if extra_body:
call_kwargs["extra_body"] = extra_body
@@ -1264,10 +1284,24 @@ async def web_extract_tool(
try:
logger.info("Scraping: %s", url)
scrape_result = _get_firecrawl_client().scrape(
url=url,
formats=formats
)
# Run synchronous Firecrawl scrape in a thread with a
# 60s timeout so a hung fetch doesn't block the session.
try:
scrape_result = await asyncio.wait_for(
asyncio.to_thread(
_get_firecrawl_client().scrape,
url=url,
formats=formats,
),
timeout=60,
)
except asyncio.TimeoutError:
logger.warning("Firecrawl scrape timed out for %s", url)
results.append({
"url": url, "title": "", "content": "",
"error": "Scrape timed out after 60s — page may be too large or unresponsive. Try browser_navigate instead.",
})
continue
scrape_payload = _extract_scrape_payload(scrape_result)
metadata = scrape_payload.get("metadata", {})