Salvage of #3389 by @binhnt92, with reasoning fallback and retry logic added on top. All 7 auxiliary LLM call sites now use extract_content_or_reasoning(), which mirrors the main agent loop's behavior: extract the content, strip think blocks, fall back to structured reasoning fields, and retry when the result is empty. Closes #3389.
This commit is contained in:
@@ -21,7 +21,7 @@ import json
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
|
||||
from agent.auxiliary_client import async_call_llm
|
||||
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
||||
MAX_SESSION_CHARS = 100_000
|
||||
MAX_SUMMARY_TOKENS = 10000
|
||||
|
||||
@@ -161,7 +161,15 @@ async def _summarize_session(
|
||||
temperature=0.1,
|
||||
max_tokens=MAX_SUMMARY_TOKENS,
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
content = extract_content_or_reasoning(response)
|
||||
if content:
|
||||
return content
|
||||
# Reasoning-only / empty — let the retry loop handle it
|
||||
logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries)
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(1 * (attempt + 1))
|
||||
continue
|
||||
return content
|
||||
except RuntimeError:
|
||||
logging.warning("No auxiliary model available for session summarization")
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user