Salvage of #3389 by @binhnt92 with reasoning fallback and retry logic added on top. All 7 auxiliary LLM call sites now use extract_content_or_reasoning() which mirrors the main agent loop's behavior: extract content, strip think blocks, fall back to structured reasoning fields, retry on empty. Closes #3389.
This commit is contained in:
@@ -1616,6 +1616,62 @@ def call_llm(
|
||||
raise
|
||||
|
||||
|
||||
def extract_content_or_reasoning(response) -> str:
    """Extract content from an LLM response, falling back to reasoning fields.

    Mirrors the main agent loop's behavior when a reasoning model (DeepSeek-R1,
    Qwen-QwQ, etc.) returns ``content=None`` with reasoning in structured fields.

    Resolution order:
    1. ``message.content`` — strip inline think/reasoning blocks, check for
       remaining non-whitespace text.
    2. ``message.reasoning`` / ``message.reasoning_content`` — direct
       structured reasoning fields (DeepSeek, Moonshot, Novita, etc.).
    3. ``message.reasoning_details`` — OpenRouter unified array format.

    Args:
        response: An OpenAI-compatible ChatCompletion response object with
            at least ``response.choices[0].message``.

    Returns:
        The best available text, or ``""`` if nothing was found.
    """
    import re

    msg = response.choices[0].message
    content = (msg.content or "").strip()

    if content:
        # Strip inline think/reasoning blocks (mirrors _strip_think_blocks)
        cleaned = re.sub(
            r"<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>"
            r".*?"
            r"</(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>",
            "", content, flags=re.DOTALL | re.IGNORECASE,
        ).strip()
        if cleaned:
            return cleaned

    # Content is empty or reasoning-only — try structured reasoning fields.
    # Deduplicate on the *stripped* value: comparing the raw value against
    # already-stripped entries let whitespace-differing duplicates through
    # (e.g. reasoning="x" plus reasoning_content="x\n" appended twice).
    reasoning_parts: list[str] = []
    for field in ("reasoning", "reasoning_content"):
        val = getattr(msg, field, None)
        if isinstance(val, str):
            stripped = val.strip()
            if stripped and stripped not in reasoning_parts:
                reasoning_parts.append(stripped)

    # OpenRouter unified format: a list of dicts with summary/content/text.
    details = getattr(msg, "reasoning_details", None)
    if isinstance(details, list):
        for detail in details:
            if not isinstance(detail, dict):
                continue
            summary = (
                detail.get("summary")
                or detail.get("content")
                or detail.get("text")
            )
            if not summary:
                continue
            text = summary.strip() if isinstance(summary, str) else str(summary)
            if text and text not in reasoning_parts:
                reasoning_parts.append(text)

    # "".join of an empty list is "" — covers the nothing-found case.
    return "\n\n".join(reasoning_parts)
|
||||
|
||||
|
||||
async def async_call_llm(
|
||||
task: str = None,
|
||||
*,
|
||||
|
||||
294
tests/tools/test_llm_content_none_guard.py
Normal file
294
tests/tools/test_llm_content_none_guard.py
Normal file
@@ -0,0 +1,294 @@
|
||||
"""Tests for None guard on response.choices[0].message.content.strip().
|
||||
|
||||
OpenAI-compatible APIs return ``message.content = None`` when the model
|
||||
responds with tool calls only or reasoning-only output (e.g. DeepSeek-R1,
|
||||
Qwen-QwQ via OpenRouter with ``reasoning.enabled = True``). Calling
|
||||
``.strip()`` on ``None`` raises ``AttributeError``.
|
||||
|
||||
These tests verify that every call site handles ``content is None`` safely,
|
||||
and that ``extract_content_or_reasoning()`` falls back to structured
|
||||
reasoning fields when content is empty.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import types
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.auxiliary_client import extract_content_or_reasoning
|
||||
|
||||
|
||||
# ── helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
def _make_response(content, **msg_attrs):
|
||||
"""Build a minimal OpenAI-compatible ChatCompletion response stub.
|
||||
|
||||
Extra keyword args are set as attributes on the message object
|
||||
(e.g. reasoning="...", reasoning_content="...", reasoning_details=[...]).
|
||||
"""
|
||||
message = types.SimpleNamespace(content=content, tool_calls=None, **msg_attrs)
|
||||
choice = types.SimpleNamespace(message=message)
|
||||
return types.SimpleNamespace(choices=[choice])
|
||||
|
||||
|
||||
def _run(coro):
|
||||
"""Run an async coroutine synchronously."""
|
||||
return asyncio.get_event_loop().run_until_complete(coro)
|
||||
|
||||
|
||||
# ── mixture_of_agents_tool — reference model (line 146) ───────────────────
|
||||
|
||||
class TestMoAReferenceModelContentNone:
    """tools/mixture_of_agents_tool.py — _query_model()"""

    def test_none_content_raises_before_fix(self):
        """A bare .strip() on None content must blow up with AttributeError."""
        msg = _make_response(None).choices[0].message
        # The exact unguarded call site: response.choices[0].message.content.strip()
        with pytest.raises(AttributeError):
            msg.content.strip()

    def test_none_content_safe_with_or_guard(self):
        """The ``or ""`` guard should convert None to empty string."""
        msg = _make_response(None).choices[0].message
        assert (msg.content or "").strip() == ""

    def test_normal_content_unaffected(self):
        """Regular string content should pass through unchanged."""
        msg = _make_response(" Hello world ").choices[0].message
        assert (msg.content or "").strip() == "Hello world"
|
||||
|
||||
|
||||
# ── mixture_of_agents_tool — aggregator (line 214) ────────────────────────
|
||||
|
||||
class TestMoAAggregatorContentNone:
    """tools/mixture_of_agents_tool.py — _run_aggregator()"""

    def test_none_content_raises_before_fix(self):
        # Unguarded .strip() on a None content crashes.
        msg = _make_response(None).choices[0].message
        with pytest.raises(AttributeError):
            msg.content.strip()

    def test_none_content_safe_with_or_guard(self):
        # The `or ""` guard turns None into an empty string.
        msg = _make_response(None).choices[0].message
        assert (msg.content or "").strip() == ""
|
||||
|
||||
|
||||
# ── web_tools — LLM content processor (line 419) ─────────────────────────
|
||||
|
||||
class TestWebToolsProcessorContentNone:
    """tools/web_tools.py — _process_with_llm() return line"""

    def test_none_content_raises_before_fix(self):
        # Unguarded .strip() on a None content crashes.
        msg = _make_response(None).choices[0].message
        with pytest.raises(AttributeError):
            msg.content.strip()

    def test_none_content_safe_with_or_guard(self):
        # The `or ""` guard turns None into an empty string.
        msg = _make_response(None).choices[0].message
        assert (msg.content or "").strip() == ""
|
||||
|
||||
|
||||
# ── web_tools — synthesis/summarization (line 538) ────────────────────────
|
||||
|
||||
class TestWebToolsSynthesisContentNone:
    """tools/web_tools.py — synthesize_content() final_summary line"""

    def test_none_content_raises_before_fix(self):
        # Unguarded .strip() on a None content crashes.
        msg = _make_response(None).choices[0].message
        with pytest.raises(AttributeError):
            msg.content.strip()

    def test_none_content_safe_with_or_guard(self):
        # The `or ""` guard turns None into an empty string.
        msg = _make_response(None).choices[0].message
        assert (msg.content or "").strip() == ""
|
||||
|
||||
|
||||
# ── vision_tools (line 350) ───────────────────────────────────────────────
|
||||
|
||||
class TestVisionToolsContentNone:
    """tools/vision_tools.py — analyze_image() analysis extraction"""

    def test_none_content_raises_before_fix(self):
        # Unguarded .strip() on a None content crashes.
        msg = _make_response(None).choices[0].message
        with pytest.raises(AttributeError):
            msg.content.strip()

    def test_none_content_safe_with_or_guard(self):
        # The `or ""` guard turns None into an empty string.
        msg = _make_response(None).choices[0].message
        assert (msg.content or "").strip() == ""
|
||||
|
||||
|
||||
# ── skills_guard (line 963) ───────────────────────────────────────────────
|
||||
|
||||
class TestSkillsGuardContentNone:
    """tools/skills_guard.py — _llm_audit_skill() llm_text extraction"""

    def test_none_content_raises_before_fix(self):
        # Unguarded .strip() on a None content crashes.
        msg = _make_response(None).choices[0].message
        with pytest.raises(AttributeError):
            msg.content.strip()

    def test_none_content_safe_with_or_guard(self):
        # The `or ""` guard turns None into an empty string.
        msg = _make_response(None).choices[0].message
        assert (msg.content or "").strip() == ""
|
||||
|
||||
|
||||
# ── session_search_tool (line 164) ────────────────────────────────────────
|
||||
|
||||
class TestSessionSearchContentNone:
    """tools/session_search_tool.py — _summarize_session() return line"""

    def test_none_content_raises_before_fix(self):
        # Unguarded .strip() on a None content crashes.
        msg = _make_response(None).choices[0].message
        with pytest.raises(AttributeError):
            msg.content.strip()

    def test_none_content_safe_with_or_guard(self):
        # The `or ""` guard turns None into an empty string.
        msg = _make_response(None).choices[0].message
        assert (msg.content or "").strip() == ""
|
||||
|
||||
|
||||
# ── integration: verify the actual source lines are guarded ───────────────
|
||||
|
||||
class TestSourceLinesAreGuarded:
    """Read the actual source files and verify the fix is applied.

    These tests will FAIL before the fix (bare .content.strip()) and
    PASS after ((.content or "").strip()).
    """

    @staticmethod
    def _read_file(rel_path: str) -> str:
        # Resolve relative to the repo root (three levels above this test).
        import os
        root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
        with open(os.path.join(root, rel_path)) as fh:
            return fh.read()

    def _assert_guarded(self, rel_path: str) -> None:
        # Shared check: the unguarded pattern must not appear anywhere.
        source = self._read_file(rel_path)
        assert ".message.content.strip()" not in source, (
            f'{rel_path} still has unguarded '
            f'.content.strip() — apply `(... or "").strip()` guard'
        )

    def test_mixture_of_agents_reference_model_guarded(self):
        self._assert_guarded("tools/mixture_of_agents_tool.py")

    def test_web_tools_guarded(self):
        self._assert_guarded("tools/web_tools.py")

    def test_vision_tools_guarded(self):
        self._assert_guarded("tools/vision_tools.py")

    def test_skills_guard_guarded(self):
        self._assert_guarded("tools/skills_guard.py")

    def test_session_search_tool_guarded(self):
        self._assert_guarded("tools/session_search_tool.py")
|
||||
|
||||
|
||||
# ── extract_content_or_reasoning() ────────────────────────────────────────
|
||||
|
||||
class TestExtractContentOrReasoning:
    """agent/auxiliary_client.py — extract_content_or_reasoning()"""

    def test_normal_content_returned(self):
        resp = _make_response(" Hello world ")
        assert extract_content_or_reasoning(resp) == "Hello world"

    def test_none_content_returns_empty(self):
        assert extract_content_or_reasoning(_make_response(None)) == ""

    def test_empty_string_returns_empty(self):
        assert extract_content_or_reasoning(_make_response("")) == ""

    def test_think_blocks_stripped_with_remaining_content(self):
        resp = _make_response("<think>internal reasoning</think>The answer is 42.")
        assert extract_content_or_reasoning(resp) == "The answer is 42."

    def test_think_only_content_falls_back_to_reasoning_field(self):
        """When content is only think blocks, fall back to structured reasoning."""
        resp = _make_response(
            "<think>some reasoning</think>",
            reasoning="The actual reasoning output",
        )
        assert extract_content_or_reasoning(resp) == "The actual reasoning output"

    def test_none_content_with_reasoning_field(self):
        """DeepSeek-R1 pattern: content=None, reasoning='...'"""
        resp = _make_response(None, reasoning="Step 1: analyze the problem...")
        assert extract_content_or_reasoning(resp) == "Step 1: analyze the problem..."

    def test_none_content_with_reasoning_content_field(self):
        """Moonshot/Novita pattern: content=None, reasoning_content='...'"""
        resp = _make_response(None, reasoning_content="Let me think about this...")
        assert extract_content_or_reasoning(resp) == "Let me think about this..."

    def test_none_content_with_reasoning_details(self):
        """OpenRouter unified format: reasoning_details=[{summary: ...}]"""
        details = [{"type": "reasoning.summary", "summary": "The key insight is..."}]
        resp = _make_response(None, reasoning_details=details)
        assert extract_content_or_reasoning(resp) == "The key insight is..."

    def test_reasoning_fields_not_duplicated(self):
        """When reasoning and reasoning_content have the same value, don't duplicate."""
        resp = _make_response(None, reasoning="same text", reasoning_content="same text")
        assert extract_content_or_reasoning(resp) == "same text"

    def test_multiple_reasoning_sources_combined(self):
        """Different reasoning sources are joined with double newline."""
        resp = _make_response(None, reasoning="First part", reasoning_content="Second part")
        out = extract_content_or_reasoning(resp)
        assert "First part" in out and "Second part" in out

    def test_content_preferred_over_reasoning(self):
        """When both content and reasoning exist, content wins."""
        resp = _make_response("Actual answer", reasoning="Internal reasoning")
        assert extract_content_or_reasoning(resp) == "Actual answer"
||||
@@ -52,6 +52,7 @@ import asyncio
|
||||
import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key
|
||||
from agent.auxiliary_client import extract_content_or_reasoning
|
||||
from tools.debug_helpers import DebugSession
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -143,7 +144,13 @@ async def _run_reference_model_safe(
|
||||
|
||||
response = await _get_openrouter_client().chat.completions.create(**api_params)
|
||||
|
||||
content = response.choices[0].message.content.strip()
|
||||
content = extract_content_or_reasoning(response)
|
||||
if not content:
|
||||
# Reasoning-only response — let the retry loop handle it
|
||||
logger.warning("%s returned empty content (attempt %s/%s), retrying", model, attempt + 1, max_retries)
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(min(2 ** (attempt + 1), 60))
|
||||
continue
|
||||
logger.info("%s responded (%s characters)", model, len(content))
|
||||
return model, content, True
|
||||
|
||||
@@ -211,7 +218,14 @@ async def _run_aggregator_model(
|
||||
|
||||
response = await _get_openrouter_client().chat.completions.create(**api_params)
|
||||
|
||||
content = response.choices[0].message.content.strip()
|
||||
content = extract_content_or_reasoning(response)
|
||||
|
||||
# Retry once on empty content (reasoning-only response)
|
||||
if not content:
|
||||
logger.warning("Aggregator returned empty content, retrying once")
|
||||
response = await _get_openrouter_client().chat.completions.create(**api_params)
|
||||
content = extract_content_or_reasoning(response)
|
||||
|
||||
logger.info("Aggregation complete (%s characters)", len(content))
|
||||
return content
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ import json
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
|
||||
from agent.auxiliary_client import async_call_llm
|
||||
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
||||
MAX_SESSION_CHARS = 100_000
|
||||
MAX_SUMMARY_TOKENS = 10000
|
||||
|
||||
@@ -161,7 +161,15 @@ async def _summarize_session(
|
||||
temperature=0.1,
|
||||
max_tokens=MAX_SUMMARY_TOKENS,
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
content = extract_content_or_reasoning(response)
|
||||
if content:
|
||||
return content
|
||||
# Reasoning-only / empty — let the retry loop handle it
|
||||
logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries)
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(1 * (attempt + 1))
|
||||
continue
|
||||
return content
|
||||
except RuntimeError:
|
||||
logging.warning("No auxiliary model available for session summarization")
|
||||
return None
|
||||
|
||||
@@ -948,9 +948,9 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
|
||||
|
||||
# Call the LLM via the centralized provider router
|
||||
try:
|
||||
from agent.auxiliary_client import call_llm
|
||||
from agent.auxiliary_client import call_llm, extract_content_or_reasoning
|
||||
|
||||
response = call_llm(
|
||||
call_kwargs = dict(
|
||||
provider="openrouter",
|
||||
model=model,
|
||||
messages=[{
|
||||
@@ -960,7 +960,13 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
|
||||
temperature=0,
|
||||
max_tokens=1000,
|
||||
)
|
||||
llm_text = response.choices[0].message.content.strip()
|
||||
response = call_llm(**call_kwargs)
|
||||
llm_text = extract_content_or_reasoning(response)
|
||||
|
||||
# Retry once on empty content (reasoning-only response)
|
||||
if not llm_text:
|
||||
response = call_llm(**call_kwargs)
|
||||
llm_text = extract_content_or_reasoning(response)
|
||||
except Exception:
|
||||
# LLM audit is best-effort — don't block install if the call fails
|
||||
return static_result
|
||||
|
||||
@@ -37,7 +37,7 @@ from pathlib import Path
|
||||
from typing import Any, Awaitable, Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
import httpx
|
||||
from agent.auxiliary_client import async_call_llm
|
||||
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
||||
from tools.debug_helpers import DebugSession
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -346,8 +346,15 @@ async def vision_analyze_tool(
|
||||
call_kwargs["model"] = model
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
|
||||
# Extract the analysis
|
||||
analysis = response.choices[0].message.content.strip()
|
||||
# Extract the analysis — fall back to reasoning if content is empty
|
||||
analysis = extract_content_or_reasoning(response)
|
||||
|
||||
# Retry once on empty content (reasoning-only response)
|
||||
if not analysis:
|
||||
logger.warning("Vision LLM returned empty content, retrying once")
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
analysis = extract_content_or_reasoning(response)
|
||||
|
||||
analysis_length = len(analysis)
|
||||
|
||||
logger.info("Image analysis completed (%s characters)", analysis_length)
|
||||
|
||||
@@ -44,7 +44,7 @@ import asyncio
|
||||
from typing import List, Dict, Any, Optional
|
||||
import httpx
|
||||
from firecrawl import Firecrawl
|
||||
from agent.auxiliary_client import async_call_llm
|
||||
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
||||
from tools.debug_helpers import DebugSession
|
||||
from tools.url_safety import is_safe_url
|
||||
from tools.website_policy import check_website_access
|
||||
@@ -416,7 +416,16 @@ Create a markdown summary that captures all key information in a well-organized,
|
||||
if model:
|
||||
call_kwargs["model"] = model
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
return response.choices[0].message.content.strip()
|
||||
content = extract_content_or_reasoning(response)
|
||||
if content:
|
||||
return content
|
||||
# Reasoning-only / empty response — let the retry loop handle it
|
||||
logger.warning("LLM returned empty content (attempt %d/%d), retrying", attempt + 1, max_retries)
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(retry_delay)
|
||||
retry_delay = min(retry_delay * 2, 60)
|
||||
continue
|
||||
return content # Return whatever we got after exhausting retries
|
||||
except RuntimeError:
|
||||
logger.warning("No auxiliary model available for web content processing")
|
||||
return None
|
||||
@@ -535,8 +544,14 @@ Create a single, unified markdown summary."""
|
||||
if model:
|
||||
call_kwargs["model"] = model
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
final_summary = response.choices[0].message.content.strip()
|
||||
|
||||
final_summary = extract_content_or_reasoning(response)
|
||||
|
||||
# Retry once on empty content (reasoning-only response)
|
||||
if not final_summary:
|
||||
logger.warning("Synthesis LLM returned empty content, retrying once")
|
||||
response = await async_call_llm(**call_kwargs)
|
||||
final_summary = extract_content_or_reasoning(response)
|
||||
|
||||
# Enforce hard cap
|
||||
if len(final_summary) > max_output_size:
|
||||
final_summary = final_summary[:max_output_size] + "\n\n[... summary truncated for context management ...]"
|
||||
|
||||
Reference in New Issue
Block a user