diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 1c1b98692..2a0c346a5 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1616,6 +1616,62 @@ def call_llm( raise +def extract_content_or_reasoning(response) -> str: + """Extract content from an LLM response, falling back to reasoning fields. + + Mirrors the main agent loop's behavior when a reasoning model (DeepSeek-R1, + Qwen-QwQ, etc.) returns ``content=None`` with reasoning in structured fields. + + Resolution order: + 1. ``message.content`` — strip inline think/reasoning blocks, check for + remaining non-whitespace text. + 2. ``message.reasoning`` / ``message.reasoning_content`` — direct + structured reasoning fields (DeepSeek, Moonshot, Novita, etc.). + 3. ``message.reasoning_details`` — OpenRouter unified array format. + + Returns the best available text, or ``""`` if nothing found. + """ + import re + + msg = response.choices[0].message + content = (msg.content or "").strip() + + if content: + # Strip inline think/reasoning blocks (mirrors _strip_think_blocks) + cleaned = re.sub( + r"<(?:think|thinking|reasoning|REASONING_SCRATCHPAD)>" + r".*?" 
+ r"", + "", content, flags=re.DOTALL | re.IGNORECASE, + ).strip() + if cleaned: + return cleaned + + # Content is empty or reasoning-only — try structured reasoning fields + reasoning_parts: list[str] = [] + for field in ("reasoning", "reasoning_content"): + val = getattr(msg, field, None) + if val and isinstance(val, str) and val.strip() and val not in reasoning_parts: + reasoning_parts.append(val.strip()) + + details = getattr(msg, "reasoning_details", None) + if details and isinstance(details, list): + for detail in details: + if isinstance(detail, dict): + summary = ( + detail.get("summary") + or detail.get("content") + or detail.get("text") + ) + if summary and summary not in reasoning_parts: + reasoning_parts.append(summary.strip() if isinstance(summary, str) else str(summary)) + + if reasoning_parts: + return "\n\n".join(reasoning_parts) + + return "" + + async def async_call_llm( task: str = None, *, diff --git a/tests/tools/test_llm_content_none_guard.py b/tests/tools/test_llm_content_none_guard.py new file mode 100644 index 000000000..b0adea8c7 --- /dev/null +++ b/tests/tools/test_llm_content_none_guard.py @@ -0,0 +1,294 @@ +"""Tests for None guard on response.choices[0].message.content.strip(). + +OpenAI-compatible APIs return ``message.content = None`` when the model +responds with tool calls only or reasoning-only output (e.g. DeepSeek-R1, +Qwen-QwQ via OpenRouter with ``reasoning.enabled = True``). Calling +``.strip()`` on ``None`` raises ``AttributeError``. + +These tests verify that every call site handles ``content is None`` safely, +and that ``extract_content_or_reasoning()`` falls back to structured +reasoning fields when content is empty. 
+""" + +import asyncio +import types +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from agent.auxiliary_client import extract_content_or_reasoning + + +# ── helpers ──────────────────────────────────────────────────────────────── + +def _make_response(content, **msg_attrs): + """Build a minimal OpenAI-compatible ChatCompletion response stub. + + Extra keyword args are set as attributes on the message object + (e.g. reasoning="...", reasoning_content="...", reasoning_details=[...]). + """ + message = types.SimpleNamespace(content=content, tool_calls=None, **msg_attrs) + choice = types.SimpleNamespace(message=message) + return types.SimpleNamespace(choices=[choice]) + + +def _run(coro): + """Run an async coroutine synchronously.""" + return asyncio.get_event_loop().run_until_complete(coro) + + +# ── mixture_of_agents_tool — reference model (line 146) ─────────────────── + +class TestMoAReferenceModelContentNone: + """tools/mixture_of_agents_tool.py — _query_model()""" + + def test_none_content_raises_before_fix(self): + """Demonstrate that None content from a reasoning model crashes.""" + response = _make_response(None) + + # Simulate the exact line: response.choices[0].message.content.strip() + with pytest.raises(AttributeError): + response.choices[0].message.content.strip() + + def test_none_content_safe_with_or_guard(self): + """The ``or ""`` guard should convert None to empty string.""" + response = _make_response(None) + + content = (response.choices[0].message.content or "").strip() + assert content == "" + + def test_normal_content_unaffected(self): + """Regular string content should pass through unchanged.""" + response = _make_response(" Hello world ") + + content = (response.choices[0].message.content or "").strip() + assert content == "Hello world" + + +# ── mixture_of_agents_tool — aggregator (line 214) ──────────────────────── + +class TestMoAAggregatorContentNone: + """tools/mixture_of_agents_tool.py — _run_aggregator()""" + 
+ def test_none_content_raises_before_fix(self): + response = _make_response(None) + + with pytest.raises(AttributeError): + response.choices[0].message.content.strip() + + def test_none_content_safe_with_or_guard(self): + response = _make_response(None) + + content = (response.choices[0].message.content or "").strip() + assert content == "" + + +# ── web_tools — LLM content processor (line 419) ───────────────────────── + +class TestWebToolsProcessorContentNone: + """tools/web_tools.py — _process_with_llm() return line""" + + def test_none_content_raises_before_fix(self): + response = _make_response(None) + + with pytest.raises(AttributeError): + response.choices[0].message.content.strip() + + def test_none_content_safe_with_or_guard(self): + response = _make_response(None) + + content = (response.choices[0].message.content or "").strip() + assert content == "" + + +# ── web_tools — synthesis/summarization (line 538) ──────────────────────── + +class TestWebToolsSynthesisContentNone: + """tools/web_tools.py — synthesize_content() final_summary line""" + + def test_none_content_raises_before_fix(self): + response = _make_response(None) + + with pytest.raises(AttributeError): + response.choices[0].message.content.strip() + + def test_none_content_safe_with_or_guard(self): + response = _make_response(None) + + content = (response.choices[0].message.content or "").strip() + assert content == "" + + +# ── vision_tools (line 350) ─────────────────────────────────────────────── + +class TestVisionToolsContentNone: + """tools/vision_tools.py — analyze_image() analysis extraction""" + + def test_none_content_raises_before_fix(self): + response = _make_response(None) + + with pytest.raises(AttributeError): + response.choices[0].message.content.strip() + + def test_none_content_safe_with_or_guard(self): + response = _make_response(None) + + content = (response.choices[0].message.content or "").strip() + assert content == "" + + +# ── skills_guard (line 963) 
─────────────────────────────────────────────── + +class TestSkillsGuardContentNone: + """tools/skills_guard.py — _llm_audit_skill() llm_text extraction""" + + def test_none_content_raises_before_fix(self): + response = _make_response(None) + + with pytest.raises(AttributeError): + response.choices[0].message.content.strip() + + def test_none_content_safe_with_or_guard(self): + response = _make_response(None) + + content = (response.choices[0].message.content or "").strip() + assert content == "" + + +# ── session_search_tool (line 164) ──────────────────────────────────────── + +class TestSessionSearchContentNone: + """tools/session_search_tool.py — _summarize_session() return line""" + + def test_none_content_raises_before_fix(self): + response = _make_response(None) + + with pytest.raises(AttributeError): + response.choices[0].message.content.strip() + + def test_none_content_safe_with_or_guard(self): + response = _make_response(None) + + content = (response.choices[0].message.content or "").strip() + assert content == "" + + +# ── integration: verify the actual source lines are guarded ─────────────── + +class TestSourceLinesAreGuarded: + """Read the actual source files and verify the fix is applied. + + These tests will FAIL before the fix (bare .content.strip()) and + PASS after ((.content or "").strip()). + """ + + @staticmethod + def _read_file(rel_path: str) -> str: + import os + base = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) + with open(os.path.join(base, rel_path)) as f: + return f.read() + + def test_mixture_of_agents_reference_model_guarded(self): + src = self._read_file("tools/mixture_of_agents_tool.py") + # The unguarded pattern should NOT exist + assert ".message.content.strip()" not in src, ( + "tools/mixture_of_agents_tool.py still has unguarded " + ".content.strip() — apply `(... 
or \"\").strip()` guard" + ) + + def test_web_tools_guarded(self): + src = self._read_file("tools/web_tools.py") + assert ".message.content.strip()" not in src, ( + "tools/web_tools.py still has unguarded " + ".content.strip() — apply `(... or \"\").strip()` guard" + ) + + def test_vision_tools_guarded(self): + src = self._read_file("tools/vision_tools.py") + assert ".message.content.strip()" not in src, ( + "tools/vision_tools.py still has unguarded " + ".content.strip() — apply `(... or \"\").strip()` guard" + ) + + def test_skills_guard_guarded(self): + src = self._read_file("tools/skills_guard.py") + assert ".message.content.strip()" not in src, ( + "tools/skills_guard.py still has unguarded " + ".content.strip() — apply `(... or \"\").strip()` guard" + ) + + def test_session_search_tool_guarded(self): + src = self._read_file("tools/session_search_tool.py") + assert ".message.content.strip()" not in src, ( + "tools/session_search_tool.py still has unguarded " + ".content.strip() — apply `(... or \"\").strip()` guard" + ) + + +# ── extract_content_or_reasoning() ──────────────────────────────────────── + +class TestExtractContentOrReasoning: + """agent/auxiliary_client.py — extract_content_or_reasoning()""" + + def test_normal_content_returned(self): + response = _make_response(" Hello world ") + assert extract_content_or_reasoning(response) == "Hello world" + + def test_none_content_returns_empty(self): + response = _make_response(None) + assert extract_content_or_reasoning(response) == "" + + def test_empty_string_returns_empty(self): + response = _make_response("") + assert extract_content_or_reasoning(response) == "" + + def test_think_blocks_stripped_with_remaining_content(self): + response = _make_response("internal reasoningThe answer is 42.") + assert extract_content_or_reasoning(response) == "The answer is 42." 
+
+    def test_think_only_content_falls_back_to_reasoning_field(self):
+        """When content is only think blocks, fall back to structured reasoning."""
+        response = _make_response(
+            "<think>some reasoning</think>",
+            reasoning="The actual reasoning output",
+        )
+        assert extract_content_or_reasoning(response) == "The actual reasoning output"
+
+    def test_none_content_with_reasoning_field(self):
+        """DeepSeek-R1 pattern: content=None, reasoning='...'"""
+        response = _make_response(None, reasoning="Step 1: analyze the problem...")
+        assert extract_content_or_reasoning(response) == "Step 1: analyze the problem..."
+
+    def test_none_content_with_reasoning_content_field(self):
+        """Moonshot/Novita pattern: content=None, reasoning_content='...'"""
+        response = _make_response(None, reasoning_content="Let me think about this...")
+        assert extract_content_or_reasoning(response) == "Let me think about this..."
+
+    def test_none_content_with_reasoning_details(self):
+        """OpenRouter unified format: reasoning_details=[{summary: ...}]"""
+        response = _make_response(None, reasoning_details=[
+            {"type": "reasoning.summary", "summary": "The key insight is..."},
+        ])
+        assert extract_content_or_reasoning(response) == "The key insight is..."
+ + def test_reasoning_fields_not_duplicated(self): + """When reasoning and reasoning_content have the same value, don't duplicate.""" + response = _make_response(None, reasoning="same text", reasoning_content="same text") + assert extract_content_or_reasoning(response) == "same text" + + def test_multiple_reasoning_sources_combined(self): + """Different reasoning sources are joined with double newline.""" + response = _make_response( + None, + reasoning="First part", + reasoning_content="Second part", + ) + result = extract_content_or_reasoning(response) + assert "First part" in result + assert "Second part" in result + + def test_content_preferred_over_reasoning(self): + """When both content and reasoning exist, content wins.""" + response = _make_response("Actual answer", reasoning="Internal reasoning") + assert extract_content_or_reasoning(response) == "Actual answer" diff --git a/tools/mixture_of_agents_tool.py b/tools/mixture_of_agents_tool.py index 18d8840c1..9367a3f1e 100644 --- a/tools/mixture_of_agents_tool.py +++ b/tools/mixture_of_agents_tool.py @@ -52,6 +52,7 @@ import asyncio import datetime from typing import Dict, Any, List, Optional from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key +from agent.auxiliary_client import extract_content_or_reasoning from tools.debug_helpers import DebugSession logger = logging.getLogger(__name__) @@ -143,7 +144,13 @@ async def _run_reference_model_safe( response = await _get_openrouter_client().chat.completions.create(**api_params) - content = response.choices[0].message.content.strip() + content = extract_content_or_reasoning(response) + if not content: + # Reasoning-only response — let the retry loop handle it + logger.warning("%s returned empty content (attempt %s/%s), retrying", model, attempt + 1, max_retries) + if attempt < max_retries - 1: + await asyncio.sleep(min(2 ** (attempt + 1), 60)) + continue logger.info("%s responded (%s 
characters)", model, len(content)) return model, content, True @@ -211,7 +218,14 @@ async def _run_aggregator_model( response = await _get_openrouter_client().chat.completions.create(**api_params) - content = response.choices[0].message.content.strip() + content = extract_content_or_reasoning(response) + + # Retry once on empty content (reasoning-only response) + if not content: + logger.warning("Aggregator returned empty content, retrying once") + response = await _get_openrouter_client().chat.completions.create(**api_params) + content = extract_content_or_reasoning(response) + logger.info("Aggregation complete (%s characters)", len(content)) return content diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index 235585270..d7cabcc9e 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -21,7 +21,7 @@ import json import logging from typing import Dict, Any, List, Optional, Union -from agent.auxiliary_client import async_call_llm +from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning MAX_SESSION_CHARS = 100_000 MAX_SUMMARY_TOKENS = 10000 @@ -161,7 +161,15 @@ async def _summarize_session( temperature=0.1, max_tokens=MAX_SUMMARY_TOKENS, ) - return response.choices[0].message.content.strip() + content = extract_content_or_reasoning(response) + if content: + return content + # Reasoning-only / empty — let the retry loop handle it + logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries) + if attempt < max_retries - 1: + await asyncio.sleep(1 * (attempt + 1)) + continue + return content except RuntimeError: logging.warning("No auxiliary model available for session summarization") return None diff --git a/tools/skills_guard.py b/tools/skills_guard.py index 217863af5..d22b7d294 100644 --- a/tools/skills_guard.py +++ b/tools/skills_guard.py @@ -948,9 +948,9 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult, # Call the LLM via the 
centralized provider router try: - from agent.auxiliary_client import call_llm + from agent.auxiliary_client import call_llm, extract_content_or_reasoning - response = call_llm( + call_kwargs = dict( provider="openrouter", model=model, messages=[{ @@ -960,7 +960,13 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult, temperature=0, max_tokens=1000, ) - llm_text = response.choices[0].message.content.strip() + response = call_llm(**call_kwargs) + llm_text = extract_content_or_reasoning(response) + + # Retry once on empty content (reasoning-only response) + if not llm_text: + response = call_llm(**call_kwargs) + llm_text = extract_content_or_reasoning(response) except Exception: # LLM audit is best-effort — don't block install if the call fails return static_result diff --git a/tools/vision_tools.py b/tools/vision_tools.py index fe81032b0..d8b96bc4e 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -37,7 +37,7 @@ from pathlib import Path from typing import Any, Awaitable, Dict, Optional from urllib.parse import urlparse import httpx -from agent.auxiliary_client import async_call_llm +from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning from tools.debug_helpers import DebugSession logger = logging.getLogger(__name__) @@ -346,8 +346,15 @@ async def vision_analyze_tool( call_kwargs["model"] = model response = await async_call_llm(**call_kwargs) - # Extract the analysis - analysis = response.choices[0].message.content.strip() + # Extract the analysis — fall back to reasoning if content is empty + analysis = extract_content_or_reasoning(response) + + # Retry once on empty content (reasoning-only response) + if not analysis: + logger.warning("Vision LLM returned empty content, retrying once") + response = await async_call_llm(**call_kwargs) + analysis = extract_content_or_reasoning(response) + analysis_length = len(analysis) logger.info("Image analysis completed (%s characters)", analysis_length) diff --git 
a/tools/web_tools.py b/tools/web_tools.py index 38ad8cf00..3677930d8 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -44,7 +44,7 @@ import asyncio from typing import List, Dict, Any, Optional import httpx from firecrawl import Firecrawl -from agent.auxiliary_client import async_call_llm +from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning from tools.debug_helpers import DebugSession from tools.url_safety import is_safe_url from tools.website_policy import check_website_access @@ -416,7 +416,16 @@ Create a markdown summary that captures all key information in a well-organized, if model: call_kwargs["model"] = model response = await async_call_llm(**call_kwargs) - return response.choices[0].message.content.strip() + content = extract_content_or_reasoning(response) + if content: + return content + # Reasoning-only / empty response — let the retry loop handle it + logger.warning("LLM returned empty content (attempt %d/%d), retrying", attempt + 1, max_retries) + if attempt < max_retries - 1: + await asyncio.sleep(retry_delay) + retry_delay = min(retry_delay * 2, 60) + continue + return content # Return whatever we got after exhausting retries except RuntimeError: logger.warning("No auxiliary model available for web content processing") return None @@ -535,8 +544,14 @@ Create a single, unified markdown summary.""" if model: call_kwargs["model"] = model response = await async_call_llm(**call_kwargs) - final_summary = response.choices[0].message.content.strip() - + final_summary = extract_content_or_reasoning(response) + + # Retry once on empty content (reasoning-only response) + if not final_summary: + logger.warning("Synthesis LLM returned empty content, retrying once") + response = await async_call_llm(**call_kwargs) + final_summary = extract_content_or_reasoning(response) + # Enforce hard cap if len(final_summary) > max_output_size: final_summary = final_summary[:max_output_size] + "\n\n[... 
summary truncated for context management ...]"