diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index d19d50da6..81342f6bb 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -26,8 +26,16 @@ logger = logging.getLogger(__name__) THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000} -# Beta headers required for OAuth/subscription auth -_OAUTH_BETAS = ["oauth-2025-04-20"] +# Beta headers for enhanced features (sent with ALL auth types) +_COMMON_BETAS = [ + "interleaved-thinking-2025-05-14", + "fine-grained-tool-streaming-2025-05-14", +] + +# Additional beta headers required for OAuth/subscription auth +_OAUTH_ONLY_BETAS = [ + "oauth-2025-04-20", +] def _is_oauth_token(key: str) -> bool: @@ -54,12 +62,15 @@ def build_anthropic_client(api_key: str, base_url: str = None): kwargs["base_url"] = base_url if _is_oauth_token(api_key): - # OAuth access token / setup-token → Bearer auth + beta header + # OAuth access token / setup-token → Bearer auth + beta headers + all_betas = _COMMON_BETAS + _OAUTH_ONLY_BETAS kwargs["auth_token"] = api_key - kwargs["default_headers"] = {"anthropic-beta": ",".join(_OAUTH_BETAS)} + kwargs["default_headers"] = {"anthropic-beta": ",".join(all_betas)} else: - # Regular API key → x-api-key header + # Regular API key → x-api-key header + common betas kwargs["api_key"] = api_key + if _COMMON_BETAS: + kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)} return _anthropic_sdk.Anthropic(**kwargs) @@ -173,6 +184,58 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]: return result +def _convert_vision_content(content: Any) -> Any: + """Convert OpenAI multimodal content blocks to Anthropic format. 
+ + OpenAI format: [{"type": "image_url", "image_url": {"url": "data:...;base64,..."}}] + Anthropic format: [{"type": "image", "source": {"type": "base64", ...}}] + """ + if not isinstance(content, list): + return content + + result = [] + for block in content: + if not isinstance(block, dict): + result.append(block) + continue + + if block.get("type") == "image_url": + image_url = block.get("image_url", {}) + url = image_url.get("url", "") if isinstance(image_url, dict) else "" + + if url.startswith("data:"): + # data:image/png;base64,iVBOR... + try: + header, b64_data = url.split(",", 1) + media_type = header.split(":")[1].split(";")[0] + result.append({ + "type": "image", + "source": { + "type": "base64", + "media_type": media_type, + "data": b64_data, + }, + }) + except (ValueError, IndexError): + logger.warning("Could not parse data URL for image, skipping") + else: + # Regular URL — Anthropic supports url source type + result.append({ + "type": "image", + "source": { + "type": "url", + "url": url, + }, + }) + elif block.get("type") == "text": + result.append({"type": "text", "text": block.get("text", "")}) + else: + # Pass through unknown block types + result.append(block) + + return result + + def convert_messages_to_anthropic( messages: List[Dict], ) -> Tuple[Optional[Any], List[Dict]]: @@ -241,8 +304,9 @@ def convert_messages_to_anthropic( result.append({"role": "user", "content": [tool_result]}) continue - # Regular user message - result.append({"role": "user", "content": content}) + # Regular user message — convert vision content if multimodal + converted = _convert_vision_content(content) if isinstance(content, list) else content + result.append({"role": "user", "content": converted}) # Strip orphaned tool_use blocks (no matching tool_result follows) tool_result_ids = set() @@ -261,6 +325,40 @@ def convert_messages_to_anthropic( if not m["content"]: m["content"] = [{"type": "text", "text": "(tool call removed)"}] + # Enforce strict role alternation 
(Anthropic rejects consecutive same-role messages) + fixed = [] + for m in result: + if fixed and fixed[-1]["role"] == m["role"]: + if m["role"] == "user": + # Merge consecutive user messages + prev_content = fixed[-1]["content"] + curr_content = m["content"] + if isinstance(prev_content, str) and isinstance(curr_content, str): + fixed[-1]["content"] = prev_content + "\n" + curr_content + elif isinstance(prev_content, list) and isinstance(curr_content, list): + fixed[-1]["content"] = prev_content + curr_content + else: + # Mixed types — wrap string in list + if isinstance(prev_content, str): + prev_content = [{"type": "text", "text": prev_content}] + if isinstance(curr_content, str): + curr_content = [{"type": "text", "text": curr_content}] + fixed[-1]["content"] = prev_content + curr_content + else: + # Consecutive assistant messages — merge text content + prev_blocks = fixed[-1]["content"] + curr_blocks = m["content"] + if isinstance(prev_blocks, list) and isinstance(curr_blocks, list): + fixed[-1]["content"] = prev_blocks + curr_blocks + elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str): + fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks + else: + # Keep the later message + fixed[-1] = m + else: + fixed.append(m) + result = fixed + return system, result @@ -270,6 +368,7 @@ def build_anthropic_kwargs( tools: Optional[List[Dict]], max_tokens: Optional[int], reasoning_config: Optional[Dict[str, Any]], + tool_choice: Optional[str] = None, ) -> Dict[str, Any]: """Build kwargs for anthropic.messages.create().""" system, anthropic_messages = convert_messages_to_anthropic(messages) @@ -289,6 +388,16 @@ def build_anthropic_kwargs( if anthropic_tools: kwargs["tools"] = anthropic_tools + # Map OpenAI tool_choice to Anthropic format + if tool_choice == "auto" or tool_choice is None: + kwargs["tool_choice"] = {"type": "auto"} + elif tool_choice == "required": + kwargs["tool_choice"] = {"type": "any"} + elif tool_choice == "none": + pass # Don't send 
tool_choice — NOTE: Anthropic then falls back to its default (auto) and may still call tools, so "none" is not actually enforced here + elif isinstance(tool_choice, str): + # Specific tool name + kwargs["tool_choice"] = {"type": "tool", "name": tool_choice} # Map reasoning_config to Anthropic's thinking parameter if reasoning_config and isinstance(reasoning_config, dict): diff --git a/run_agent.py b/run_agent.py index 20a05d88b..4c9798140 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3830,9 +3830,15 @@ class AIAgent: # Log cache hit stats when prompt caching is active if self._use_prompt_caching: - details = getattr(response.usage, 'prompt_tokens_details', None) - cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0 - written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0 + if self.api_mode == "anthropic_messages": + # Anthropic uses cache_read_input_tokens / cache_creation_input_tokens + cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0 + written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0 + else: + # OpenRouter uses prompt_tokens_details.cached_tokens + details = getattr(response.usage, 'prompt_tokens_details', None) + cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0 + written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0 prompt = usage_dict["prompt_tokens"] hit_pct = (cached / prompt * 100) if prompt > 0 else 0 if not self.quiet_mode: @@ -3882,6 +3888,19 @@ class AIAgent: if self._try_refresh_nous_client_credentials(force=True): print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. 
Retrying request...") continue + if ( + self.api_mode == "anthropic_messages" + and status_code == 401 + and hasattr(self, '_anthropic_api_key') + ): + # Try re-reading Claude Code credentials (they may have been refreshed) + from agent.anthropic_adapter import resolve_anthropic_token, build_anthropic_client + new_token = resolve_anthropic_token() + if new_token and new_token != self._anthropic_api_key: + self._anthropic_api_key = new_token + self._anthropic_client = build_anthropic_client(new_token) + print(f"{self.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...") + continue retry_count += 1 elapsed_time = time.time() - api_start_time diff --git a/tests/test_anthropic_adapter.py b/tests/test_anthropic_adapter.py index 54c722f93..e00c60712 100644 --- a/tests/test_anthropic_adapter.py +++ b/tests/test_anthropic_adapter.py @@ -43,7 +43,10 @@ class TestBuildAnthropicClient: build_anthropic_client("sk-ant-oat01-" + "x" * 60) kwargs = mock_sdk.Anthropic.call_args[1] assert "auth_token" in kwargs - assert "oauth-2025-04-20" in kwargs["default_headers"]["anthropic-beta"] + betas = kwargs["default_headers"]["anthropic-beta"] + assert "oauth-2025-04-20" in betas + assert "interleaved-thinking-2025-05-14" in betas + assert "fine-grained-tool-streaming-2025-05-14" in betas assert "api_key" not in kwargs def test_api_key_uses_api_key(self): @@ -52,6 +55,10 @@ class TestBuildAnthropicClient: kwargs = mock_sdk.Anthropic.call_args[1] assert kwargs["api_key"] == "sk-ant-api03-something" assert "auth_token" not in kwargs + # API key auth should still get common betas + betas = kwargs["default_headers"]["anthropic-beta"] + assert "interleaved-thinking-2025-05-14" in betas + assert "oauth-2025-04-20" not in betas # OAuth-only beta NOT present def test_custom_base_url(self): with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk: @@ -404,3 +411,119 @@ class TestNormalizeResponse: ) assert msg.content is None assert len(msg.tool_calls) == 1 + + +# 
--------------------------------------------------------------------------- +# Vision content conversion +# --------------------------------------------------------------------------- + + +class TestVisionContentConversion: + def test_base64_image(self): + from agent.anthropic_adapter import _convert_vision_content + + content = [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR"}}, + ] + result = _convert_vision_content(content) + assert result[0] == {"type": "text", "text": "What's in this image?"} + assert result[1]["type"] == "image" + assert result[1]["source"]["type"] == "base64" + assert result[1]["source"]["media_type"] == "image/png" + assert result[1]["source"]["data"] == "iVBOR" + + def test_url_image(self): + from agent.anthropic_adapter import _convert_vision_content + + content = [ + {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}}, + ] + result = _convert_vision_content(content) + assert result[0]["type"] == "image" + assert result[0]["source"]["type"] == "url" + assert result[0]["source"]["url"] == "https://example.com/img.png" + + def test_passthrough_non_list(self): + from agent.anthropic_adapter import _convert_vision_content + + assert _convert_vision_content("plain text") == "plain text" + + +# --------------------------------------------------------------------------- +# Role alternation +# --------------------------------------------------------------------------- + + +class TestRoleAlternation: + def test_merges_consecutive_user_messages(self): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "user", "content": "World"}, + ] + _, result = convert_messages_to_anthropic(messages) + assert len(result) == 1 + assert result[0]["role"] == "user" + assert "Hello" in result[0]["content"] + assert "World" in result[0]["content"] + + def test_preserves_proper_alternation(self): + messages = [ + {"role": "user", "content": "Hi"}, + 
{"role": "assistant", "content": "Hello!"}, + {"role": "user", "content": "How are you?"}, + ] + _, result = convert_messages_to_anthropic(messages) + assert len(result) == 3 + assert [m["role"] for m in result] == ["user", "assistant", "user"] + + +# --------------------------------------------------------------------------- +# Tool choice +# --------------------------------------------------------------------------- + + +class TestToolChoice: + _DUMMY_TOOL = [ + { + "type": "function", + "function": { + "name": "test", + "description": "x", + "parameters": {"type": "object", "properties": {}}, + }, + } + ] + + def test_auto_tool_choice(self): + kwargs = build_anthropic_kwargs( + model="claude-sonnet-4-20250514", + messages=[{"role": "user", "content": "Hi"}], + tools=self._DUMMY_TOOL, + max_tokens=4096, + reasoning_config=None, + tool_choice="auto", + ) + assert kwargs["tool_choice"] == {"type": "auto"} + + def test_required_tool_choice(self): + kwargs = build_anthropic_kwargs( + model="claude-sonnet-4-20250514", + messages=[{"role": "user", "content": "Hi"}], + tools=self._DUMMY_TOOL, + max_tokens=4096, + reasoning_config=None, + tool_choice="required", + ) + assert kwargs["tool_choice"] == {"type": "any"} + + def test_specific_tool_choice(self): + kwargs = build_anthropic_kwargs( + model="claude-sonnet-4-20250514", + messages=[{"role": "user", "content": "Hi"}], + tools=self._DUMMY_TOOL, + max_tokens=4096, + reasoning_config=None, + tool_choice="search", + ) + assert kwargs["tool_choice"] == {"type": "tool", "name": "search"}