On the native Anthropic Messages API path, convert_messages_to_anthropic() moves top-level cache_control on role:tool messages inside the tool_result block. On OpenRouter (chat_completions), no such conversion happens — the unexpected top-level field causes a silent hang on the second tool call. Add native_anthropic parameter to _apply_cache_marker() and apply_anthropic_cache_control(). When False (OpenRouter), role:tool messages are skipped entirely. When True (native Anthropic), existing behaviour is preserved. Fixes #2362
73 lines
2.2 KiB
Python
73 lines
2.2 KiB
Python
"""Anthropic prompt caching (system_and_3 strategy).
|
|
|
|
Reduces input token costs by ~75% on multi-turn conversations by caching
|
|
the conversation prefix. Uses 4 cache_control breakpoints (Anthropic max):
|
|
1. System prompt (stable across all turns)
|
|
2-4. Last 3 non-system messages (rolling window)
|
|
|
|
Pure functions -- no class state, no AIAgent dependency.
|
|
"""
|
|
|
|
import copy
|
|
from typing import Any, Dict, List
|
|
|
|
|
|
def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool = False) -> None:
|
|
"""Add cache_control to a single message, handling all format variations."""
|
|
role = msg.get("role", "")
|
|
content = msg.get("content")
|
|
|
|
if role == "tool":
|
|
if native_anthropic:
|
|
msg["cache_control"] = cache_marker
|
|
return
|
|
|
|
if content is None or content == "":
|
|
msg["cache_control"] = cache_marker
|
|
return
|
|
|
|
if isinstance(content, str):
|
|
msg["content"] = [
|
|
{"type": "text", "text": content, "cache_control": cache_marker}
|
|
]
|
|
return
|
|
|
|
if isinstance(content, list) and content:
|
|
last = content[-1]
|
|
if isinstance(last, dict):
|
|
last["cache_control"] = cache_marker
|
|
|
|
|
|
def apply_anthropic_cache_control(
|
|
api_messages: List[Dict[str, Any]],
|
|
cache_ttl: str = "5m",
|
|
native_anthropic: bool = False,
|
|
) -> List[Dict[str, Any]]:
|
|
"""Apply system_and_3 caching strategy to messages for Anthropic models.
|
|
|
|
Places up to 4 cache_control breakpoints: system prompt + last 3 non-system messages.
|
|
|
|
Returns:
|
|
Deep copy of messages with cache_control breakpoints injected.
|
|
"""
|
|
messages = copy.deepcopy(api_messages)
|
|
if not messages:
|
|
return messages
|
|
|
|
marker = {"type": "ephemeral"}
|
|
if cache_ttl == "1h":
|
|
marker["ttl"] = "1h"
|
|
|
|
breakpoints_used = 0
|
|
|
|
if messages[0].get("role") == "system":
|
|
_apply_cache_marker(messages[0], marker, native_anthropic=native_anthropic)
|
|
breakpoints_used += 1
|
|
|
|
remaining = 4 - breakpoints_used
|
|
non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"]
|
|
for idx in non_sys[-remaining:]:
|
|
_apply_cache_marker(messages[idx], marker, native_anthropic=native_anthropic)
|
|
|
|
return messages
|