fix(anthropic): send fast mode speed via extra_body
This commit is contained in:
@@ -1230,9 +1230,10 @@ def build_anthropic_kwargs(
|
||||
When *base_url* points to a third-party Anthropic-compatible endpoint,
|
||||
thinking block signatures are stripped (they are Anthropic-proprietary).
|
||||
|
||||
When *fast_mode* is True, adds ``speed: "fast"`` and the fast-mode beta
|
||||
header for ~2.5x faster output throughput on Opus 4.6. Currently only
|
||||
supported on native Anthropic endpoints (not third-party compatible ones).
|
||||
When *fast_mode* is True, adds ``extra_body["speed"] = "fast"`` and the
|
||||
fast-mode beta header for ~2.5x faster output throughput on Opus 4.6.
|
||||
Currently only supported on native Anthropic endpoints (not third-party
|
||||
compatible ones).
|
||||
"""
|
||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
@@ -1333,11 +1334,11 @@ def build_anthropic_kwargs(
|
||||
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
|
||||
|
||||
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
|
||||
# Adds speed:"fast" + the fast-mode beta header for ~2.5x output speed.
|
||||
# Only for native Anthropic endpoints — third-party providers would
|
||||
# reject the unknown beta header and speed parameter.
|
||||
# Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
|
||||
# output speed. Only for native Anthropic endpoints — third-party
|
||||
# providers would reject the unknown beta header and speed parameter.
|
||||
if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
|
||||
kwargs["speed"] = "fast"
|
||||
kwargs.setdefault("extra_body", {})["speed"] = "fast"
|
||||
# Build extra_headers with ALL applicable betas (the per-request
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
|
||||
@@ -369,7 +369,8 @@ class TestAnthropicFastModeAdapter(unittest.TestCase):
|
||||
reasoning_config=None,
|
||||
fast_mode=True,
|
||||
)
|
||||
assert kwargs.get("speed") == "fast"
|
||||
assert kwargs.get("extra_body", {}).get("speed") == "fast"
|
||||
assert "speed" not in kwargs
|
||||
assert "extra_headers" in kwargs
|
||||
assert _FAST_MODE_BETA in kwargs["extra_headers"].get("anthropic-beta", "")
|
||||
|
||||
@@ -384,6 +385,7 @@ class TestAnthropicFastModeAdapter(unittest.TestCase):
|
||||
reasoning_config=None,
|
||||
fast_mode=False,
|
||||
)
|
||||
assert kwargs.get("extra_body", {}).get("speed") is None
|
||||
assert "speed" not in kwargs
|
||||
assert "extra_headers" not in kwargs
|
||||
|
||||
@@ -400,9 +402,24 @@ class TestAnthropicFastModeAdapter(unittest.TestCase):
|
||||
base_url="https://api.minimax.io/anthropic/v1",
|
||||
)
|
||||
# Third-party endpoints should NOT get speed or fast-mode beta
|
||||
assert kwargs.get("extra_body", {}).get("speed") is None
|
||||
assert "speed" not in kwargs
|
||||
assert "extra_headers" not in kwargs
|
||||
|
||||
def test_fast_mode_kwargs_are_safe_for_sdk_unpacking(self):
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="claude-opus-4-6",
|
||||
messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
|
||||
tools=None,
|
||||
max_tokens=None,
|
||||
reasoning_config=None,
|
||||
fast_mode=True,
|
||||
)
|
||||
assert "speed" not in kwargs
|
||||
assert kwargs.get("extra_body", {}).get("speed") == "fast"
|
||||
|
||||
|
||||
class TestConfigDefault(unittest.TestCase):
|
||||
def test_default_config_has_service_tier(self):
|
||||
|
||||
Reference in New Issue
Block a user