diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index fb5044137..a2a052d0a 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -35,6 +35,54 @@ ADAPTIVE_EFFORT_MAP = {
     "minimal": "low",
 }
 
+# ── Max output token limits per Anthropic model ──────────────────────
+# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
+# max_tokens as a mandatory field. Previously we hardcoded 16384, which
+# starves thinking-enabled models (thinking tokens count toward the limit).
+_ANTHROPIC_OUTPUT_LIMITS = {
+    # Claude 4.6
+    "claude-opus-4-6": 128_000,
+    "claude-sonnet-4-6": 64_000,
+    # Claude 4.5
+    "claude-opus-4-5": 64_000,
+    "claude-sonnet-4-5": 64_000,
+    "claude-haiku-4-5": 64_000,
+    # Claude 4
+    "claude-opus-4": 32_000,
+    "claude-sonnet-4": 64_000,
+    # Claude 3.7
+    "claude-3-7-sonnet": 64_000,  # 128K only with beta header output-128k-2025-02-19
+    # Claude 3.5
+    "claude-3-5-sonnet": 8_192,
+    "claude-3-5-haiku": 8_192,
+    # Claude 3
+    "claude-3-opus": 4_096,
+    "claude-3-sonnet": 4_096,
+    "claude-3-haiku": 4_096,
+}
+
+# For any model not in the table, assume the highest current limit.
+# Future Anthropic models are unlikely to have *less* output capacity.
+_ANTHROPIC_DEFAULT_OUTPUT_LIMIT = 128_000
+
+
+def _get_anthropic_max_output(model: str) -> int:
+    """Look up the max output token limit for an Anthropic model.
+
+    Uses substring matching against _ANTHROPIC_OUTPUT_LIMITS so date-stamped
+    model IDs (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast)
+    resolve correctly. The longest matching key wins, so "claude-3-5-sonnet"
+    is preferred over a shorter family match such as "claude-3-5".
+    """
+    m = model.lower()
+    best_key = ""
+    best_val = _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
+    for key, val in _ANTHROPIC_OUTPUT_LIMITS.items():
+        if key in m and len(key) > len(best_key):
+            best_key = key
+            best_val = val
+    return best_val
+
 
 def _supports_adaptive_thinking(model: str) -> bool:
     """Return True for Claude 4.6 models that support adaptive thinking."""
@@ -818,9 +866,15 @@ def build_anthropic_kwargs(
     tool_choice: Optional[str] = None,
     is_oauth: bool = False,
     preserve_dots: bool = False,
+    context_length: Optional[int] = None,
 ) -> Dict[str, Any]:
     """Build kwargs for anthropic.messages.create().
 
+    When *max_tokens* is None, the model's native output limit is used
+    (e.g. 128K for Opus 4.6, 64K for Sonnet 4.6). If *context_length*
+    is provided, the effective limit is clamped so it doesn't exceed
+    the context window.
+
     When *is_oauth* is True, applies Claude Code compatibility transforms:
     system prompt prefix, tool name prefixing, and prompt sanitization.
 
@@ -831,7 +885,12 @@ def build_anthropic_kwargs(
     anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
 
     model = normalize_model_name(model, preserve_dots=preserve_dots)
-    effective_max_tokens = max_tokens or 16384
+    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
+
+    # Clamp to context window if the user set a lower context_length
+    # (e.g. custom endpoint with limited capacity).
+    if context_length and effective_max_tokens > context_length:
+        effective_max_tokens = max(context_length - 1, 1)
 
     # ── OAuth: Claude Code identity ──────────────────────────────────
     if is_oauth:
diff --git a/run_agent.py b/run_agent.py
index 8a3fcf614..6a0eb7442 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -4378,6 +4378,10 @@ class AIAgent:
         if self.api_mode == "anthropic_messages":
             from agent.anthropic_adapter import build_anthropic_kwargs
             anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
+            # Pass context_length so the adapter can clamp max_tokens if the
+            # user configured a smaller context window than the model's output limit.
+            ctx_len = getattr(self, "context_compressor", None)
+            ctx_len = ctx_len.context_length if ctx_len else None
             return build_anthropic_kwargs(
                 model=self.model,
                 messages=anthropic_messages,
@@ -4386,6 +4390,7 @@ class AIAgent:
                 reasoning_config=self.reasoning_config,
                 is_oauth=self._is_anthropic_oauth,
                 preserve_dots=self._anthropic_preserve_dots(),
+                context_length=ctx_len,
             )
 
         if self.api_mode == "codex_responses":
diff --git a/tests/test_anthropic_adapter.py b/tests/test_anthropic_adapter.py
index 00f780988..7e2e1c767 100644
--- a/tests/test_anthropic_adapter.py
+++ b/tests/test_anthropic_adapter.py
@@ -926,7 +926,8 @@ class TestBuildAnthropicKwargs:
         )
         assert "thinking" not in kwargs
 
-    def test_default_max_tokens(self):
+    def test_default_max_tokens_uses_model_output_limit(self):
+        """When max_tokens is None, use the model's native output limit."""
         kwargs = build_anthropic_kwargs(
             model="claude-sonnet-4-20250514",
             messages=[{"role": "user", "content": "Hi"}],
@@ -934,7 +935,135 @@ class TestBuildAnthropicKwargs:
             tools=None,
             max_tokens=None,
             reasoning_config=None,
         )
-        assert kwargs["max_tokens"] == 16384
+        assert kwargs["max_tokens"] == 64_000  # Sonnet 4 output limit
+
+    def test_default_max_tokens_opus_4_6(self):
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-6",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 128_000
+
+    def test_default_max_tokens_sonnet_4_6(self):
+        kwargs = build_anthropic_kwargs(
+            model="claude-sonnet-4-6",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 64_000
+
+    def test_default_max_tokens_date_stamped_model(self):
+        """Date-stamped model IDs should resolve via substring match."""
+        kwargs = build_anthropic_kwargs(
+            model="claude-sonnet-4-5-20250929",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 64_000
+
+    def test_default_max_tokens_older_model(self):
+        kwargs = build_anthropic_kwargs(
+            model="claude-3-5-sonnet-20241022",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 8_192
+
+    def test_default_max_tokens_unknown_model_uses_highest(self):
+        """Unknown future models should get the highest known limit."""
+        kwargs = build_anthropic_kwargs(
+            model="claude-ultra-5-20260101",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 128_000
+
+    def test_explicit_max_tokens_overrides_default(self):
+        """User-specified max_tokens should be respected."""
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-6",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 4096
+
+    def test_context_length_clamp(self):
+        """max_tokens should be clamped to context_length if it's smaller."""
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-6",  # 128K output
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+            context_length=50000,
+        )
+        assert kwargs["max_tokens"] == 49999  # context_length - 1
+
+    def test_context_length_no_clamp_when_larger(self):
+        """No clamping when context_length exceeds output limit."""
+        kwargs = build_anthropic_kwargs(
+            model="claude-sonnet-4-6",  # 64K output
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+            context_length=200000,
+        )
+        assert kwargs["max_tokens"] == 64_000
+
+
+# ---------------------------------------------------------------------------
+# Model output limit lookup
+# ---------------------------------------------------------------------------
+
+
+class TestGetAnthropicMaxOutput:
+    def test_opus_4_6(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-opus-4-6") == 128_000
+
+    def test_opus_4_6_variant(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-opus-4-6:1m:fast") == 128_000
+
+    def test_sonnet_4_6(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-sonnet-4-6") == 64_000
+
+    def test_sonnet_4_date_stamped(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-sonnet-4-20250514") == 64_000
+
+    def test_claude_3_5_sonnet(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192
+
+    def test_claude_3_opus(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-3-opus-20240229") == 4_096
+
+    def test_unknown_future_model(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-ultra-5-20260101") == 128_000
+
+    def test_longest_prefix_wins(self):
+        """'claude-3-5-sonnet' should match before 'claude-3-5'."""
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        # claude-3-5-sonnet (8192) should win over a hypothetical shorter match
+        assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192
 
 # ---------------------------------------------------------------------------