Merge pull request #1128 from ASRagab/fix/adaptive-thinking-budget-tokens

fix: use adaptive thinking without budget_tokens for Claude 4.6 models
2026-03-12 19:32:46 -07:00
parent 9dfa81ab4b 3dc148ab6f
commit 15911d70c0
2 changed files with 51 additions and 8 deletions
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -25,6 +25,19 @@ except ImportError:
 logger = logging.getLogger(__name__)

 THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
+ADAPTIVE_EFFORT_MAP = {
+    "xhigh": "max",
+    "high": "high",
+    "medium": "medium",
+    "low": "low",
+    "minimal": "low",
+}
+
+
+def _supports_adaptive_thinking(model: str) -> bool:
+    """Return True for Claude 4.6 models that support adaptive thinking."""
+    return any(v in model for v in ("4-6", "4.6"))
+

 # Beta headers for enhanced features (sent with ALL auth types)
 _COMMON_BETAS = [
@@ -398,20 +411,23 @@ def build_anthropic_kwargs(
            # Specific tool name
            kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}

-    # Map reasoning_config to Anthropic's thinking parameter
-    # Newer models (4.6+) prefer "adaptive" thinking; older models use "enabled"
+    # Map reasoning_config to Anthropic's thinking parameter.
+    # Claude 4.6 models use adaptive thinking + output_config.effort.
+    # Older models use manual thinking with budget_tokens.
    if reasoning_config and isinstance(reasoning_config, dict):
        if reasoning_config.get("enabled") is not False:
-            effort = reasoning_config.get("effort", "medium")
+            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
-            # Use adaptive thinking for 4.5+ models (they deprecate type=enabled)
-            if any(v in model for v in ("4-6", "4-5", "4.6", "4.5")):
-                kwargs["thinking"] = {"type": "adaptive", "budget_tokens": budget}
+            if _supports_adaptive_thinking(model):
+                kwargs["thinking"] = {"type": "adaptive"}
+                kwargs["output_config"] = {
+                    "effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium")
+                }
            else:
                kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
                # Anthropic requires temperature=1 when thinking is enabled on older models
                kwargs["temperature"] = 1
-            kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
+                kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)

    return kwargs

--- a/tests/test_anthropic_adapter.py
+++ b/tests/test_anthropic_adapter.py
@@ -314,7 +314,7 @@ class TestBuildAnthropicKwargs:
        )
        assert kwargs["model"] == "claude-sonnet-4-20250514"

-    def test_reasoning_config_maps_to_thinking(self):
+    def test_reasoning_config_maps_to_manual_thinking_for_pre_4_6_models(self):
        kwargs = build_anthropic_kwargs(
            model="claude-sonnet-4-20250514",
            messages=[{"role": "user", "content": "think hard"}],
@@ -324,7 +324,34 @@ class TestBuildAnthropicKwargs:
        )
        assert kwargs["thinking"]["type"] == "enabled"
        assert kwargs["thinking"]["budget_tokens"] == 16000
+        assert kwargs["temperature"] == 1
        assert kwargs["max_tokens"] >= 16000 + 4096
+        assert "output_config" not in kwargs
+
+    def test_reasoning_config_maps_to_adaptive_thinking_for_4_6_models(self):
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-6",
+            messages=[{"role": "user", "content": "think hard"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": True, "effort": "high"},
+        )
+        assert kwargs["thinking"] == {"type": "adaptive"}
+        assert kwargs["output_config"] == {"effort": "high"}
+        assert "budget_tokens" not in kwargs["thinking"]
+        assert "temperature" not in kwargs
+        assert kwargs["max_tokens"] == 4096
+
+    def test_reasoning_config_maps_xhigh_to_max_effort_for_4_6_models(self):
+        kwargs = build_anthropic_kwargs(
+            model="claude-sonnet-4-6",
+            messages=[{"role": "user", "content": "think harder"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": True, "effort": "xhigh"},
+        )
+        assert kwargs["thinking"] == {"type": "adaptive"}
+        assert kwargs["output_config"] == {"effort": "max"}

    def test_reasoning_disabled(self):
        kwargs = build_anthropic_kwargs(