diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index fb5044137..a2a052d0a 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -35,6 +35,54 @@ ADAPTIVE_EFFORT_MAP = {
     "minimal": "low",
 }
 
+# ── Max output token limits per Anthropic model ──────────────────────
+# Source: Anthropic docs + Cline model catalog. Anthropic's API requires
+# max_tokens as a mandatory field. Previously we hardcoded 16384, which
+# starves thinking-enabled models (thinking tokens count toward the limit).
+_ANTHROPIC_OUTPUT_LIMITS = {
+    # Claude 4.6
+    "claude-opus-4-6": 128_000,
+    "claude-sonnet-4-6": 64_000,
+    # Claude 4.5
+    "claude-opus-4-5": 64_000,
+    "claude-sonnet-4-5": 64_000,
+    "claude-haiku-4-5": 64_000,
+    # Claude 4
+    "claude-opus-4": 32_000,
+    "claude-sonnet-4": 64_000,
+    # Claude 3.7
+    "claude-3-7-sonnet": 64_000,  # 128K only with beta header output-128k-2025-02-19
+    # Claude 3.5
+    "claude-3-5-sonnet": 8_192,
+    "claude-3-5-haiku": 8_192,
+    # Claude 3
+    "claude-3-opus": 4_096,
+    "claude-3-sonnet": 4_096,
+    "claude-3-haiku": 4_096,
+}
+
+# For any model not in the table, assume the highest current limit.
+# Future Anthropic models are unlikely to have *less* output capacity.
+_ANTHROPIC_DEFAULT_OUTPUT_LIMIT = 128_000
+
+
+def _get_anthropic_max_output(model: str) -> int:
+    """Look up the max output token limit for an Anthropic model.
+
+    Uses substring matching against _ANTHROPIC_OUTPUT_LIMITS so date-stamped
+    model IDs (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast)
+    resolve correctly. The longest matching key wins, so "claude-3-5-sonnet"
+    is preferred over a shorter family match such as "claude-3-5".
+    """
+    m = model.lower()
+    best_key = ""
+    best_val = _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
+    for key, val in _ANTHROPIC_OUTPUT_LIMITS.items():
+        if key in m and len(key) > len(best_key):
+            best_key = key
+            best_val = val
+    return best_val
+
 
 def _supports_adaptive_thinking(model: str) -> bool:
     """Return True for Claude 4.6 models that support adaptive thinking."""
@@ -818,9 +866,15 @@ def build_anthropic_kwargs(
     tool_choice: Optional[str] = None,
     is_oauth: bool = False,
     preserve_dots: bool = False,
+    context_length: Optional[int] = None,
 ) -> Dict[str, Any]:
     """Build kwargs for anthropic.messages.create().
 
+    When *max_tokens* is None, the model's native output limit is used
+    (e.g. 128K for Opus 4.6, 64K for Sonnet 4.6). If *context_length*
+    is provided, the effective limit is clamped so it doesn't exceed
+    the context window.
+
     When *is_oauth* is True, applies Claude Code compatibility transforms:
     system prompt prefix, tool name prefixing, and prompt sanitization.
 
@@ -831,7 +885,12 @@ def build_anthropic_kwargs(
     anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
 
     model = normalize_model_name(model, preserve_dots=preserve_dots)
-    effective_max_tokens = max_tokens or 16384
+    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
+
+    # Clamp to context window if the user set a lower context_length
+    # (e.g. custom endpoint with limited capacity).
+    if context_length and effective_max_tokens > context_length:
+        effective_max_tokens = max(context_length - 1, 1)
 
     # ── OAuth: Claude Code identity ──────────────────────────────────
     if is_oauth:
diff --git a/run_agent.py b/run_agent.py
index 8a3fcf614..6a0eb7442 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -4378,6 +4378,10 @@ class AIAgent:
         if self.api_mode == "anthropic_messages":
             from agent.anthropic_adapter import build_anthropic_kwargs
             anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
+            # Pass context_length so the adapter can clamp max_tokens if the
+            # user configured a smaller context window than the model's output limit.
+            ctx_len = getattr(self, "context_compressor", None)
+            ctx_len = ctx_len.context_length if ctx_len else None
             return build_anthropic_kwargs(
                 model=self.model,
                 messages=anthropic_messages,
@@ -4386,6 +4390,7 @@ class AIAgent:
                 reasoning_config=self.reasoning_config,
                 is_oauth=self._is_anthropic_oauth,
                 preserve_dots=self._anthropic_preserve_dots(),
+                context_length=ctx_len,
             )
 
         if self.api_mode == "codex_responses":
diff --git a/tests/test_anthropic_adapter.py b/tests/test_anthropic_adapter.py
index 00f780988..7e2e1c767 100644
--- a/tests/test_anthropic_adapter.py
+++ b/tests/test_anthropic_adapter.py
@@ -926,7 +926,8 @@ class TestBuildAnthropicKwargs:
         )
         assert "thinking" not in kwargs
 
-    def test_default_max_tokens(self):
+    def test_default_max_tokens_uses_model_output_limit(self):
+        """When max_tokens is None, use the model's native output limit."""
         kwargs = build_anthropic_kwargs(
             model="claude-sonnet-4-20250514",
             messages=[{"role": "user", "content": "Hi"}],
@@ -934,7 +935,135 @@ class TestBuildAnthropicKwargs:
             tools=None,
             max_tokens=None,
             reasoning_config=None,
         )
-        assert kwargs["max_tokens"] == 16384
+        assert kwargs["max_tokens"] == 64_000  # Sonnet 4 output limit
+
+    def test_default_max_tokens_opus_4_6(self):
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-6",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 128_000
+
+    def test_default_max_tokens_sonnet_4_6(self):
+        kwargs = build_anthropic_kwargs(
+            model="claude-sonnet-4-6",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 64_000
+
+    def test_default_max_tokens_date_stamped_model(self):
+        """Date-stamped model IDs should resolve via substring match."""
+        kwargs = build_anthropic_kwargs(
+            model="claude-sonnet-4-5-20250929",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 64_000
+
+    def test_default_max_tokens_older_model(self):
+        kwargs = build_anthropic_kwargs(
+            model="claude-3-5-sonnet-20241022",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 8_192
+
+    def test_default_max_tokens_unknown_model_uses_highest(self):
+        """Unknown future models should get the highest known limit."""
+        kwargs = build_anthropic_kwargs(
+            model="claude-ultra-5-20260101",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 128_000
+
+    def test_explicit_max_tokens_overrides_default(self):
+        """User-specified max_tokens should be respected."""
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-6",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config=None,
+        )
+        assert kwargs["max_tokens"] == 4096
+
+    def test_context_length_clamp(self):
+        """max_tokens should be clamped to context_length if it's smaller."""
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-6",  # 128K output
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+            context_length=50000,
+        )
+        assert kwargs["max_tokens"] == 49999  # context_length - 1
+
+    def test_context_length_no_clamp_when_larger(self):
+        """No clamping when context_length exceeds output limit."""
+        kwargs = build_anthropic_kwargs(
+            model="claude-sonnet-4-6",  # 64K output
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=None,
+            max_tokens=None,
+            reasoning_config=None,
+            context_length=200000,
+        )
+        assert kwargs["max_tokens"] == 64_000
+
+
+# ---------------------------------------------------------------------------
+# Model output limit lookup
+# ---------------------------------------------------------------------------
+
+
+class TestGetAnthropicMaxOutput:
+    def test_opus_4_6(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-opus-4-6") == 128_000
+
+    def test_opus_4_6_variant(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-opus-4-6:1m:fast") == 128_000
+
+    def test_sonnet_4_6(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-sonnet-4-6") == 64_000
+
+    def test_sonnet_4_date_stamped(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-sonnet-4-20250514") == 64_000
+
+    def test_claude_3_5_sonnet(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192
+
+    def test_claude_3_opus(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-3-opus-20240229") == 4_096
+
+    def test_unknown_future_model(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("claude-ultra-5-20260101") == 128_000
+
+    def test_longest_prefix_wins(self):
+        """'claude-3-5-sonnet' should match before 'claude-3-5'."""
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        # claude-3-5-sonnet (8192) should win over a hypothetical shorter match
+        assert _get_anthropic_max_output("claude-3-5-sonnet-20241022") == 8_192
 
 # ---------------------------------------------------------------------------