fix(agent): update context compressor limits after fallback activation (#3305)

When _try_activate_fallback() switches to the fallback model, it
updates the agent's model/provider/client but never touches
self.context_compressor. The compressor keeps the primary model's
context_length and threshold_tokens, so compression decisions use
wrong limits — a 200K primary → 32K fallback still uses 200K-based
thresholds, causing oversized sessions to overflow the fallback.

Update the compressor's model, credentials, context_length, and
threshold_tokens after fallback activation using get_model_context_length()
for the new model.

Cherry-picked from PR #3202 by binhnt92.

Co-authored-by: binhnt92 <binhnt.ht.92@gmail.com>
This commit is contained in:
Teknium
2026-03-26 18:10:50 -07:00
committed by GitHub
parent 18d28c63a7
commit 60fdb58ce4
2 changed files with 108 additions and 0 deletions

View File

@@ -4134,6 +4134,25 @@ class AIAgent:
or is_native_anthropic
)
# Update context compressor limits for the fallback model.
# Without this, compression decisions use the primary model's
# context window (e.g. 200K) instead of the fallback's (e.g. 32K),
# causing oversized sessions to overflow the fallback.
if hasattr(self, 'context_compressor') and self.context_compressor:
from agent.model_metadata import get_model_context_length
fb_context_length = get_model_context_length(
self.model, base_url=self.base_url,
api_key=self.api_key, provider=self.provider,
)
self.context_compressor.model = self.model
self.context_compressor.base_url = self.base_url
self.context_compressor.api_key = self.api_key
self.context_compressor.provider = self.provider
self.context_compressor.context_length = fb_context_length
self.context_compressor.threshold_tokens = int(
fb_context_length * self.context_compressor.threshold_percent
)
self._emit_status(
f"🔄 Primary model failed — switching to fallback: "
f"{fb_model} via {fb_provider}"

View File

@@ -0,0 +1,89 @@
"""Tests that _try_activate_fallback updates the context compressor."""
from unittest.mock import MagicMock, patch
from run_agent import AIAgent
from agent.context_compressor import ContextCompressor
def _make_agent_with_compressor() -> AIAgent:
    """Create a bare AIAgent (bypassing __init__) wired to a primary-model compressor.

    The agent is configured as if it were running against OpenRouter with a
    fallback to OpenAI's gpt-4o; its context_compressor still carries the
    primary model's credentials and limits.
    """
    agent = AIAgent.__new__(AIAgent)

    # Primary model configuration (OpenRouter-style).
    agent.model = "primary-model"
    agent.provider = "openrouter"
    agent.base_url = "https://openrouter.ai/api/v1"
    agent.api_key = "sk-primary"
    agent.api_mode = "chat_completions"
    agent.client = MagicMock()
    agent.quiet_mode = True

    # Fallback is configured but not yet activated.
    agent._fallback_activated = False
    agent._fallback_model = {
        "provider": "openai",
        "model": "gpt-4o",
    }

    # Compressor initially bound to the primary model's settings.
    agent.context_compressor = ContextCompressor(
        model="primary-model",
        threshold_percent=0.50,
        base_url="https://openrouter.ai/api/v1",
        api_key="sk-primary",
        provider="openrouter",
        quiet_mode=True,
    )
    return agent
@patch("agent.auxiliary_client.resolve_provider_client")
@patch("agent.model_metadata.get_model_context_length", return_value=128_000)
def test_compressor_updated_on_fallback(mock_ctx_len, mock_resolve):
    """After fallback activation, the compressor must reflect the fallback model."""
    agent = _make_agent_with_compressor()
    # Sanity: the compressor starts out bound to the primary model.
    assert agent.context_compressor.model == "primary-model"

    fallback_client = MagicMock()
    fallback_client.base_url = "https://api.openai.com/v1"
    fallback_client.api_key = "sk-fallback"
    mock_resolve.return_value = (fallback_client, None)
    agent._is_direct_openai_url = lambda url: "api.openai.com" in url
    agent._emit_status = lambda msg: None

    assert agent._try_activate_fallback() is True
    assert agent._fallback_activated is True

    # Every compressor field must now describe the fallback model, and the
    # token threshold must be recomputed from the fallback context length.
    compressor = agent.context_compressor
    assert compressor.model == "gpt-4o"
    assert compressor.provider == "openai"
    assert compressor.base_url == "https://api.openai.com/v1"
    assert compressor.api_key == "sk-fallback"
    assert compressor.context_length == 128_000
    assert compressor.threshold_tokens == int(128_000 * compressor.threshold_percent)
@patch("agent.auxiliary_client.resolve_provider_client")
@patch("agent.model_metadata.get_model_context_length", return_value=128_000)
def test_compressor_not_present_does_not_crash(mock_ctx_len, mock_resolve):
    """If the agent has no compressor, fallback should still succeed.

    The compressor-update branch in _try_activate_fallback is guarded by a
    truthiness check, so a None compressor must be skipped cleanly: fallback
    still activates and the compressor attribute is left untouched.
    """
    agent = _make_agent_with_compressor()
    agent.context_compressor = None

    fb_client = MagicMock()
    fb_client.base_url = "https://api.openai.com/v1"
    fb_client.api_key = "sk-fallback"
    mock_resolve.return_value = (fb_client, None)
    agent._is_direct_openai_url = lambda url: "api.openai.com" in url
    agent._emit_status = lambda msg: None

    result = agent._try_activate_fallback()

    assert result is True
    # Strengthened: the original test only checked the return value, so it
    # would pass even if fallback state never flipped or the None compressor
    # was replaced. Pin both explicitly.
    assert agent._fallback_activated is True
    assert agent.context_compressor is None