From 9f81c11ba08b25dc7dc0f7d7393db9bbd17000ec Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 17 Mar 2026 04:21:16 -0700 Subject: [PATCH] feat: eager fallback to backup model on rate-limit errors (#1730) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a fallback model is configured, switch to it immediately upon detecting rate-limit conditions (429, quota exhaustion, empty/malformed responses) instead of exhausting all retries with exponential backoff. Two eager-fallback checks: 1. Invalid/empty API responses — fallback attempted before retry loop 2. HTTP 429 / rate-limit keyword detection — fallback before backoff Both guarded by _fallback_activated for one-shot semantics. Cherry-picked from PR #1413 by usvimal. Co-authored-by: usvimal --- run_agent.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/run_agent.py b/run_agent.py index 3a0938667..1133d0771 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5132,6 +5132,13 @@ class AIAgent: # This is often rate limiting or provider returning malformed response retry_count += 1 + # Eager fallback: empty/malformed responses are a common + # rate-limit symptom. Switch to fallback immediately + # rather than retrying with extended backoff. + if not self._fallback_activated and self._try_activate_fallback(): + retry_count = 0 + continue + # Check for error field in response (some providers include this) error_msg = "Unknown" provider_name = "Unknown" @@ -5485,6 +5492,24 @@ class AIAgent: # A 413 is a payload-size error — the correct response is to # compress history and retry, not abort immediately. status_code = getattr(api_error, "status_code", None) + + # Eager fallback for rate-limit errors (429 or quota exhaustion). 
+ # When a fallback model is configured, switch immediately instead + # of burning through retries with exponential backoff -- the + # primary provider is unlikely to recover within the retry window. + is_rate_limited = ( + status_code == 429 + or "rate limit" in error_msg + or "too many requests" in error_msg + or "rate_limit" in error_msg + or "usage limit" in error_msg + or "quota" in error_msg + ) + if is_rate_limited and not self._fallback_activated: + if self._try_activate_fallback(): + retry_count = 0 + continue + is_payload_too_large = ( status_code == 413 or 'request entity too large' in error_msg