From da02a4e283e4fc7aca249d19d0652925cbe5177b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 12:41:40 -0700 Subject: [PATCH] =?UTF-8?q?fix:=20auxiliary=20client=20payment=20fallback?= =?UTF-8?q?=20=E2=80=94=20retry=20with=20next=20provider=20on=20402=20(#55?= =?UTF-8?q?99)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user runs out of OpenRouter credits and switches to Codex (or any other provider), auxiliary tasks (compression, vision, web_extract) would still try OpenRouter first and fail with 402. Two fixes: 1. Payment fallback in call_llm(): When a resolved provider returns HTTP 402 or a credit-related error, automatically retry with the next available provider in the auto-detection chain. Skips the depleted provider and tries Nous → Custom → Codex → API-key providers. 2. Remove hardcoded OpenRouter fallback: The old code fell back specifically to OpenRouter when auto/custom resolution returned no client. Now falls back to the full auto-detection chain, which handles any available provider — not just OpenRouter. Also extracts _get_provider_chain() as a shared function (replaces inline tuple in _resolve_auto and the new fallback), built at call time so test patches on _try_* functions remain visible. Adds 16 tests covering _is_payment_error(), _get_provider_chain(), _try_payment_fallback(), and call_llm() integration with 402 retry. --- agent/auxiliary_client.py | 131 +++++++++++++++++-- tests/agent/test_auxiliary_client.py | 184 +++++++++++++++++++++++++++ 2 files changed, 304 insertions(+), 11 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 5cceeb9e3..95d5def0a 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -34,6 +34,12 @@ than the provider's default. Per-task direct endpoint overrides (e.g. 
def _get_provider_chain() -> List[tuple]:
    """Return the ordered provider detection chain as ``(label, try_fn)`` pairs.

    The list is rebuilt on every call instead of being stored at module
    level, so the ``_try_*`` names are looked up at call time and test
    patches applied to those functions are picked up.
    """
    return [
        ("openrouter", _try_openrouter),
        ("nous", _try_nous),
        ("local/custom", _try_custom_endpoint),
        ("openai-codex", _try_codex),
        ("api-key", _resolve_api_key_provider),
    ]


def _is_payment_error(exc: Exception) -> bool:
    """Detect payment/credit/quota exhaustion errors.

    Args:
        exc: The exception raised by a provider call. Its optional
            ``status_code`` attribute and its string form are inspected.

    Returns:
        True for HTTP 402 (Payment Required), and for errors with status 429
        or no status at all whose message contains a billing-related keyword.
        False for everything else, including plain rate limits.
    """
    status = getattr(exc, "status_code", None)
    if status == 402:
        return True
    # Any other explicit status (500, 401, ...) is never a billing error;
    # only 429 and status-less exceptions get the message inspection.
    if status not in (429, None):
        return False
    message = str(exc).lower()
    # Providers sometimes wrap credit exhaustion in a 429 or in a generic
    # exception, so the message text is the only remaining signal.
    billing_keywords = (
        "credits",
        "insufficient funds",
        "can only afford",
        "billing",
        "payment required",
    )
    return any(keyword in message for keyword in billing_keywords)


def _try_payment_fallback(
    failed_provider: str,
    task: Optional[str] = None,
) -> Tuple[Optional[Any], Optional[str], str]:
    """Try alternative providers after a payment/credit error.

    Walks the chain from ``_get_provider_chain()`` in order, skipping the
    entry that corresponds to ``failed_provider``, and returns the first
    provider whose resolver yields a client.

    Args:
        failed_provider: Label of the provider that returned the payment
            error. Matched case-insensitively; common aliases ("codex",
            "custom") are mapped onto chain labels.
        task: Optional task name, used only in log messages.

    Returns:
        ``(client, model, provider_label)`` for the first available
        alternative, or ``(None, None, "")`` if no fallback is available.

    NOTE(review): a label that is not a chain entry or alias (presumably
    "auto" can reach here) skips nothing, so the provider that just failed
    may be selected again. Fixing that needs the caller to pass the backend
    that actually served the request. TODO confirm against call_llm().
    """
    # Normalise the failed provider label for matching. Tolerate None so a
    # missing label cannot raise while the caller is handling another error.
    skip = (failed_provider or "").lower().strip()
    skip_labels = {skip}
    # Also skip the main provider when its name is contained in the failed
    # label (substring match, e.g. main "custom" vs failed "local/custom").
    main_provider = _read_main_provider()
    if main_provider and main_provider.lower() in skip:
        skip_labels.add(main_provider.lower())
    # Map common resolved_provider values back to chain labels.
    alias_to_label = {
        "openrouter": "openrouter",
        "nous": "nous",
        "openai-codex": "openai-codex",
        "codex": "openai-codex",
        "custom": "local/custom",
        "local/custom": "local/custom",
    }
    skip_chain_labels = {alias_to_label.get(name, name) for name in skip_labels}

    tried = []
    for label, try_fn in _get_provider_chain():
        if label in skip_chain_labels:
            continue
        try:
            client, model = try_fn()
        except Exception as probe_err:
            # Best-effort boundary: this runs while the caller is already
            # handling a payment error, so a broken probe must not replace
            # that error or stop the remaining providers from being tried.
            logger.warning(
                "Auxiliary %s: fallback provider %s could not be resolved: %s",
                task or "call", label, probe_err,
            )
            client, model = None, None
        if client is not None:
            logger.info(
                "Auxiliary %s: payment error on %s — falling back to %s (%s)",
                task or "call", failed_provider, label, model or "default",
            )
            return client, model, label
        tried.append(label)

    logger.warning(
        "Auxiliary %s: payment error on %s and no fallback available (tried: %s)",
        task or "call", failed_provider, ", ".join(tried),
    )
    return None, None, ""
@@ -905,10 +991,7 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]: # ── Step 2: aggregator / fallback chain ────────────────────────────── tried = [] - for try_fn in (_try_openrouter, _try_nous, _try_custom_endpoint, - _try_codex, _resolve_api_key_provider): - fn_name = getattr(try_fn, "__name__", "unknown") - label = _AUTO_PROVIDER_LABELS.get(fn_name, fn_name) + for label, try_fn in _get_provider_chain(): client, model = try_fn() if client is not None: if tried: @@ -1786,12 +1869,15 @@ def call_llm( f"was found. Set the {_explicit.upper()}_API_KEY environment " f"variable, or switch to a different provider with `hermes model`." ) - # For auto/custom, fall back to OpenRouter + # For auto/custom with no credentials, try the full auto chain + # rather than hardcoding OpenRouter (which may be depleted). + # Pass model=None so each provider uses its own default — + # resolved_model may be an OpenRouter-format slug that doesn't + # work on other providers. if not resolved_base_url: - logger.info("Auxiliary %s: provider %s unavailable, falling back to openrouter", + logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain", task or "call", resolved_provider) - client, final_model = _get_cached_client( - "openrouter", resolved_model or _OPENROUTER_MODEL) + client, final_model = _get_cached_client("auto") if client is None: raise RuntimeError( f"No LLM provider configured for task={task} provider={resolved_provider}. " @@ -1812,7 +1898,7 @@ def call_llm( tools=tools, timeout=effective_timeout, extra_body=extra_body, base_url=resolved_base_url) - # Handle max_tokens vs max_completion_tokens retry + # Handle max_tokens vs max_completion_tokens retry, then payment fallback. 
try: return client.chat.completions.create(**kwargs) except Exception as first_err: @@ -1820,7 +1906,30 @@ def call_llm( if "max_tokens" in err_str or "unsupported_parameter" in err_str: kwargs.pop("max_tokens", None) kwargs["max_completion_tokens"] = max_tokens - return client.chat.completions.create(**kwargs) + try: + return client.chat.completions.create(**kwargs) + except Exception as retry_err: + # If the max_tokens retry also hits a payment error, + # fall through to the payment fallback below. + if not _is_payment_error(retry_err): + raise + first_err = retry_err + + # ── Payment / credit exhaustion fallback ────────────────────── + # When the resolved provider returns 402 or a credit-related error, + # try alternative providers instead of giving up. This handles the + # common case where a user runs out of OpenRouter credits but has + # Codex OAuth or another provider available. + if _is_payment_error(first_err): + fb_client, fb_model, fb_label = _try_payment_fallback( + resolved_provider, task) + if fb_client is not None: + fb_kwargs = _build_call_kwargs( + fb_label, fb_model, messages, + temperature=temperature, max_tokens=max_tokens, + tools=tools, timeout=effective_timeout, + extra_body=extra_body) + return fb_client.chat.completions.create(**fb_kwargs) raise diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index eb03a64c9..32f481988 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -14,8 +14,12 @@ from agent.auxiliary_client import ( resolve_vision_provider_client, resolve_provider_client, auxiliary_max_tokens_param, + call_llm, _read_codex_access_token, _get_auxiliary_provider, + _get_provider_chain, + _is_payment_error, + _try_payment_fallback, _resolve_forced_provider, _resolve_auto, ) @@ -1106,3 +1110,183 @@ class TestAuxiliaryMaxTokensParam: patch("agent.auxiliary_client._read_codex_access_token", return_value=None): result = auxiliary_max_tokens_param(1024) 
# ── Payment / credit exhaustion fallback ─────────────────────────────────


class TestIsPaymentError:
    """_is_payment_error detects 402 and credit-related errors."""

    def test_402_status_code(self):
        exc = Exception("Payment Required")
        exc.status_code = 402
        assert _is_payment_error(exc) is True

    def test_402_with_credits_message(self):
        exc = Exception("You requested up to 65535 tokens, but can only afford 8029")
        exc.status_code = 402
        assert _is_payment_error(exc) is True

    def test_429_with_credits_message(self):
        exc = Exception("insufficient credits remaining")
        exc.status_code = 429
        assert _is_payment_error(exc) is True

    def test_429_without_credits_message_is_not_payment(self):
        """Normal rate limits should NOT be treated as payment errors."""
        exc = Exception("Rate limit exceeded, try again in 2 seconds")
        exc.status_code = 429
        assert _is_payment_error(exc) is False

    def test_generic_500_is_not_payment(self):
        exc = Exception("Internal server error")
        exc.status_code = 500
        assert _is_payment_error(exc) is False

    def test_no_status_code_with_billing_message(self):
        exc = Exception("billing: payment required for this request")
        assert _is_payment_error(exc) is True

    def test_no_status_code_no_message(self):
        exc = Exception("connection reset")
        assert _is_payment_error(exc) is False


class TestGetProviderChain:
    """_get_provider_chain() resolves functions at call time (testable)."""

    def test_returns_five_entries(self):
        chain = _get_provider_chain()
        assert len(chain) == 5
        labels = [label for label, _ in chain]
        assert labels == ["openrouter", "nous", "local/custom", "openai-codex", "api-key"]

    def test_picks_up_patched_functions(self):
        """Patches on _try_* functions must be visible in the chain."""

        def sentinel():
            return ("patched", "model")

        with patch("agent.auxiliary_client._try_openrouter", sentinel):
            chain = _get_provider_chain()
        assert chain[0] == ("openrouter", sentinel)


class TestTryPaymentFallback:
    """_try_payment_fallback skips the failed provider and tries alternatives."""

    def test_skips_failed_provider(self):
        """The failed provider must not be probed even if it could return a client."""
        mock_client = MagicMock()
        with patch("agent.auxiliary_client._try_openrouter",
                   return_value=(MagicMock(), "or-model")) as mock_or, \
             patch("agent.auxiliary_client._try_nous", return_value=(mock_client, "nous-model")), \
             patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"):
            client, model, label = _try_payment_fallback("openrouter", task="compression")
        assert client is mock_client
        assert model == "nous-model"
        assert label == "nous"
        # A still-resolvable (but depleted) provider is skipped, not retried.
        mock_or.assert_not_called()

    def test_returns_none_when_no_fallback(self):
        with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_codex", return_value=(None, None)), \
             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
             patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"):
            client, model, label = _try_payment_fallback("openrouter")
        assert client is None
        assert model is None
        assert label == ""

    def test_codex_alias_maps_to_chain_label(self):
        """'codex' should map to 'openai-codex' in the skip set."""
        codex_client = MagicMock()
        api_key_client = MagicMock()
        # Everything ahead of codex is unavailable, so the result shows
        # whether the "codex" alias really caused the codex entry to be skipped.
        with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_codex",
                   return_value=(codex_client, "gpt-5.2-codex")) as mock_codex, \
             patch("agent.auxiliary_client._resolve_api_key_provider",
                   return_value=(api_key_client, "api-model")), \
             patch("agent.auxiliary_client._read_main_provider", return_value="openai-codex"):
            client, model, label = _try_payment_fallback("codex", task="vision")
        assert client is api_key_client
        assert model == "api-model"
        assert label == "api-key"
        mock_codex.assert_not_called()

    def test_skips_to_codex_when_or_and_nous_fail(self):
        mock_codex = MagicMock()
        with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
             patch("agent.auxiliary_client._try_codex", return_value=(mock_codex, "gpt-5.2-codex")), \
             patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"):
            client, model, label = _try_payment_fallback("openrouter")
        assert client is mock_codex
        assert model == "gpt-5.2-codex"
        assert label == "openai-codex"


class TestCallLlmPaymentFallback:
    """call_llm() retries with a different provider on 402 / payment errors."""

    def _make_402_error(self, msg="Payment Required: insufficient credits"):
        """Build an exception that looks like an HTTP 402 from a provider SDK."""
        exc = Exception(msg)
        exc.status_code = 402
        return exc

    def test_402_triggers_fallback(self, monkeypatch):
        """When the primary provider returns 402, call_llm tries the next one."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

        primary_client = MagicMock()
        primary_client.chat.completions.create.side_effect = self._make_402_error()

        fallback_client = MagicMock()
        fallback_response = MagicMock()
        fallback_client.chat.completions.create.return_value = fallback_response

        with patch("agent.auxiliary_client._get_cached_client",
                   return_value=(primary_client, "google/gemini-3-flash-preview")), \
             patch("agent.auxiliary_client._resolve_task_provider_model",
                   return_value=("openrouter", "google/gemini-3-flash-preview", None, None)), \
             patch("agent.auxiliary_client._try_payment_fallback",
                   return_value=(fallback_client, "gpt-5.2-codex", "openai-codex")) as mock_fb:
            result = call_llm(
                task="compression",
                messages=[{"role": "user", "content": "hello"}],
            )

        assert result is fallback_response
        mock_fb.assert_called_once_with("openrouter", "compression")
        # Fallback call should use the fallback model
        fb_kwargs = fallback_client.chat.completions.create.call_args.kwargs
        assert fb_kwargs["model"] == "gpt-5.2-codex"

    def test_non_payment_error_not_caught(self, monkeypatch):
        """Non-payment errors (500, connection, etc.) should NOT trigger fallback."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

        primary_client = MagicMock()
        server_err = Exception("Internal Server Error")
        server_err.status_code = 500
        primary_client.chat.completions.create.side_effect = server_err

        with patch("agent.auxiliary_client._get_cached_client",
                   return_value=(primary_client, "google/gemini-3-flash-preview")), \
             patch("agent.auxiliary_client._resolve_task_provider_model",
                   return_value=("openrouter", "google/gemini-3-flash-preview", None, None)):
            with pytest.raises(Exception, match="Internal Server Error"):
                call_llm(
                    task="compression",
                    messages=[{"role": "user", "content": "hello"}],
                )

    def test_402_with_no_fallback_reraises(self, monkeypatch):
        """When 402 hits and no fallback is available, the original error propagates."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

        primary_client = MagicMock()
        primary_client.chat.completions.create.side_effect = self._make_402_error()

        with patch("agent.auxiliary_client._get_cached_client",
                   return_value=(primary_client, "google/gemini-3-flash-preview")), \
             patch("agent.auxiliary_client._resolve_task_provider_model",
                   return_value=("openrouter", "google/gemini-3-flash-preview", None, None)), \
             patch("agent.auxiliary_client._try_payment_fallback",
                   return_value=(None, None, "")):
            with pytest.raises(Exception, match="insufficient credits"):
                call_llm(
                    task="compression",
                    messages=[{"role": "user", "content": "hello"}],
                )