feat: centralized provider router + fix Codex vision bypass + vision error handling
Three interconnected fixes for auxiliary client infrastructure:
1. CENTRALIZED PROVIDER ROUTER (auxiliary_client.py)
Add resolve_provider_client(provider, model, async_mode) — a single
entry point for creating properly configured clients. Given a provider
name and optional model, it handles auth lookup (env vars, OAuth
tokens, auth.json), base URL resolution, provider-specific headers,
and API format differences (Chat Completions vs Responses API for
Codex). All auxiliary consumers should route through this instead of
ad-hoc env var lookups.
Refactored get_text_auxiliary_client, get_async_text_auxiliary_client,
and get_vision_auxiliary_client to use the router internally.
2. FIX CODEX VISION BYPASS (vision_tools.py)
vision_tools.py was constructing a raw AsyncOpenAI client from the
sync vision client's api_key/base_url, completely bypassing the Codex
Responses API adapter. When the vision provider resolved to Codex,
the raw client would hit chatgpt.com/backend-api/codex with
chat.completions.create() which only supports the Responses API.
Fix: Added get_async_vision_auxiliary_client() which properly wraps
Codex into AsyncCodexAuxiliaryClient. vision_tools.py now uses this
instead of manual client construction.
3. FIX COMPRESSION FALLBACK + VISION ERROR HANDLING
- context_compressor.py: Removed _get_fallback_client() which blindly
looked for OPENAI_API_KEY + OPENAI_BASE_URL (fails for Codex OAuth,
API-key providers, users without OPENAI_BASE_URL set). Replaced
with fallback loop through resolve_provider_client() for each
known provider, with same-provider dedup.
- vision_tools.py: Added error detection for vision capability
failures. Returns clear message to the model when the configured
model doesn't support vision, instead of a generic error.
Addresses #886
This commit is contained in:
@@ -499,6 +499,188 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
return None, None
|
||||
|
||||
|
||||
# ── Centralized Provider Router ─────────────────────────────────────────────
|
||||
#
|
||||
# resolve_provider_client() is the single entry point for creating a properly
|
||||
# configured client given a (provider, model) pair. It handles auth lookup,
|
||||
# base URL resolution, provider-specific headers, and API format differences
|
||||
# (Chat Completions vs Responses API for Codex).
|
||||
#
|
||||
# All auxiliary consumer code should go through this or the public helpers
|
||||
# below — never look up auth env vars ad-hoc.
|
||||
|
||||
|
||||
def _to_async_client(sync_client, model: str):
|
||||
"""Convert a sync client to its async counterpart, preserving Codex routing."""
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
if isinstance(sync_client, CodexAuxiliaryClient):
|
||||
return AsyncCodexAuxiliaryClient(sync_client), model
|
||||
|
||||
async_kwargs = {
|
||||
"api_key": sync_client.api_key,
|
||||
"base_url": str(sync_client.base_url),
|
||||
}
|
||||
base_lower = str(sync_client.base_url).lower()
|
||||
if "openrouter" in base_lower:
|
||||
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
elif "api.kimi.com" in base_lower:
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
|
||||
|
||||
def resolve_provider_client(
|
||||
provider: str,
|
||||
model: str = None,
|
||||
async_mode: bool = False,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Central router: given a provider name and optional model, return a
|
||||
configured client with the correct auth, base URL, and API format.
|
||||
|
||||
The returned client always exposes ``.chat.completions.create()`` — for
|
||||
Codex/Responses API providers, an adapter handles the translation
|
||||
transparently.
|
||||
|
||||
Args:
|
||||
provider: Provider identifier. One of:
|
||||
"openrouter", "nous", "openai-codex" (or "codex"),
|
||||
"zai", "kimi-coding", "minimax", "minimax-cn", "nous-api",
|
||||
"custom" (OPENAI_BASE_URL + OPENAI_API_KEY),
|
||||
"auto" (full auto-detection chain).
|
||||
model: Model slug override. If None, uses the provider's default
|
||||
auxiliary model.
|
||||
async_mode: If True, return an async-compatible client.
|
||||
|
||||
Returns:
|
||||
(client, resolved_model) or (None, None) if auth is unavailable.
|
||||
"""
|
||||
# Normalise aliases
|
||||
provider = (provider or "auto").strip().lower()
|
||||
if provider == "codex":
|
||||
provider = "openai-codex"
|
||||
if provider == "main":
|
||||
provider = "custom"
|
||||
|
||||
# ── Auto: try all providers in priority order ────────────────────
|
||||
if provider == "auto":
|
||||
client, resolved = _resolve_auto()
|
||||
if client is None:
|
||||
return None, None
|
||||
final_model = model or resolved
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── OpenRouter ───────────────────────────────────────────────────
|
||||
if provider == "openrouter":
|
||||
client, default = _try_openrouter()
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: openrouter requested "
|
||||
"but OPENROUTER_API_KEY not set")
|
||||
return None, None
|
||||
final_model = model or default
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── Nous Portal (OAuth) ──────────────────────────────────────────
|
||||
if provider == "nous":
|
||||
client, default = _try_nous()
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: nous requested "
|
||||
"but Nous Portal not configured (run: hermes login)")
|
||||
return None, None
|
||||
final_model = model or default
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
|
||||
if provider == "openai-codex":
|
||||
client, default = _try_codex()
|
||||
if client is None:
|
||||
logger.warning("resolve_provider_client: openai-codex requested "
|
||||
"but no Codex OAuth token found (run: hermes model)")
|
||||
return None, None
|
||||
final_model = model or default
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
||||
if provider == "custom":
|
||||
# Try custom first, then codex, then API-key providers
|
||||
for try_fn in (_try_custom_endpoint, _try_codex,
|
||||
_resolve_api_key_provider):
|
||||
client, default = try_fn()
|
||||
if client is not None:
|
||||
final_model = model or default
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning("resolve_provider_client: custom/main requested "
|
||||
"but no endpoint credentials found")
|
||||
return None, None
|
||||
|
||||
# ── API-key providers from PROVIDER_REGISTRY ─────────────────────
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY, _resolve_kimi_base_url
|
||||
except ImportError:
|
||||
logger.debug("hermes_cli.auth not available for provider %s", provider)
|
||||
return None, None
|
||||
|
||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||
if pconfig is None:
|
||||
logger.warning("resolve_provider_client: unknown provider %r", provider)
|
||||
return None, None
|
||||
|
||||
if pconfig.auth_type == "api_key":
|
||||
# Find the first configured API key
|
||||
api_key = ""
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
api_key = os.getenv(env_var, "").strip()
|
||||
if api_key:
|
||||
break
|
||||
if not api_key:
|
||||
logger.warning("resolve_provider_client: provider %s has no API "
|
||||
"key configured (tried: %s)",
|
||||
provider, ", ".join(pconfig.api_key_env_vars))
|
||||
return None, None
|
||||
|
||||
# Resolve base URL (env override → provider-specific logic → default)
|
||||
base_url_override = os.getenv(pconfig.base_url_env_var, "").strip() if pconfig.base_url_env_var else ""
|
||||
if provider == "kimi-coding":
|
||||
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, base_url_override)
|
||||
elif base_url_override:
|
||||
base_url = base_url_override
|
||||
else:
|
||||
base_url = pconfig.inference_base_url
|
||||
|
||||
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
|
||||
final_model = model or default_model
|
||||
|
||||
# Provider-specific headers
|
||||
headers = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
headers["User-Agent"] = "KimiCLI/1.0"
|
||||
|
||||
client = OpenAI(api_key=api_key, base_url=base_url,
|
||||
**({"default_headers": headers} if headers else {}))
|
||||
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
|
||||
# OAuth providers — route through their specific try functions
|
||||
if provider == "nous":
|
||||
return resolve_provider_client("nous", model, async_mode)
|
||||
if provider == "openai-codex":
|
||||
return resolve_provider_client("openai-codex", model, async_mode)
|
||||
# nous-api is api_key type so it's handled above
|
||||
logger.warning("resolve_provider_client: OAuth provider %s not "
|
||||
"directly supported, try 'auto'", provider)
|
||||
return None, None
|
||||
|
||||
logger.warning("resolve_provider_client: unhandled auth_type %s for %s",
|
||||
pconfig.auth_type, provider)
|
||||
return None, None
|
||||
|
||||
|
||||
# ── Public API ──────────────────────────────────────────────────────────────
|
||||
|
||||
def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
@@ -513,8 +695,8 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
|
||||
"""
|
||||
forced = _get_auxiliary_provider(task)
|
||||
if forced != "auto":
|
||||
return _resolve_forced_provider(forced)
|
||||
return _resolve_auto()
|
||||
return resolve_provider_client(forced)
|
||||
return resolve_provider_client("auto")
|
||||
|
||||
|
||||
def get_async_text_auxiliary_client(task: str = ""):
|
||||
@@ -524,24 +706,10 @@ def get_async_text_auxiliary_client(task: str = ""):
|
||||
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
||||
Returns (None, None) when no provider is available.
|
||||
"""
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
sync_client, model = get_text_auxiliary_client(task)
|
||||
if sync_client is None:
|
||||
return None, None
|
||||
|
||||
if isinstance(sync_client, CodexAuxiliaryClient):
|
||||
return AsyncCodexAuxiliaryClient(sync_client), model
|
||||
|
||||
async_kwargs = {
|
||||
"api_key": sync_client.api_key,
|
||||
"base_url": str(sync_client.base_url),
|
||||
}
|
||||
if "openrouter" in str(sync_client.base_url).lower():
|
||||
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
elif "api.kimi.com" in str(sync_client.base_url).lower():
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
forced = _get_auxiliary_provider(task)
|
||||
if forced != "auto":
|
||||
return resolve_provider_client(forced, async_mode=True)
|
||||
return resolve_provider_client("auto", async_mode=True)
|
||||
|
||||
|
||||
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
@@ -559,7 +727,7 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""
|
||||
forced = _get_auxiliary_provider("vision")
|
||||
if forced != "auto":
|
||||
return _resolve_forced_provider(forced)
|
||||
return resolve_provider_client(forced)
|
||||
# Auto: try providers known to support multimodal first, then fall
|
||||
# back to the user's custom endpoint. Many local models (Qwen-VL,
|
||||
# LLaVA, Pixtral, etc.) support vision — skipping them entirely
|
||||
@@ -573,6 +741,21 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
return None, None
|
||||
|
||||
|
||||
def get_async_vision_auxiliary_client():
|
||||
"""Return (async_client, model_slug) for async vision consumers.
|
||||
|
||||
Properly handles Codex routing — unlike manually constructing
|
||||
AsyncOpenAI from a sync client, this preserves the Responses API
|
||||
adapter for Codex providers.
|
||||
|
||||
Returns (None, None) when no provider is available.
|
||||
"""
|
||||
sync_client, model = get_vision_auxiliary_client()
|
||||
if sync_client is None:
|
||||
return None, None
|
||||
return _to_async_client(sync_client, model)
|
||||
|
||||
|
||||
def get_auxiliary_extra_body() -> dict:
|
||||
"""Return extra_body kwargs for auxiliary API calls.
|
||||
|
||||
|
||||
@@ -127,20 +127,38 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to generate context summary with auxiliary model: {e}")
|
||||
|
||||
# 2. Fallback: try the user's main model endpoint
|
||||
fallback_client, fallback_model = self._get_fallback_client()
|
||||
if fallback_client is not None:
|
||||
# 2. Fallback: re-try via the centralized provider router.
|
||||
# This covers all configured providers (Codex OAuth, API-key
|
||||
# providers, etc.) without ad-hoc env var lookups.
|
||||
from agent.auxiliary_client import resolve_provider_client
|
||||
fallback_providers = ["custom", "openrouter", "nous", "codex"]
|
||||
for fb_provider in fallback_providers:
|
||||
try:
|
||||
logger.info("Retrying context summary with main model (%s)", fallback_model)
|
||||
summary = self._call_summary_model(fallback_client, fallback_model, prompt)
|
||||
self.client = fallback_client
|
||||
self.summary_model = fallback_model
|
||||
fb_client, fb_model = resolve_provider_client(
|
||||
fb_provider, model=self.model)
|
||||
if fb_client is None:
|
||||
continue
|
||||
# Don't retry the same client that just failed
|
||||
if (self.client is not None
|
||||
and hasattr(fb_client, "base_url")
|
||||
and hasattr(self.client, "base_url")
|
||||
and str(fb_client.base_url) == str(self.client.base_url)):
|
||||
continue
|
||||
logger.info("Retrying context summary with fallback provider "
|
||||
"%s (%s)", fb_provider, fb_model)
|
||||
summary = self._call_summary_model(fb_client, fb_model, prompt)
|
||||
# Promote successful fallback for future compressions
|
||||
self.client = fb_client
|
||||
self.summary_model = fb_model
|
||||
return summary
|
||||
except Exception as fallback_err:
|
||||
logging.warning(f"Main model summary also failed: {fallback_err}")
|
||||
logging.warning("Fallback provider %s failed: %s",
|
||||
fb_provider, fallback_err)
|
||||
|
||||
# 3. All models failed — return None so the caller drops turns without a summary
|
||||
logging.warning("Context compression: no model available for summary. Middle turns will be dropped without summary.")
|
||||
# 3. All providers failed — return None so the caller drops turns
|
||||
# without a summary.
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary.")
|
||||
return None
|
||||
|
||||
def _call_summary_model(self, client, model: str, prompt: str) -> str:
|
||||
@@ -170,35 +188,6 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
summary = "[CONTEXT SUMMARY]: " + summary
|
||||
return summary
|
||||
|
||||
def _get_fallback_client(self):
|
||||
"""Try to build a fallback client from the main model's endpoint config.
|
||||
|
||||
When the primary auxiliary client fails (e.g. stale OpenRouter key), this
|
||||
creates a client using the user's active custom endpoint (OPENAI_BASE_URL)
|
||||
so compression can still produce a real summary instead of a static string.
|
||||
|
||||
Returns (client, model) or (None, None).
|
||||
"""
|
||||
custom_base = os.getenv("OPENAI_BASE_URL")
|
||||
custom_key = os.getenv("OPENAI_API_KEY")
|
||||
if not custom_base or not custom_key:
|
||||
return None, None
|
||||
|
||||
# Don't fallback to the same provider that just failed
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
if custom_base.rstrip("/") == OPENROUTER_BASE_URL.rstrip("/"):
|
||||
return None, None
|
||||
|
||||
model = os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or self.model
|
||||
try:
|
||||
from openai import OpenAI as _OpenAI
|
||||
client = _OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
logger.debug("Built fallback auxiliary client: %s via %s", model, custom_base)
|
||||
return client, model
|
||||
except Exception as exc:
|
||||
logger.debug("Could not build fallback auxiliary client: %s", exc)
|
||||
return None, None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Tool-call / tool-result pair integrity helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -37,27 +37,15 @@ from pathlib import Path
|
||||
from typing import Any, Awaitable, Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
import httpx
|
||||
from openai import AsyncOpenAI
|
||||
from agent.auxiliary_client import get_vision_auxiliary_client
|
||||
from agent.auxiliary_client import get_async_vision_auxiliary_client
|
||||
from tools.debug_helpers import DebugSession
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Resolve vision auxiliary client at module level; build an async wrapper.
|
||||
_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
|
||||
_aux_async_client: AsyncOpenAI | None = None
|
||||
if _aux_sync_client is not None:
|
||||
_async_kwargs = {
|
||||
"api_key": _aux_sync_client.api_key,
|
||||
"base_url": str(_aux_sync_client.base_url),
|
||||
}
|
||||
if "openrouter" in str(_aux_sync_client.base_url).lower():
|
||||
_async_kwargs["default_headers"] = {
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
_aux_async_client = AsyncOpenAI(**_async_kwargs)
|
||||
# Resolve vision auxiliary client at module level.
|
||||
# Uses get_async_vision_auxiliary_client() which properly handles Codex
|
||||
# routing (Responses API adapter) instead of raw AsyncOpenAI construction.
|
||||
_aux_async_client, DEFAULT_VISION_MODEL = get_async_vision_auxiliary_client()
|
||||
|
||||
_debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
|
||||
|
||||
@@ -359,10 +347,28 @@ async def vision_analyze_tool(
|
||||
error_msg = f"Error analyzing image: {str(e)}"
|
||||
logger.error("%s", error_msg, exc_info=True)
|
||||
|
||||
# Detect vision capability errors — give the model a clear message
|
||||
# so it can inform the user instead of a cryptic API error.
|
||||
err_str = str(e).lower()
|
||||
if any(hint in err_str for hint in (
|
||||
"does not support", "not support image", "invalid_request",
|
||||
"content_policy", "image_url", "multimodal",
|
||||
"unrecognized request argument", "image input",
|
||||
)):
|
||||
analysis = (
|
||||
f"{model} does not support vision or our request was not "
|
||||
f"accepted by the server. Error: {e}"
|
||||
)
|
||||
else:
|
||||
analysis = (
|
||||
"There was a problem with the request and the image could not "
|
||||
f"be analyzed. Error: {e}"
|
||||
)
|
||||
|
||||
# Prepare error response
|
||||
result = {
|
||||
"success": False,
|
||||
"analysis": "There was a problem with the request and the image could not be analyzed."
|
||||
"analysis": analysis,
|
||||
}
|
||||
|
||||
debug_call_data["error"] = error_msg
|
||||
|
||||
Reference in New Issue
Block a user