diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 57c3c1186..4571520af 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -499,6 +499,188 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
     return None, None
 
 
+# ── Centralized Provider Router ─────────────────────────────────────────────
+#
+# resolve_provider_client() is the single entry point for creating a properly
+# configured client given a (provider, model) pair. It handles auth lookup,
+# base URL resolution, provider-specific headers, and API format differences
+# (Chat Completions vs Responses API for Codex).
+#
+# All auxiliary consumer code should go through this or the public helpers
+# below — never look up auth env vars ad-hoc.
+
+
+def _to_async_client(sync_client, model: str):
+    """Convert a sync client to its async counterpart, preserving Codex routing."""
+    from openai import AsyncOpenAI
+
+    if isinstance(sync_client, CodexAuxiliaryClient):
+        return AsyncCodexAuxiliaryClient(sync_client), model
+
+    async_kwargs = {
+        "api_key": sync_client.api_key,
+        "base_url": str(sync_client.base_url),
+    }
+    base_lower = str(sync_client.base_url).lower()
+    if "openrouter" in base_lower:
+        async_kwargs["default_headers"] = dict(_OR_HEADERS)
+    elif "api.kimi.com" in base_lower:
+        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
+    return AsyncOpenAI(**async_kwargs), model
+
+
+def resolve_provider_client(
+    provider: str,
+    model: Optional[str] = None,
+    async_mode: bool = False,
+) -> Tuple[Optional[Any], Optional[str]]:
+    """Central router: given a provider name and optional model, return a
+    configured client with the correct auth, base URL, and API format.
+
+    The returned client always exposes ``.chat.completions.create()`` — for
+    Codex/Responses API providers, an adapter handles the translation
+    transparently.
+
+    Args:
+        provider: Provider identifier. One of:
+            "openrouter", "nous", "openai-codex" (or "codex"),
+            "zai", "kimi-coding", "minimax", "minimax-cn", "nous-api",
+            "custom" (OPENAI_BASE_URL + OPENAI_API_KEY),
+            "auto" (full auto-detection chain).
+        model: Model slug override. If None, uses the provider's default
+            auxiliary model.
+        async_mode: If True, return an async-compatible client.
+
+    Returns:
+        (client, resolved_model) or (None, None) if auth is unavailable.
+    """
+    # Normalise aliases
+    provider = (provider or "auto").strip().lower()
+    if provider == "codex":
+        provider = "openai-codex"
+    if provider == "main":
+        provider = "custom"
+
+    # ── Auto: try all providers in priority order ────────────────────
+    if provider == "auto":
+        client, resolved = _resolve_auto()
+        if client is None:
+            return None, None
+        final_model = model or resolved
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── OpenRouter ───────────────────────────────────────────────────
+    if provider == "openrouter":
+        client, default = _try_openrouter()
+        if client is None:
+            logger.warning("resolve_provider_client: openrouter requested "
+                           "but OPENROUTER_API_KEY not set")
+            return None, None
+        final_model = model or default
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── Nous Portal (OAuth) ──────────────────────────────────────────
+    if provider == "nous":
+        client, default = _try_nous()
+        if client is None:
+            logger.warning("resolve_provider_client: nous requested "
+                           "but Nous Portal not configured (run: hermes login)")
+            return None, None
+        final_model = model or default
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
+    if provider == "openai-codex":
+        client, default = _try_codex()
+        if client is None:
+            logger.warning("resolve_provider_client: openai-codex requested "
+                           "but no Codex OAuth token found (run: hermes model)")
+            return None, None
+        final_model = model or default
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
+    if provider == "custom":
+        # Try custom first, then codex, then API-key providers
+        for try_fn in (_try_custom_endpoint, _try_codex,
+                       _resolve_api_key_provider):
+            client, default = try_fn()
+            if client is not None:
+                final_model = model or default
+                return (_to_async_client(client, final_model) if async_mode
+                        else (client, final_model))
+        logger.warning("resolve_provider_client: custom/main requested "
+                       "but no endpoint credentials found")
+        return None, None
+
+    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
+    try:
+        from hermes_cli.auth import PROVIDER_REGISTRY, _resolve_kimi_base_url
+    except ImportError:
+        logger.debug("hermes_cli.auth not available for provider %s", provider)
+        return None, None
+
+    pconfig = PROVIDER_REGISTRY.get(provider)
+    if pconfig is None:
+        logger.warning("resolve_provider_client: unknown provider %r", provider)
+        return None, None
+
+    if pconfig.auth_type == "api_key":
+        # Find the first configured API key
+        api_key = ""
+        for env_var in pconfig.api_key_env_vars:
+            api_key = os.getenv(env_var, "").strip()
+            if api_key:
+                break
+        if not api_key:
+            logger.warning("resolve_provider_client: provider %s has no API "
+                           "key configured (tried: %s)",
+                           provider, ", ".join(pconfig.api_key_env_vars))
+            return None, None
+
+        # Resolve base URL (env override → provider-specific logic → default)
+        base_url_override = os.getenv(pconfig.base_url_env_var, "").strip() if pconfig.base_url_env_var else ""
+        if provider == "kimi-coding":
+            base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, base_url_override)
+        elif base_url_override:
+            base_url = base_url_override
+        else:
+            base_url = pconfig.inference_base_url
+
+        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
+        final_model = model or default_model
+
+        # Provider-specific headers
+        headers = {}
+        if "api.kimi.com" in base_url.lower():
+            headers["User-Agent"] = "KimiCLI/1.0"
+
+        client = OpenAI(api_key=api_key, base_url=base_url,
+                        **({"default_headers": headers} if headers else {}))
+        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
+        # OAuth providers — route through their specific try functions
+        if provider == "nous":
+            return resolve_provider_client("nous", model, async_mode)
+        if provider == "openai-codex":
+            return resolve_provider_client("openai-codex", model, async_mode)
+        # nous-api is api_key type so it's handled above
+        logger.warning("resolve_provider_client: OAuth provider %s not "
+                       "directly supported, try 'auto'", provider)
+        return None, None
+
+    logger.warning("resolve_provider_client: unhandled auth_type %s for %s",
+                   pconfig.auth_type, provider)
+    return None, None
+
+
 # ── Public API ──────────────────────────────────────────────────────────────
 
 def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
@@ -513,8 +695,8 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
     """
     forced = _get_auxiliary_provider(task)
     if forced != "auto":
-        return _resolve_forced_provider(forced)
-    return _resolve_auto()
+        return resolve_provider_client(forced)
+    return resolve_provider_client("auto")
 
 
 def get_async_text_auxiliary_client(task: str = ""):
@@ -524,24 +706,10 @@ def get_async_text_auxiliary_client(task: str = ""):
     (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
 
     Returns (None, None) when no provider is available.
     """
-    from openai import AsyncOpenAI
-
-    sync_client, model = get_text_auxiliary_client(task)
-    if sync_client is None:
-        return None, None
-
-    if isinstance(sync_client, CodexAuxiliaryClient):
-        return AsyncCodexAuxiliaryClient(sync_client), model
-
-    async_kwargs = {
-        "api_key": sync_client.api_key,
-        "base_url": str(sync_client.base_url),
-    }
-    if "openrouter" in str(sync_client.base_url).lower():
-        async_kwargs["default_headers"] = dict(_OR_HEADERS)
-    elif "api.kimi.com" in str(sync_client.base_url).lower():
-        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
-    return AsyncOpenAI(**async_kwargs), model
+    forced = _get_auxiliary_provider(task)
+    if forced != "auto":
+        return resolve_provider_client(forced, async_mode=True)
+    return resolve_provider_client("auto", async_mode=True)
 
 
 def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
@@ -559,7 +727,7 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
     """
     forced = _get_auxiliary_provider("vision")
     if forced != "auto":
-        return _resolve_forced_provider(forced)
+        return resolve_provider_client(forced)
     # Auto: try providers known to support multimodal first, then fall
     # back to the user's custom endpoint. Many local models (Qwen-VL,
     # LLaVA, Pixtral, etc.) support vision — skipping them entirely
@@ -573,6 +741,21 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
     return None, None
 
 
+def get_async_vision_auxiliary_client():
+    """Return (async_client, model_slug) for async vision consumers.
+
+    Properly handles Codex routing — unlike manually constructing
+    AsyncOpenAI from a sync client, this preserves the Responses API
+    adapter for Codex providers.
+
+    Returns (None, None) when no provider is available.
+    """
+    sync_client, model = get_vision_auxiliary_client()
+    if sync_client is None:
+        return None, None
+    return _to_async_client(sync_client, model)
+
+
 def get_auxiliary_extra_body() -> dict:
     """Return extra_body kwargs for auxiliary API calls.
 
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 01aa2af80..fae483fd8 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -127,20 +127,38 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         except Exception as e:
             logging.warning(f"Failed to generate context summary with auxiliary model: {e}")
 
-        # 2. Fallback: try the user's main model endpoint
-        fallback_client, fallback_model = self._get_fallback_client()
-        if fallback_client is not None:
+        # 2. Fallback: re-try via the centralized provider router.
+        # This covers all configured providers (Codex OAuth, API-key
+        # providers, etc.) without ad-hoc env var lookups.
+        from agent.auxiliary_client import resolve_provider_client
+        fallback_providers = ["custom", "openrouter", "nous", "codex"]
+        for fb_provider in fallback_providers:
             try:
-                logger.info("Retrying context summary with main model (%s)", fallback_model)
-                summary = self._call_summary_model(fallback_client, fallback_model, prompt)
-                self.client = fallback_client
-                self.summary_model = fallback_model
+                fb_client, fb_model = resolve_provider_client(
+                    fb_provider, model=self.model)
+                if fb_client is None:
+                    continue
+                # Don't retry the same client that just failed
+                if (self.client is not None
+                        and hasattr(fb_client, "base_url")
+                        and hasattr(self.client, "base_url")
+                        and str(fb_client.base_url) == str(self.client.base_url)):
+                    continue
+                logger.info("Retrying context summary with fallback provider "
+                            "%s (%s)", fb_provider, fb_model)
+                summary = self._call_summary_model(fb_client, fb_model, prompt)
+                # Promote successful fallback for future compressions
+                self.client = fb_client
+                self.summary_model = fb_model
                 return summary
             except Exception as fallback_err:
-                logging.warning(f"Main model summary also failed: {fallback_err}")
+                logging.warning("Fallback provider %s failed: %s",
+                                fb_provider, fallback_err)
 
-        # 3. All models failed — return None so the caller drops turns without a summary
-        logging.warning("Context compression: no model available for summary. Middle turns will be dropped without summary.")
+        # 3. All providers failed — return None so the caller drops turns
+        # without a summary.
+        logging.warning("Context compression: no provider available for "
+                        "summary. Middle turns will be dropped without summary.")
         return None
 
     def _call_summary_model(self, client, model: str, prompt: str) -> str:
@@ -170,35 +188,6 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         summary = "[CONTEXT SUMMARY]: " + summary
         return summary
 
-    def _get_fallback_client(self):
-        """Try to build a fallback client from the main model's endpoint config.
-
-        When the primary auxiliary client fails (e.g. stale OpenRouter key), this
-        creates a client using the user's active custom endpoint (OPENAI_BASE_URL)
-        so compression can still produce a real summary instead of a static string.
-
-        Returns (client, model) or (None, None).
-        """
-        custom_base = os.getenv("OPENAI_BASE_URL")
-        custom_key = os.getenv("OPENAI_API_KEY")
-        if not custom_base or not custom_key:
-            return None, None
-
-        # Don't fallback to the same provider that just failed
-        from hermes_constants import OPENROUTER_BASE_URL
-        if custom_base.rstrip("/") == OPENROUTER_BASE_URL.rstrip("/"):
-            return None, None
-
-        model = os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or self.model
-        try:
-            from openai import OpenAI as _OpenAI
-            client = _OpenAI(api_key=custom_key, base_url=custom_base)
-            logger.debug("Built fallback auxiliary client: %s via %s", model, custom_base)
-            return client, model
-        except Exception as exc:
-            logger.debug("Could not build fallback auxiliary client: %s", exc)
-            return None, None
-
     # ------------------------------------------------------------------
     # Tool-call / tool-result pair integrity helpers
     # ------------------------------------------------------------------
diff --git a/tools/vision_tools.py b/tools/vision_tools.py
index bfde51ec5..ee89b58a4 100644
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -37,27 +37,15 @@ from pathlib import Path
 from typing import Any, Awaitable, Dict, Optional
 from urllib.parse import urlparse
 import httpx
-from openai import AsyncOpenAI
-from agent.auxiliary_client import get_vision_auxiliary_client
+from agent.auxiliary_client import get_async_vision_auxiliary_client
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
 
-# Resolve vision auxiliary client at module level; build an async wrapper.
-_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
-_aux_async_client: AsyncOpenAI | None = None
-if _aux_sync_client is not None:
-    _async_kwargs = {
-        "api_key": _aux_sync_client.api_key,
-        "base_url": str(_aux_sync_client.base_url),
-    }
-    if "openrouter" in str(_aux_sync_client.base_url).lower():
-        _async_kwargs["default_headers"] = {
-            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-            "X-OpenRouter-Title": "Hermes Agent",
-            "X-OpenRouter-Categories": "productivity,cli-agent",
-        }
-    _aux_async_client = AsyncOpenAI(**_async_kwargs)
+# Resolve vision auxiliary client at module level.
+# Uses get_async_vision_auxiliary_client() which properly handles Codex
+# routing (Responses API adapter) instead of raw AsyncOpenAI construction.
+_aux_async_client, DEFAULT_VISION_MODEL = get_async_vision_auxiliary_client()
 
 _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@@ -359,10 +347,28 @@ async def vision_analyze_tool(
         error_msg = f"Error analyzing image: {str(e)}"
         logger.error("%s", error_msg, exc_info=True)
 
+        # Detect vision capability errors — give the model a clear message
+        # so it can inform the user instead of a cryptic API error.
+        err_str = str(e).lower()
+        if any(hint in err_str for hint in (
+                "does not support", "not support image", "invalid_request",
+                "content_policy", "image_url", "multimodal",
+                "unrecognized request argument", "image input",
+        )):
+            analysis = (
+                f"{model} does not support vision or our request was not "
+                f"accepted by the server. Error: {e}"
+            )
+        else:
+            analysis = (
+                "There was a problem with the request and the image could not "
+                f"be analyzed. Error: {e}"
+            )
+
         # Prepare error response
         result = {
             "success": False,
-            "analysis": "There was a problem with the request and the image could not be analyzed."
+            "analysis": analysis,
        }
 
        debug_call_data["error"] = error_msg