Vision auto-mode previously only tried OpenRouter, Nous, and Codex for multimodal — deliberately skipping custom endpoints with the assumption they 'may not handle vision input.' This caused silent failures for users running local multimodal models (Qwen-VL, LLaVA, Pixtral, etc.) without any cloud API keys. Now custom endpoints are tried as a last resort in auto mode. If the model doesn't support vision, the API call fails gracefully — but users with local vision models no longer need to manually set auxiliary.vision.provider: main in config.yaml. Reported by @Spadav and @kotyKD.
601 lines
23 KiB
Python
601 lines
23 KiB
Python
"""Shared auxiliary OpenAI client for cheap/fast side tasks.
|
|
|
|
Provides a single resolution chain so every consumer (context compression,
|
|
session search, web extraction, vision analysis, browser vision) picks up
|
|
the best available backend without duplicating fallback logic.
|
|
|
|
Resolution order for text tasks (auto mode):
|
|
1. OpenRouter (OPENROUTER_API_KEY)
|
|
2. Nous Portal (~/.hermes/auth.json active provider)
|
|
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
|
|
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
|
|
wrapped to look like a chat.completions client)
|
|
5. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
|
|
— checked via PROVIDER_REGISTRY entries with auth_type='api_key'
|
|
6. None
|
|
|
|
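Resolution order for vision/multimodal tasks (auto mode):
1. OpenRouter
2. Nous Portal
3. Codex OAuth (gpt-5.3-codex handles vision via the Responses API)
4. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) — last resort, so
local multimodal models (Qwen-VL, LLaVA, Pixtral, ...) work without
any cloud API key
5. None (direct API-key providers are skipped — they may not support
multimodal)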
|
|
|
|
Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
|
|
CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task:
|
|
"openrouter", "nous", "codex", or "main" (= steps 3-5).
|
|
Default "auto" follows the chains above.
|
|
|
|
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
|
|
AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
|
|
than the provider's default.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
from openai import OpenAI
|
|
|
|
from hermes_constants import OPENROUTER_BASE_URL
|
|
|
|
# Module-level logger; resolution steps are reported at DEBUG level and
# misconfiguration at WARNING level.
logger = logging.getLogger(__name__)

# Default auxiliary models for direct API-key providers (cheap/fast for side tasks).
# Keyed by provider id; looked up by _resolve_api_key_provider(), which falls
# back to the slug "default" for providers missing from this table.
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
    "minimax": "MiniMax-M2.5-highspeed",
    "minimax-cn": "MiniMax-M2.5-highspeed",
}

# OpenRouter app attribution headers.
# Passed as default_headers to every OpenRouter-backed client (sync and async).
_OR_HEADERS = {
    "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
    "X-OpenRouter-Title": "Hermes Agent",
    "X-OpenRouter-Categories": "productivity,cli-agent",
}

# Nous Portal extra_body for product attribution.
# Callers should pass this as extra_body in chat.completions.create()
# when the auxiliary client is backed by Nous Portal
# (get_auxiliary_extra_body() returns a copy when appropriate).
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent"]}

# Set at resolve time — True if the auxiliary client points to Nous Portal.
# NOTE(review): within this module the flag is only ever set to True (by
# _try_nous) and never reset, so it reflects "Nous was resolved at least once"
# rather than "the most recently resolved client is Nous" — confirm intent.
auxiliary_is_nous: bool = False

# Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
_NOUS_MODEL = "gemini-3-flash"
# Used when NOUS_INFERENCE_BASE_URL is not set in the environment.
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
# Hermes auth store read by _read_nous_auth().
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"

# Codex fallback: uses the Responses API (the only endpoint the Codex
# OAuth token can access) with a fast model for auxiliary tasks.
_CODEX_AUX_MODEL = "gpt-5.3-codex"
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
|
|
|
|
|
# ── Codex Responses → chat.completions adapter ─────────────────────────────
|
|
# All auxiliary consumers call client.chat.completions.create(**kwargs) and
|
|
# read response.choices[0].message.content. This adapter translates those
|
|
# calls to the Codex Responses API so callers don't need any changes.
|
|
|
|
|
|
def _convert_content_for_responses(content: Any) -> Any:
    """Translate chat.completions message content into Responses API content.

    Mapping applied to each dict block of a list-valued ``content``:

        {"type": "text", "text": ...}
            -> {"type": "input_text", "text": ...}
        {"type": "image_url", "image_url": {"url": ..., "detail": ...}}
            -> {"type": "input_image", "image_url": <url>[, "detail": ...]}
        {"type": "input_text" | "input_image", ...}
            -> passed through untouched (already Responses format)
        anything else carrying a truthy "text" key
            -> {"type": "input_text", "text": ...}

    Non-dict list entries and unknown blocks without text are dropped.

    Returns:
        * the string itself when ``content`` is a ``str``;
        * ``str(content)`` for any other truthy non-list value, ``""`` for a
          falsy one;
        * the list of converted blocks, or ``""`` when nothing survived
          conversion.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return str(content) if content else ""

    blocks: List[Dict[str, Any]] = []
    for piece in content:
        if not isinstance(piece, dict):
            continue
        kind = piece.get("type", "")

        if kind in ("input_text", "input_image"):
            # Already in Responses format — keep the caller's dict as-is.
            blocks.append(piece)
            continue

        if kind == "image_url":
            # chat.completions nests the URL: {"image_url": {"url": "..."}};
            # a bare (non-dict) value is stringified and used as the URL.
            payload = piece.get("image_url", {})
            if isinstance(payload, dict):
                image: Dict[str, Any] = {
                    "type": "input_image",
                    "image_url": payload.get("url", ""),
                }
                level = payload.get("detail")
                if level:
                    # Preserve the requested detail level when present.
                    image["detail"] = level
            else:
                image = {"type": "input_image", "image_url": str(payload)}
            blocks.append(image)
            continue

        if kind == "text":
            blocks.append({"type": "input_text", "text": piece.get("text", "")})
            continue

        # Unknown block type — salvage its text if it has any.
        salvaged = piece.get("text", "")
        if salvaged:
            blocks.append({"type": "input_text", "text": salvaged})

    return blocks if blocks else ""
|
|
|
|
|
|
class _CodexCompletionsAdapter:
    """Drop-in shim that accepts chat.completions.create() kwargs and
    routes them through the Codex Responses streaming API.

    Only ``messages``, ``model`` and ``tools`` are honoured. Sampling and
    length kwargs (``temperature``, ``max_tokens``, ...) are deliberately
    ignored: the Codex endpoint rejects them with a 400.
    """

    def __init__(self, real_client: "OpenAI", model: str):
        """Store the underlying client and the default model slug.

        Args:
            real_client: Client whose ``responses.stream(**kwargs)`` is used.
            model: Model used when a call does not supply its own.
        """
        self._client = real_client
        self._model = model

    @staticmethod
    def _system_text(content: Any) -> str:
        """Flatten a system message's content into plain instruction text.

        List-form content (chat.completions content blocks) is reduced to the
        newline-joined text of its blocks instead of the Python ``repr`` of
        the list. Anything that cannot be interpreted falls back to ``str()``.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            texts = []
            for part in content:
                if isinstance(part, str):
                    texts.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    texts.append(part["text"])
            if texts:
                return "\n".join(texts)
        return str(content)

    def create(self, **kwargs) -> Any:
        """Run one chat.completions-style request against the Responses API.

        Args:
            **kwargs: chat.completions.create() keyword arguments. Recognised
                keys: ``messages`` (list of role/content dicts), ``model``
                (falls back to the adapter default when missing or falsy) and
                ``tools`` (chat.completions function-tool definitions).

        Returns:
            A ``SimpleNamespace`` shaped like a chat.completions response:
            ``choices[0].message`` has ``role``, ``content`` (``None`` when
            the reply had no text) and ``tool_calls`` (``None`` when there
            were none); ``choices[0].finish_reason`` is ``"tool_calls"`` or
            ``"stop"``; ``usage`` carries prompt/completion/total token
            counts, or is ``None`` when the API reported none.

        Raises:
            Exception: whatever the underlying client raises; it is logged at
                debug level and re-raised unchanged.
        """
        messages = kwargs.get("messages") or []
        # `or` (not a .get default) so an explicit model=None still resolves
        # to the adapter's default model.
        model = kwargs.get("model") or self._model

        # Separate system/instructions from conversation messages.
        # Convert chat.completions multimodal content blocks to Responses
        # API format (input_text / input_image instead of text / image_url).
        # When several system messages are present, the last one wins.
        instructions = "You are a helpful assistant."
        input_msgs: List[Dict[str, Any]] = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content") or ""
            if role == "system":
                instructions = self._system_text(content)
            else:
                input_msgs.append({
                    "role": role,
                    "content": _convert_content_for_responses(content),
                })

        resp_kwargs: Dict[str, Any] = {
            "model": model,
            "instructions": instructions,
            # The request always carries at least one input message.
            "input": input_msgs or [{"role": "user", "content": ""}],
            "store": False,
        }

        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
        # support max_output_tokens or temperature — omit to avoid 400 errors.

        # Tools support for flush_memories and similar callers.
        # chat.completions nests the definition under "function"; the
        # Responses API wants it flattened. Nameless tools are skipped.
        tools = kwargs.get("tools")
        if tools:
            converted = []
            for tool in tools:
                fn = (tool.get("function") or {}) if isinstance(tool, dict) else {}
                name = fn.get("name")
                if not name:
                    continue
                converted.append({
                    "type": "function",
                    "name": name,
                    "description": fn.get("description", ""),
                    "parameters": fn.get("parameters", {}),
                })
            if converted:
                resp_kwargs["tools"] = converted

        # Stream and collect the response
        text_parts: List[str] = []
        tool_calls_raw: List[Any] = []
        usage = None

        try:
            with self._client.responses.stream(**resp_kwargs) as stream:
                # Drain the event stream; only the final response is used.
                for _event in stream:
                    pass
                final = stream.get_final_response()

            # Extract text and tool calls from the Responses output.
            # `or []` / `or ""` guard against attributes that exist but are None.
            for item in getattr(final, "output", None) or []:
                item_type = getattr(item, "type", None)
                if item_type == "message":
                    for part in getattr(item, "content", None) or []:
                        if getattr(part, "type", None) in ("output_text", "text"):
                            text_parts.append(getattr(part, "text", "") or "")
                elif item_type == "function_call":
                    tool_calls_raw.append(SimpleNamespace(
                        id=getattr(item, "call_id", ""),
                        type="function",
                        function=SimpleNamespace(
                            name=getattr(item, "name", ""),
                            arguments=getattr(item, "arguments", "{}"),
                        ),
                    ))

            resp_usage = getattr(final, "usage", None)
            if resp_usage:
                usage = SimpleNamespace(
                    prompt_tokens=getattr(resp_usage, "input_tokens", 0),
                    completion_tokens=getattr(resp_usage, "output_tokens", 0),
                    total_tokens=getattr(resp_usage, "total_tokens", 0),
                )
        except Exception as exc:
            logger.debug("Codex auxiliary Responses API call failed: %s", exc)
            raise

        content = "".join(text_parts).strip() or None

        # Build a response that looks like chat.completions
        message = SimpleNamespace(
            role="assistant",
            content=content,
            tool_calls=tool_calls_raw or None,
        )
        choice = SimpleNamespace(
            index=0,
            message=message,
            finish_reason="stop" if not tool_calls_raw else "tool_calls",
        )
        return SimpleNamespace(
            choices=[choice],
            model=model,
            usage=usage,
        )
|
|
|
|
|
|
class _CodexChatShim:
    """Namespace object standing in for ``client.chat``.

    Its only attribute, ``completions``, is the adapter, so that
    ``client.chat.completions.create(...)`` resolves to ``adapter.create``.
    """

    def __init__(self, adapter: _CodexCompletionsAdapter):
        # Exposed under the attribute name the OpenAI client uses.
        self.completions = adapter
|
|
|
|
|
|
class CodexAuxiliaryClient:
    """OpenAI-client lookalike whose chat completions go through Codex.

    ``client.chat.completions.create(**kwargs)`` works as on a regular
    OpenAI client; the call is served by the Codex Responses adapter.
    ``api_key`` and ``base_url`` are copied from the wrapped client so that
    async wrappers can inspect them.
    """

    def __init__(self, real_client: OpenAI, model: str):
        # Mirror connection details for introspection.
        self.api_key = real_client.api_key
        self.base_url = real_client.base_url
        # chat -> completions -> create(), backed by the Responses adapter.
        self.chat = _CodexChatShim(_CodexCompletionsAdapter(real_client, model))
        # Kept so close() can release the underlying client.
        self._real_client = real_client

    def close(self):
        """Close the wrapped OpenAI client."""
        self._real_client.close()
|
|
|
|
|
|
class _AsyncCodexCompletionsAdapter:
    """Awaitable facade over the synchronous Codex Responses adapter.

    ``create()`` hands the blocking call to ``asyncio.to_thread()`` so async
    consumers (web_tools, session_search) can simply ``await`` it.
    """

    def __init__(self, sync_adapter: _CodexCompletionsAdapter):
        self._sync = sync_adapter

    async def create(self, **kwargs) -> Any:
        """Await the sync adapter's ``create(**kwargs)`` on a worker thread."""
        import asyncio

        blocking_create = self._sync.create
        return await asyncio.to_thread(blocking_create, **kwargs)
|
|
|
|
|
|
class _AsyncCodexChatShim:
    """Async counterpart of the chat shim: exposes ``.completions`` only."""

    def __init__(self, adapter: _AsyncCodexCompletionsAdapter):
        # Makes ``client.chat.completions.create`` resolve to the async adapter.
        self.completions = adapter
|
|
|
|
|
|
class AsyncCodexAuxiliaryClient:
    """Async twin of ``CodexAuxiliaryClient``.

    Offers ``await client.chat.completions.create(...)`` in the same shape as
    ``AsyncOpenAI``, reusing the sync wrapper's adapter under the hood, and
    mirrors the sync wrapper's ``api_key`` / ``base_url``.
    """

    def __init__(self, sync_wrapper: "CodexAuxiliaryClient"):
        self.api_key = sync_wrapper.api_key
        self.base_url = sync_wrapper.base_url
        # Wrap the sync wrapper's existing adapter rather than building a new one.
        self.chat = _AsyncCodexChatShim(
            _AsyncCodexCompletionsAdapter(sync_wrapper.chat.completions)
        )
|
|
|
|
|
|
def _read_nous_auth() -> Optional[dict]:
    """Load the Nous provider state from the Hermes auth file, if usable.

    The state is returned only when the auth file exists, its
    ``active_provider`` is ``"nous"``, and the ``providers.nous`` entry holds
    a truthy ``agent_key`` or ``access_token``.

    Returns:
        The ``providers.nous`` dict, or ``None`` in every other case —
        including any error while reading or parsing the file, which is
        logged at debug level and swallowed.
    """
    try:
        if not _AUTH_JSON_PATH.is_file():
            return None
        auth_state = json.loads(_AUTH_JSON_PATH.read_text())
        if auth_state.get("active_provider") == "nous":
            nous_state = auth_state.get("providers", {}).get("nous", {})
            # Must have at least an agent_key or an access_token.
            if nous_state.get("agent_key") or nous_state.get("access_token"):
                return nous_state
        return None
    except Exception as exc:
        logger.debug("Could not read Nous auth: %s", exc)
        return None
|
|
|
|
|
|
def _nous_api_key(provider: dict) -> str:
    """Pick the credential to use from a Nous provider state dict.

    A truthy ``agent_key`` takes precedence; otherwise the ``access_token``
    entry is returned (``""`` when that key is absent).
    """
    agent_key = provider.get("agent_key")
    if agent_key:
        return agent_key
    return provider.get("access_token", "")
|
|
|
|
|
|
def _nous_base_url() -> str:
    """Return the Nous inference base URL.

    ``NOUS_INFERENCE_BASE_URL`` wins whenever it is set in the environment
    (even to an empty string); otherwise the built-in default is used.
    """
    override = os.getenv("NOUS_INFERENCE_BASE_URL")
    if override is None:
        return _NOUS_DEFAULT_BASE_URL
    return override
|
|
|
|
|
|
def _read_codex_access_token() -> Optional[str]:
    """Fetch the Codex OAuth access token from the Hermes auth store.

    Returns:
        The whitespace-stripped token when ``tokens.access_token`` is a
        non-blank string; ``None`` otherwise, or when importing/reading the
        auth store fails (the failure is logged at debug level).
    """
    try:
        from hermes_cli.auth import _read_codex_tokens

        token = _read_codex_tokens().get("tokens", {}).get("access_token")
        if not isinstance(token, str):
            return None
        token = token.strip()
        return token if token else None
    except Exception as exc:
        logger.debug("Could not read Codex auth for auxiliary client: %s", exc)
        return None
|
|
|
|
|
|
def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Build a client for the first configured direct API-key provider.

    Walks ``PROVIDER_REGISTRY`` in its own order, considering only entries
    whose ``auth_type`` is ``"api_key"``. A provider counts as configured
    when one of its ``api_key_env_vars`` holds a non-blank value; the first
    such variable supplies the key.

    Returns:
        ``(client, model)`` for the first configured provider, or
        ``(None, None)`` when none is configured or the registry cannot be
        imported.
    """
    try:
        from hermes_cli.auth import PROVIDER_REGISTRY
    except ImportError:
        logger.debug("Could not import PROVIDER_REGISTRY for API-key fallback")
        return None, None

    for provider_id, pconfig in PROVIDER_REGISTRY.items():
        if pconfig.auth_type != "api_key":
            continue

        # First non-blank env var wins (the generator stops at the first hit).
        stripped_values = (
            os.getenv(var_name, "").strip() for var_name in pconfig.api_key_env_vars
        )
        api_key = next((value for value in stripped_values if value), "")
        if not api_key:
            continue

        # Base URL precedence: env-var override, then the Kimi Code special
        # case (sk-kimi- keys need api.kimi.com/coding/v1), then the
        # provider's registered inference URL.
        override_url = ""
        if pconfig.base_url_env_var:
            override_url = os.getenv(pconfig.base_url_env_var, "").strip()
        if override_url:
            base_url = override_url.rstrip("/")
        elif provider_id == "kimi-coding" and api_key.startswith("sk-kimi-"):
            base_url = "https://api.kimi.com/coding/v1"
        else:
            base_url = pconfig.inference_base_url

        model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
        logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)

        client_kwargs = {"api_key": api_key, "base_url": base_url}
        if "api.kimi.com" in base_url.lower():
            # Kimi endpoints get a dedicated User-Agent header.
            client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
        return OpenAI(**client_kwargs), model

    return None, None
|
|
|
|
|
|
# ── Provider resolution helpers ─────────────────────────────────────────────
|
|
|
|
def _get_auxiliary_provider(task: str = "") -> str:
    """Look up the provider override configured for an auxiliary task.

    For a non-empty ``task`` the environment is consulted in this order:
    ``AUXILIARY_{TASK}_PROVIDER`` (e.g. AUXILIARY_VISION_PROVIDER), then
    ``CONTEXT_{TASK}_PROVIDER`` (the compression section's summary_provider).
    The first value that is non-blank and not ``"auto"`` wins.

    Returns:
        The winning value, stripped and lower-cased (typically "openrouter",
        "nous", "codex" or "main", but the value is not validated here), or
        ``"auto"`` when there is no task or no effective override.
    """
    if not task:
        return "auto"

    suffix = f"{task.upper()}_PROVIDER"
    for namespace in ("AUXILIARY_", "CONTEXT_"):
        choice = os.getenv(namespace + suffix, "").strip().lower()
        if choice not in ("", "auto"):
            return choice
    return "auto"
|
|
|
|
|
|
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return an OpenRouter client and its default model, if configured.

    Configured means ``OPENROUTER_API_KEY`` is set to a non-empty value.
    The client carries the app attribution headers. Returns
    ``(None, None)`` otherwise.
    """
    api_key = os.getenv("OPENROUTER_API_KEY")
    if api_key:
        logger.debug("Auxiliary client: OpenRouter")
        client = OpenAI(
            api_key=api_key,
            base_url=OPENROUTER_BASE_URL,
            default_headers=_OR_HEADERS,
        )
        return client, _OPENROUTER_MODEL
    return None, None
|
|
|
|
|
|
def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return a Nous Portal client and its default model, if Nous is active.

    Side effect: on success the module-level ``auxiliary_is_nous`` flag is
    set to True (this function never resets it to False).

    Returns ``(None, None)`` when no usable Nous auth state is found.
    """
    global auxiliary_is_nous

    nous_state = _read_nous_auth()
    if not nous_state:
        return None, None

    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
    client = OpenAI(api_key=_nous_api_key(nous_state), base_url=_nous_base_url())
    return client, _NOUS_MODEL
|
|
|
|
|
|
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return a client for the user's custom OpenAI-compatible endpoint.

    Requires both ``OPENAI_BASE_URL`` and ``OPENAI_API_KEY`` to be non-empty.
    The model is taken from ``OPENAI_MODEL``, then ``LLM_MODEL``, and
    finally defaults to "gpt-4o-mini". Returns ``(None, None)`` when the
    endpoint is not configured.
    """
    endpoint = os.getenv("OPENAI_BASE_URL")
    secret = os.getenv("OPENAI_API_KEY")
    if not (endpoint and secret):
        return None, None

    # First non-empty model variable wins; map() is lazy, so LLM_MODEL is
    # only read when OPENAI_MODEL is unset or empty.
    configured_models = map(os.getenv, ("OPENAI_MODEL", "LLM_MODEL"))
    model = next((slug for slug in configured_models if slug), "gpt-4o-mini")
    logger.debug("Auxiliary client: custom endpoint (%s)", model)
    return OpenAI(api_key=secret, base_url=endpoint), model
|
|
|
|
|
|
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
    """Return a Codex-backed auxiliary client, if an OAuth token is stored.

    The returned client is a ``CodexAuxiliaryClient`` wrapping an OpenAI
    client pointed at the Codex backend, so callers can keep using
    ``client.chat.completions.create()``. Returns ``(None, None)`` when no
    access token is available.
    """
    access_token = _read_codex_access_token()
    if access_token:
        logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
        backend = OpenAI(api_key=access_token, base_url=_CODEX_AUX_BASE_URL)
        wrapper = CodexAuxiliaryClient(backend, _CODEX_AUX_MODEL)
        return wrapper, _CODEX_AUX_MODEL
    return None, None
|
|
|
|
|
|
def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Resolve an explicitly requested auxiliary provider.

    Args:
        forced: Provider name as returned by ``_get_auxiliary_provider``
            (already stripped and lower-cased). Recognised values:
            "openrouter", "nous", "codex" and "main".

    Returns:
        ``(client, model)`` for the requested provider. For a recognised
        provider whose credentials are missing, a warning is logged and
        ``(None, None)`` is returned — a forced provider is never silently
        swapped for another one. For an unrecognised name (e.g. a typo), a
        warning is logged and the generic auto-detection chain
        (``_resolve_auto``) is used, as the warning message states.
    """
    if forced == "openrouter":
        client, model = _try_openrouter()
        if client is None:
            logger.warning("auxiliary.provider=openrouter but OPENROUTER_API_KEY not set")
        return client, model

    if forced == "nous":
        client, model = _try_nous()
        if client is None:
            logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes login)")
        return client, model

    if forced == "codex":
        client, model = _try_codex()
        if client is None:
            logger.warning("auxiliary.provider=codex but no Codex OAuth token found (run: hermes model)")
        return client, model

    if forced == "main":
        # "main" = skip OpenRouter/Nous, use the main chat model's credentials.
        for try_fn in (_try_custom_endpoint, _try_codex, _resolve_api_key_provider):
            client, model = try_fn()
            if client is not None:
                return client, model
        logger.warning("auxiliary.provider=main but no main endpoint credentials found")
        return None, None

    # Unknown provider name — actually fall back to auto. Previously this
    # logged "falling back to auto" but returned (None, None), leaving the
    # caller with no client at all because of a typo in the config.
    logger.warning("Unknown auxiliary.provider=%r, falling back to auto", forced)
    return _resolve_auto()
|
|
|
|
|
|
def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Run the auto-detection chain and return the first available backend.

    Order: OpenRouter → Nous → custom endpoint → Codex → direct API-key
    providers. Returns ``(None, None)`` when every step comes up empty.
    """
    chain = (
        _try_openrouter,
        _try_nous,
        _try_custom_endpoint,
        _try_codex,
        _resolve_api_key_provider,
    )
    for resolve in chain:
        client, model = resolve()
        if client is None:
            continue
        return client, model

    logger.debug("Auxiliary client: none available")
    return None, None
|
|
|
|
|
|
# ── Public API ──────────────────────────────────────────────────────────────
|
|
|
|
def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
    """Resolve the client and default model for a text-only auxiliary task.

    Args:
        task: Optional task name (e.g. "compression", "web_extract") used
            to look up a task-specific provider override.

    Returns:
        ``(client, default_model_slug)``; both are ``None`` when no backend
        could be resolved. Callers may replace the returned model with a
        per-task env var (e.g. CONTEXT_COMPRESSION_MODEL,
        AUXILIARY_WEB_EXTRACT_MODEL).
    """
    provider = _get_auxiliary_provider(task)
    if provider == "auto":
        return _resolve_auto()
    return _resolve_forced_provider(provider)
|
|
|
|
|
|
def get_async_text_auxiliary_client(task: str = ""):
    """Async flavour of ``get_text_auxiliary_client``.

    Resolves the sync client first, then builds its async equivalent:

    * Codex wrapper  -> ``AsyncCodexAuxiliaryClient`` around the same wrapper;
    * anything else  -> ``AsyncOpenAI`` with the same api_key/base_url, plus
      the OpenRouter attribution headers or the Kimi User-Agent when the
      base URL points at those services.

    Returns:
        ``(async_client, model_slug)``, or ``(None, None)`` when no provider
        is available.
    """
    from openai import AsyncOpenAI

    sync_client, model = get_text_auxiliary_client(task)
    if sync_client is None:
        return None, None
    if isinstance(sync_client, CodexAuxiliaryClient):
        return AsyncCodexAuxiliaryClient(sync_client), model

    base_url = str(sync_client.base_url)
    lowered_url = base_url.lower()

    # Provider-specific headers are keyed off the base URL of the sync client.
    headers = None
    if "openrouter" in lowered_url:
        headers = dict(_OR_HEADERS)
    elif "api.kimi.com" in lowered_url:
        headers = {"User-Agent": "KimiCLI/1.0"}

    async_kwargs = {"api_key": sync_client.api_key, "base_url": base_url}
    if headers is not None:
        async_kwargs["default_headers"] = headers
    return AsyncOpenAI(**async_kwargs), model
|
|
|
|
|
|
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, default_model_slug) for vision/multimodal auxiliary tasks.

    Checks AUXILIARY_VISION_PROVIDER (then CONTEXT_VISION_PROVIDER) for a
    forced provider, otherwise auto-detects. Callers may override the
    returned model with AUXILIARY_VISION_MODEL.

    Auto mode tries, in order:

      1. OpenRouter
      2. Nous Portal
      3. Codex OAuth (gpt-5.3-codex supports vision via the Responses API)
      4. The custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY), as a last
         resort, so local multimodal models (Qwen-VL, LLaVA, Pixtral, etc.)
         work without any cloud API key. Whether that endpoint's model
         accepts images is not checked here; a model without vision support
         will fail at request time.

    Direct API-key providers are not part of the auto chain. To use them,
    set AUXILIARY_VISION_PROVIDER explicitly.

    Returns:
        ``(client, model)``, or ``(None, None)`` when nothing is available.
    """
    forced = _get_auxiliary_provider("vision")
    if forced != "auto":
        return _resolve_forced_provider(forced)

    # Auto: try providers known to support multimodal first, then fall
    # back to the user's custom endpoint. Many local models (Qwen-VL,
    # LLaVA, Pixtral, etc.) support vision — skipping them entirely
    # caused silent failures for local-only users.
    for try_fn in (_try_openrouter, _try_nous, _try_codex,
                   _try_custom_endpoint):
        client, model = try_fn()
        if client is not None:
            return client, model

    logger.debug("Auxiliary vision client: none available")
    return None, None
|
|
|
|
|
|
def get_auxiliary_extra_body() -> dict:
    """Build the ``extra_body`` to send with auxiliary API calls.

    Returns:
        A fresh shallow copy of the Nous Portal product tags when the
        module-level ``auxiliary_is_nous`` flag is set, otherwise a new
        empty dict.
    """
    if auxiliary_is_nous:
        return dict(NOUS_EXTRA_BODY)
    return {}
|
|
|
|
|
|
def auxiliary_max_tokens_param(value: int) -> dict:
    """Return the token-limit kwarg suited to the auxiliary provider.

    ``{"max_completion_tokens": value}`` is returned only when all of the
    following hold: OPENROUTER_API_KEY is unset/empty, no active Nous auth
    is found, and OPENAI_BASE_URL contains "api.openai.com" (direct OpenAI,
    whose newer models require that parameter name). Every other setup gets
    ``{"max_tokens": value}``.

    The decision is made from the environment, not from a resolved client.
    The Codex adapter in this module does not forward token limits at all,
    so ``max_tokens`` is harmless there.
    """
    custom_base = os.getenv("OPENAI_BASE_URL", "")
    openrouter_configured = bool(os.getenv("OPENROUTER_API_KEY"))

    # Nous auth is only consulted when OpenRouter is not configured.
    if openrouter_configured or _read_nous_auth() is not None:
        return {"max_tokens": value}

    # Only use max_completion_tokens for direct OpenAI custom endpoints.
    if "api.openai.com" in custom_base.lower():
        return {"max_completion_tokens": value}
    return {"max_tokens": value}
|