fix: provider/model resolution — salvage 4 PRs + MiniMax aux URL fix (#5983)

Salvaged fixes from community PRs:

- fix(model_switch): _read_auth_store → _load_auth_store + fix auth store
  key lookup (was checking top-level dict instead of store['providers']).
  OAuth providers now correctly detected in /model picker.
  Cherry-picked from PR #5911 by Xule Lin (linxule).

- fix(ollama): pass num_ctx to override 2048 default context window.
  Ollama defaults to 2048 context regardless of model capabilities. Now
  auto-detects from /api/show metadata and injects num_ctx into every
  request. Config override via model.ollama_num_ctx. Fixes #2708.
  Cherry-picked from PR #5929 by kshitij (kshitijk4poor).

- fix(aux): normalize provider aliases for vision/auxiliary routing.
  Adds _normalize_aux_provider() with 17 aliases (google→gemini,
  claude→anthropic, glm→zai, etc). Fixes vision routing failure when
  provider is set to 'google' instead of 'gemini'.
  Cherry-picked from PR #5793 by e11i (Elizabeth1979).

- fix(aux): rewrite MiniMax /anthropic base URLs to /v1 for OpenAI SDK.
  MiniMax's inference_base_url ends in /anthropic (Anthropic Messages API),
  but auxiliary client uses OpenAI SDK which appends /chat/completions →
  404 at /anthropic/chat/completions. Generic _to_openai_base_url() helper
  rewrites terminal /anthropic to /v1 for OpenAI-compatible endpoint.
  Inspired by PR #5786 by Lempkey.

Added debug logging to silent exception blocks across all fixes.

Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
Teknium
2026-04-07 22:23:28 -07:00
committed by GitHub
parent 8d7a98d2ff
commit 5c03f2e7cc
7 changed files with 378 additions and 31 deletions

View File

@@ -59,6 +59,41 @@ from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
_PROVIDER_ALIASES = {
"google": "gemini",
"google-gemini": "gemini",
"google-ai-studio": "gemini",
"glm": "zai",
"z-ai": "zai",
"z.ai": "zai",
"zhipu": "zai",
"kimi": "kimi-coding",
"moonshot": "kimi-coding",
"minimax-china": "minimax-cn",
"minimax_cn": "minimax-cn",
"claude": "anthropic",
"claude-code": "anthropic",
}
def _normalize_aux_provider(provider: Optional[str], *, for_vision: bool = False) -> str:
normalized = (provider or "auto").strip().lower()
if normalized.startswith("custom:"):
suffix = normalized.split(":", 1)[1].strip()
if not suffix:
return "custom"
normalized = suffix if not for_vision else "custom"
if normalized == "codex":
return "openai-codex"
if normalized == "main":
# Resolve to the user's actual main provider so named custom providers
# and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly.
main_prov = _read_main_provider()
if main_prov and main_prov not in ("auto", "main", ""):
return main_prov
return "custom"
return _PROVIDER_ALIASES.get(normalized, normalized)
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"gemini": "gemini-3-flash-preview",
@@ -106,6 +141,23 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex"
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
def _to_openai_base_url(base_url: str) -> str:
"""Normalize an Anthropic-style base URL to OpenAI-compatible format.
Some providers (MiniMax, MiniMax-CN) expose an ``/anthropic`` endpoint for
the Anthropic Messages API and a separate ``/v1`` endpoint for OpenAI chat
completions. The auxiliary client uses the OpenAI SDK, so it must hit the
``/v1`` surface. Passing the raw ``inference_base_url`` causes requests to
land on ``/anthropic/chat/completions`` — a 404.
"""
url = str(base_url or "").strip().rstrip("/")
if url.endswith("/anthropic"):
rewritten = url[: -len("/anthropic")] + "/v1"
logger.debug("Auxiliary client: rewrote base URL %s%s", url, rewritten)
return rewritten
return url
def _select_pool_entry(provider: str) -> Tuple[bool, Optional[Any]]:
"""Return (pool_exists_for_provider, selected_entry)."""
try:
@@ -635,7 +687,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if not api_key:
continue
base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
base_url = _to_openai_base_url(
_pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
)
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
extra = {}
@@ -652,7 +706,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if not api_key:
continue
base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
base_url = _to_openai_base_url(
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
)
model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
extra = {}
@@ -778,7 +834,7 @@ def _read_main_provider() -> str:
if isinstance(model_cfg, dict):
provider = model_cfg.get("provider", "")
if isinstance(provider, str) and provider.strip():
return provider.strip().lower()
return _normalize_aux_provider(provider)
except Exception:
pass
return ""
@@ -1140,17 +1196,7 @@ def resolve_provider_client(
(client, resolved_model) or (None, None) if auth is unavailable.
"""
# Normalise aliases
provider = (provider or "auto").strip().lower()
if provider == "codex":
provider = "openai-codex"
if provider == "main":
# Resolve to the user's actual main provider so named custom providers
# and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly.
main_prov = _read_main_provider()
if main_prov and main_prov not in ("auto", "main", ""):
provider = main_prov
else:
provider = "custom"
provider = _normalize_aux_provider(provider)
# ── Auto: try all providers in priority order ────────────────────
if provider == "auto":
@@ -1300,7 +1346,9 @@ def resolve_provider_client(
provider, ", ".join(tried_sources))
return None, None
base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
base_url = _to_openai_base_url(
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
)
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
final_model = model or default_model
@@ -1384,17 +1432,7 @@ _VISION_AUTO_PROVIDER_ORDER = (
def _normalize_vision_provider(provider: Optional[str]) -> str:
provider = (provider or "auto").strip().lower()
if provider == "codex":
return "openai-codex"
if provider == "main":
# Resolve to actual main provider — named custom providers and
# non-aggregator providers need to pass through as their real name.
main_prov = _read_main_provider()
if main_prov and main_prov not in ("auto", "main", ""):
return main_prov
return "custom"
return provider
return _normalize_aux_provider(provider, for_vision=True)
def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:

View File

@@ -611,6 +611,59 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
return False
def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
"""Query an Ollama server for the model's context length.
Returns the model's maximum context from GGUF metadata via ``/api/show``,
or the explicit ``num_ctx`` from the Modelfile if set. Returns None if
the server is unreachable or not Ollama.
This is the value that should be passed as ``num_ctx`` in Ollama chat
requests to override the default 2048.
"""
import httpx
bare_model = _strip_provider_prefix(model)
server_url = base_url.rstrip("/")
if server_url.endswith("/v1"):
server_url = server_url[:-3]
try:
server_type = detect_local_server_type(base_url)
except Exception:
return None
if server_type != "ollama":
return None
try:
with httpx.Client(timeout=3.0) as client:
resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
if resp.status_code != 200:
return None
data = resp.json()
# Prefer explicit num_ctx from Modelfile parameters (user override)
params = data.get("parameters", "")
if "num_ctx" in params:
for line in params.split("\n"):
if "num_ctx" in line:
parts = line.strip().split()
if len(parts) >= 2:
try:
return int(parts[-1])
except ValueError:
pass
# Fall back to GGUF model_info context_length (training max)
model_info = data.get("model_info", {})
for key, value in model_info.items():
if "context_length" in key and isinstance(value, (int, float)):
return int(value)
except Exception:
pass
return None
def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
"""Query a local server for the model's context length."""
import httpx