fix: infer provider from base URL for models.dev context length lookup

Custom endpoint users (DashScope/Alibaba, Z.AI, Kimi, DeepSeek, etc.)
get wrong context lengths because their provider resolves as "openrouter"
or "custom", skipping the models.dev lookup entirely. For example,
qwen3.5-plus on DashScope falls to the generic "qwen" hardcoded default
(131K) instead of the correct 1M.

Add _infer_provider_from_url() that maps known API hostnames to their
models.dev provider IDs. When the explicit provider is generic
(openrouter/custom/empty), infer from the base URL before the models.dev
lookup. This resolves context lengths correctly for DashScope, Z.AI,
Kimi, MiniMax, DeepSeek, and Nous endpoints without requiring users to
manually set context_length in config.

Also refactors _is_known_provider_base_url() to use the same URL mapping,
removing the duplicated hostname list.
This commit is contained in:
Test
2026-03-20 11:57:24 -07:00
parent 76bc27199f
commit 900e848522

View File

@@ -151,22 +151,41 @@ def _is_custom_endpoint(base_url: str) -> bool:
return bool(normalized) and not _is_openrouter_base_url(normalized)
def _is_known_provider_base_url(base_url: str) -> bool:
_URL_TO_PROVIDER: Dict[str, str] = {
"api.openai.com": "openai",
"chatgpt.com": "openai",
"api.anthropic.com": "anthropic",
"api.z.ai": "zai",
"api.moonshot.ai": "kimi-coding",
"api.kimi.com": "kimi-coding",
"api.minimax": "minimax",
"dashscope.aliyuncs.com": "alibaba",
"openrouter.ai": "openrouter",
"inference-api.nousresearch.com": "nous",
"api.deepseek.com": "deepseek",
}
def _infer_provider_from_url(base_url: str) -> Optional[str]:
"""Infer the models.dev provider name from a base URL.
This allows context length resolution via models.dev for custom endpoints
like DashScope (Alibaba), Z.AI, Kimi, etc. without requiring the user to
explicitly set the provider name in config.
"""
normalized = _normalize_base_url(base_url)
if not normalized:
return False
return None
parsed = urlparse(normalized if "://" in normalized else f"https://{normalized}")
host = parsed.netloc.lower() or parsed.path.lower()
known_hosts = (
"api.openai.com",
"chatgpt.com",
"api.anthropic.com",
"api.z.ai",
"api.moonshot.ai",
"api.kimi.com",
"api.minimax",
)
return any(known_host in host for known_host in known_hosts)
for url_part, provider in _URL_TO_PROVIDER.items():
if url_part in host:
return provider
return None
def _is_known_provider_base_url(base_url: str) -> bool:
return _infer_provider_from_url(base_url) is not None
def is_local_endpoint(base_url: str) -> bool:
@@ -808,13 +827,21 @@ def get_model_context_length(
# These are provider-specific and take priority over the generic OR cache,
# since the same model can have different context limits per provider
# (e.g. claude-opus-4.6 is 1M on Anthropic but 128K on GitHub Copilot).
if provider == "nous":
# If provider is generic (openrouter/custom/empty), try to infer from URL.
effective_provider = provider
if not effective_provider or effective_provider in ("openrouter", "custom"):
if base_url:
inferred = _infer_provider_from_url(base_url)
if inferred:
effective_provider = inferred
if effective_provider == "nous":
ctx = _resolve_nous_context_length(model)
if ctx:
return ctx
if provider:
if effective_provider:
from agent.models_dev import lookup_models_dev_context
ctx = lookup_models_dev_context(provider, model)
ctx = lookup_models_dev_context(effective_provider, model)
if ctx:
return ctx