feat: curate HF model picker with OpenRouter analogues (#3440)

Show only agentic models that map to OpenRouter defaults:

  Qwen/Qwen3.5-397B-A17B       ↔ qwen/qwen3.5-plus
  Qwen/Qwen3.5-35B-A3B         ↔ qwen/qwen3.5-35b-a3b
  deepseek-ai/DeepSeek-V3.2    ↔ deepseek/deepseek-chat
  moonshotai/Kimi-K2.5         ↔ moonshotai/kimi-k2.5
  MiniMaxAI/MiniMax-M2.5       ↔ minimax/minimax-m2.5
  zai-org/GLM-5                ↔ z-ai/glm-5
  XiaomiMiMo/MiMo-V2-Flash     ↔ xiaomi/mimo-v2-pro
  moonshotai/Kimi-K2-Thinking  ↔ moonshotai/kimi-k2-thinking

Users can still pick any HF model via the "Enter custom model name" option.
This commit is contained in:
Teknium
2026-03-27 13:54:46 -07:00
committed by GitHub
parent e4e04c2005
commit ab09f6b568
4 changed files with 30 additions and 54 deletions

View File

@@ -115,23 +115,13 @@ DEFAULT_CONTEXT_LENGTHS = {
"kimi": 262144,
# Hugging Face Inference Providers — model IDs use org/name format
"Qwen/Qwen3.5-397B-A17B": 131072,
"Qwen/Qwen3-235B-A22B-Thinking-2507": 131072,
"Qwen/Qwen3-Coder-480B-A35B-Instruct": 131072,
"Qwen/Qwen3-Coder-Next": 131072,
"Qwen/Qwen3-Next-80B-A3B-Instruct": 131072,
"Qwen/Qwen3-Next-80B-A3B-Thinking": 131072,
"deepseek-ai/DeepSeek-R1-0528": 65536,
"Qwen/Qwen3.5-35B-A3B": 131072,
"deepseek-ai/DeepSeek-V3.2": 65536,
"moonshotai/Kimi-K2-Instruct": 262144,
"moonshotai/Kimi-K2-Instruct-0905": 262144,
"moonshotai/Kimi-K2.5": 262144,
"moonshotai/Kimi-K2-Thinking": 262144,
"MiniMaxAI/MiniMax-M2.5": 204800,
"MiniMaxAI/MiniMax-M2.1": 204800,
"XiaomiMiMo/MiMo-V2-Flash": 32768,
"zai-org/GLM-5": 202752,
"zai-org/GLM-4.7": 202752,
"zai-org/GLM-4.7-Flash": 202752,
}
_CONTEXT_LENGTH_KEYS = (

View File

@@ -1504,26 +1504,17 @@ _PROVIDER_MODELS = {
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
],
# Curated model list sourced from https://models.dev (huggingface provider)
# Curated HF model list — only agentic models that map to OpenRouter defaults.
# Format: HF model ID → OpenRouter equivalent noted in comment
"huggingface": [
"Qwen/Qwen3.5-397B-A17B",
"Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct",
"Qwen/Qwen3-Coder-Next",
"Qwen/Qwen3-Next-80B-A3B-Instruct",
"Qwen/Qwen3-Next-80B-A3B-Thinking",
"deepseek-ai/DeepSeek-R1-0528",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2-Instruct",
"moonshotai/Kimi-K2-Instruct-0905",
"moonshotai/Kimi-K2.5",
"moonshotai/Kimi-K2-Thinking",
"MiniMaxAI/MiniMax-M2.5",
"MiniMaxAI/MiniMax-M2.1",
"XiaomiMiMo/MiMo-V2-Flash",
"zai-org/GLM-5",
"zai-org/GLM-4.7",
"zai-org/GLM-4.7-Flash",
"Qwen/Qwen3.5-397B-A17B", # ↔ qwen/qwen3.5-plus
"Qwen/Qwen3.5-35B-A3B", # ↔ qwen/qwen3.5-35b-a3b
"deepseek-ai/DeepSeek-V3.2", # ↔ deepseek/deepseek-chat
"moonshotai/Kimi-K2.5", # ↔ moonshotai/kimi-k2.5
"MiniMaxAI/MiniMax-M2.5", # ↔ minimax/minimax-m2.5
"zai-org/GLM-5", # ↔ z-ai/glm-5
"XiaomiMiMo/MiMo-V2-Flash", # ↔ xiaomi/mimo-v2-pro
"moonshotai/Kimi-K2-Thinking", # ↔ moonshotai/kimi-k2-thinking
],
}
@@ -2054,19 +2045,25 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
save_env_value(base_url_env, override)
effective_base = override
# Model selection — try live /models endpoint first, fall back to defaults
from hermes_cli.models import fetch_api_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
# Model selection — try live /models endpoint first, fall back to defaults.
# Providers with large live catalogs (100+ models) use a curated list instead
# so users see familiar model names rather than an overwhelming dump.
curated = _PROVIDER_MODELS.get(provider_id, [])
if curated and len(curated) >= 8:
# Curated list is substantial — use it directly, skip live probe
live_models = None
else:
from hermes_cli.models import fetch_api_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
if live_models:
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = _PROVIDER_MODELS.get(provider_id, [])
model_list = curated
if model_list:
print(" ⚠ Could not auto-detect models from API — showing defaults.")
print(" Use \"Enter custom model name\" if you don't see your model.")
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
# else: no defaults either, will fall through to raw input
if model_list:

View File

@@ -217,27 +217,16 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"qwen3.5-flash",
"qwen-vl-max",
],
# Curated model list for Hugging Face Inference Providers
# sourced from https://models.dev (huggingface provider)
# Curated HF model list — only agentic models that map to OpenRouter defaults.
"huggingface": [
"Qwen/Qwen3.5-397B-A17B",
"Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct",
"Qwen/Qwen3-Coder-Next",
"Qwen/Qwen3-Next-80B-A3B-Instruct",
"Qwen/Qwen3-Next-80B-A3B-Thinking",
"deepseek-ai/DeepSeek-R1-0528",
"Qwen/Qwen3.5-35B-A3B",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2-Instruct",
"moonshotai/Kimi-K2-Instruct-0905",
"moonshotai/Kimi-K2.5",
"moonshotai/Kimi-K2-Thinking",
"MiniMaxAI/MiniMax-M2.5",
"MiniMaxAI/MiniMax-M2.1",
"XiaomiMiMo/MiMo-V2-Flash",
"zai-org/GLM-5",
"zai-org/GLM-4.7",
"zai-org/GLM-4.7-Flash",
"XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking",
],
}

View File

@@ -744,13 +744,13 @@ class TestHuggingFaceModels:
from hermes_cli.main import _PROVIDER_MODELS
assert "huggingface" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["huggingface"]
assert len(models) >= 10, "Expected at least 10 curated HF models"
assert len(models) >= 6, "Expected at least 6 curated HF models"
def test_models_py_has_huggingface(self):
from hermes_cli.models import _PROVIDER_MODELS
assert "huggingface" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["huggingface"]
assert len(models) >= 10
assert len(models) >= 6
def test_model_lists_match(self):
"""Model lists in main.py and models.py should be identical."""