feat: curate HF model picker with OpenRouter analogues (#3440)

Show only agentic models that map to OpenRouter defaults:

  Qwen/Qwen3.5-397B-A17B       ↔ qwen/qwen3.5-plus
  Qwen/Qwen3.5-35B-A3B         ↔ qwen/qwen3.5-35b-a3b
  deepseek-ai/DeepSeek-V3.2    ↔ deepseek/deepseek-chat
  moonshotai/Kimi-K2.5         ↔ moonshotai/kimi-k2.5
  MiniMaxAI/MiniMax-M2.5       ↔ minimax/minimax-m2.5
  zai-org/GLM-5                ↔ z-ai/glm-5
  XiaomiMiMo/MiMo-V2-Flash     ↔ xiaomi/mimo-v2-pro
  moonshotai/Kimi-K2-Thinking  ↔ moonshotai/kimi-k2-thinking

Users can still pick any HF model via the "Enter custom model name" option.
This commit is contained in:
Teknium
2026-03-27 13:54:46 -07:00
committed by GitHub
parent e4e04c2005
commit ab09f6b568
4 changed files with 30 additions and 54 deletions

View File

@@ -115,23 +115,13 @@ DEFAULT_CONTEXT_LENGTHS = {
"kimi": 262144,
# Hugging Face Inference Providers — model IDs use org/name format
"Qwen/Qwen3.5-397B-A17B": 131072,
"Qwen/Qwen3-235B-A22B-Thinking-2507": 131072,
"Qwen/Qwen3-Coder-480B-A35B-Instruct": 131072,
"Qwen/Qwen3-Coder-Next": 131072,
"Qwen/Qwen3-Next-80B-A3B-Instruct": 131072,
"Qwen/Qwen3-Next-80B-A3B-Thinking": 131072,
"deepseek-ai/DeepSeek-R1-0528": 65536,
"Qwen/Qwen3.5-35B-A3B": 131072,
"deepseek-ai/DeepSeek-V3.2": 65536,
"moonshotai/Kimi-K2-Instruct": 262144,
"moonshotai/Kimi-K2-Instruct-0905": 262144,
"moonshotai/Kimi-K2.5": 262144,
"moonshotai/Kimi-K2-Thinking": 262144,
"MiniMaxAI/MiniMax-M2.5": 204800,
"MiniMaxAI/MiniMax-M2.1": 204800,
"XiaomiMiMo/MiMo-V2-Flash": 32768,
"zai-org/GLM-5": 202752,
"zai-org/GLM-4.7": 202752,
"zai-org/GLM-4.7-Flash": 202752,
}
_CONTEXT_LENGTH_KEYS = (

View File

@@ -1504,26 +1504,17 @@ _PROVIDER_MODELS = {
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
],
# Curated model list sourced from https://models.dev (huggingface provider)
# Curated HF model list — only agentic models that map to OpenRouter defaults.
# Format: HF model ID → OpenRouter equivalent noted in comment
"huggingface": [
"Qwen/Qwen3.5-397B-A17B",
"Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct",
"Qwen/Qwen3-Coder-Next",
"Qwen/Qwen3-Next-80B-A3B-Instruct",
"Qwen/Qwen3-Next-80B-A3B-Thinking",
"deepseek-ai/DeepSeek-R1-0528",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2-Instruct",
"moonshotai/Kimi-K2-Instruct-0905",
"moonshotai/Kimi-K2.5",
"moonshotai/Kimi-K2-Thinking",
"MiniMaxAI/MiniMax-M2.5",
"MiniMaxAI/MiniMax-M2.1",
"XiaomiMiMo/MiMo-V2-Flash",
"zai-org/GLM-5",
"zai-org/GLM-4.7",
"zai-org/GLM-4.7-Flash",
"Qwen/Qwen3.5-397B-A17B", # ↔ qwen/qwen3.5-plus
"Qwen/Qwen3.5-35B-A3B", # ↔ qwen/qwen3.5-35b-a3b
"deepseek-ai/DeepSeek-V3.2", # ↔ deepseek/deepseek-chat
"moonshotai/Kimi-K2.5", # ↔ moonshotai/kimi-k2.5
"MiniMaxAI/MiniMax-M2.5", # ↔ minimax/minimax-m2.5
"zai-org/GLM-5", # ↔ z-ai/glm-5
"XiaomiMiMo/MiMo-V2-Flash", # ↔ xiaomi/mimo-v2-pro
"moonshotai/Kimi-K2-Thinking", # ↔ moonshotai/kimi-k2-thinking
],
}
@@ -2054,19 +2045,25 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
save_env_value(base_url_env, override)
effective_base = override
# Model selection — try live /models endpoint first, fall back to defaults
from hermes_cli.models import fetch_api_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
# Model selection — try live /models endpoint first, fall back to defaults.
# Providers with large live catalogs (100+ models) use a curated list instead
# so users see familiar model names rather than an overwhelming dump.
curated = _PROVIDER_MODELS.get(provider_id, [])
if curated and len(curated) >= 8:
# Curated list is substantial — use it directly, skip live probe
live_models = None
else:
from hermes_cli.models import fetch_api_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
if live_models:
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = _PROVIDER_MODELS.get(provider_id, [])
model_list = curated
if model_list:
print(" ⚠ Could not auto-detect models from API — showing defaults.")
print(" Use \"Enter custom model name\" if you don't see your model.")
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
# else: no defaults either, will fall through to raw input
if model_list:

View File

@@ -217,27 +217,16 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"qwen3.5-flash",
"qwen-vl-max",
],
# Curated model list for Hugging Face Inference Providers
# sourced from https://models.dev (huggingface provider)
# Curated HF model list — only agentic models that map to OpenRouter defaults.
"huggingface": [
"Qwen/Qwen3.5-397B-A17B",
"Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct",
"Qwen/Qwen3-Coder-Next",
"Qwen/Qwen3-Next-80B-A3B-Instruct",
"Qwen/Qwen3-Next-80B-A3B-Thinking",
"deepseek-ai/DeepSeek-R1-0528",
"Qwen/Qwen3.5-35B-A3B",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2-Instruct",
"moonshotai/Kimi-K2-Instruct-0905",
"moonshotai/Kimi-K2.5",
"moonshotai/Kimi-K2-Thinking",
"MiniMaxAI/MiniMax-M2.5",
"MiniMaxAI/MiniMax-M2.1",
"XiaomiMiMo/MiMo-V2-Flash",
"zai-org/GLM-5",
"zai-org/GLM-4.7",
"zai-org/GLM-4.7-Flash",
"XiaomiMiMo/MiMo-V2-Flash",
"moonshotai/Kimi-K2-Thinking",
],
}

View File

@@ -744,13 +744,13 @@ class TestHuggingFaceModels:
from hermes_cli.main import _PROVIDER_MODELS
assert "huggingface" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["huggingface"]
assert len(models) >= 10, "Expected at least 10 curated HF models"
assert len(models) >= 6, "Expected at least 6 curated HF models"
def test_models_py_has_huggingface(self):
from hermes_cli.models import _PROVIDER_MODELS
assert "huggingface" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["huggingface"]
assert len(models) >= 10
assert len(models) >= 6
def test_model_lists_match(self):
"""Model lists in main.py and models.py should be identical."""