diff --git a/agent/model_metadata.py b/agent/model_metadata.py index c70c8368d..162295f81 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -115,23 +115,13 @@ DEFAULT_CONTEXT_LENGTHS = { "kimi": 262144, # Hugging Face Inference Providers — model IDs use org/name format "Qwen/Qwen3.5-397B-A17B": 131072, - "Qwen/Qwen3-235B-A22B-Thinking-2507": 131072, - "Qwen/Qwen3-Coder-480B-A35B-Instruct": 131072, - "Qwen/Qwen3-Coder-Next": 131072, - "Qwen/Qwen3-Next-80B-A3B-Instruct": 131072, - "Qwen/Qwen3-Next-80B-A3B-Thinking": 131072, - "deepseek-ai/DeepSeek-R1-0528": 65536, + "Qwen/Qwen3.5-35B-A3B": 131072, "deepseek-ai/DeepSeek-V3.2": 65536, - "moonshotai/Kimi-K2-Instruct": 262144, - "moonshotai/Kimi-K2-Instruct-0905": 262144, "moonshotai/Kimi-K2.5": 262144, "moonshotai/Kimi-K2-Thinking": 262144, "MiniMaxAI/MiniMax-M2.5": 204800, - "MiniMaxAI/MiniMax-M2.1": 204800, "XiaomiMiMo/MiMo-V2-Flash": 32768, "zai-org/GLM-5": 202752, - "zai-org/GLM-4.7": 202752, - "zai-org/GLM-4.7-Flash": 202752, } _CONTEXT_LENGTH_KEYS = ( diff --git a/hermes_cli/main.py b/hermes_cli/main.py index a1afd0f07..bc7cf9e9c 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1504,26 +1504,17 @@ _PROVIDER_MODELS = { "google/gemini-3-pro-preview", "google/gemini-3-flash-preview", ], - # Curated model list sourced from https://models.dev (huggingface provider) + # Curated HF model list — only agentic models that map to OpenRouter defaults. + # Format: HF model ID → OpenRouter equivalent noted in comment "huggingface": [ - "Qwen/Qwen3.5-397B-A17B", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "Qwen/Qwen3-Coder-480B-A35B-Instruct", - "Qwen/Qwen3-Coder-Next", - "Qwen/Qwen3-Next-80B-A3B-Instruct", - "Qwen/Qwen3-Next-80B-A3B-Thinking", - "deepseek-ai/DeepSeek-R1-0528", - "deepseek-ai/DeepSeek-V3.2", - "moonshotai/Kimi-K2-Instruct", - "moonshotai/Kimi-K2-Instruct-0905", - "moonshotai/Kimi-K2.5", - "moonshotai/Kimi-K2-Thinking", - "MiniMaxAI/MiniMax-M2.5", - "MiniMaxAI/MiniMax-M2.1", - "XiaomiMiMo/MiMo-V2-Flash", - "zai-org/GLM-5", - "zai-org/GLM-4.7", - "zai-org/GLM-4.7-Flash", + "Qwen/Qwen3.5-397B-A17B", # ↔ qwen/qwen3.5-plus + "Qwen/Qwen3.5-35B-A3B", # ↔ qwen/qwen3.5-35b-a3b + "deepseek-ai/DeepSeek-V3.2", # ↔ deepseek/deepseek-chat + "moonshotai/Kimi-K2.5", # ↔ moonshotai/kimi-k2.5 + "MiniMaxAI/MiniMax-M2.5", # ↔ minimax/minimax-m2.5 + "zai-org/GLM-5", # ↔ z-ai/glm-5 + "XiaomiMiMo/MiMo-V2-Flash", # ↔ xiaomi/mimo-v2-pro + "moonshotai/Kimi-K2-Thinking", # ↔ moonshotai/kimi-k2-thinking ], } @@ -2054,19 +2045,25 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): save_env_value(base_url_env, override) effective_base = override - # Model selection — try live /models endpoint first, fall back to defaults - from hermes_cli.models import fetch_api_models - api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "") - live_models = fetch_api_models(api_key_for_probe, effective_base) + # Model selection — try live /models endpoint first, fall back to defaults. + # Providers with large live catalogs (100+ models) use a curated list instead + # so users see familiar model names rather than an overwhelming dump. + curated = _PROVIDER_MODELS.get(provider_id, []) + if curated and len(curated) >= 8: + # Curated list is substantial — use it directly, skip live probe + live_models = None + else: + from hermes_cli.models import fetch_api_models + api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "") + live_models = fetch_api_models(api_key_for_probe, effective_base) if live_models: model_list = live_models print(f" Found {len(model_list)} model(s) from {pconfig.name} API") else: - model_list = _PROVIDER_MODELS.get(provider_id, []) + model_list = curated if model_list: - print(" ⚠ Could not auto-detect models from API — showing defaults.") - print(" Use \"Enter custom model name\" if you don't see your model.") + print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.") # else: no defaults either, will fall through to raw input if model_list: diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 5506d6475..085d3ffb4 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -217,27 +217,16 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "qwen3.5-flash", "qwen-vl-max", ], - # Curated model list for Hugging Face Inference Providers - # sourced from https://models.dev (huggingface provider) + # Curated HF model list — only agentic models that map to OpenRouter defaults. "huggingface": [ "Qwen/Qwen3.5-397B-A17B", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "Qwen/Qwen3-Coder-480B-A35B-Instruct", - "Qwen/Qwen3-Coder-Next", - "Qwen/Qwen3-Next-80B-A3B-Instruct", - "Qwen/Qwen3-Next-80B-A3B-Thinking", - "deepseek-ai/DeepSeek-R1-0528", + "Qwen/Qwen3.5-35B-A3B", "deepseek-ai/DeepSeek-V3.2", - "moonshotai/Kimi-K2-Instruct", - "moonshotai/Kimi-K2-Instruct-0905", "moonshotai/Kimi-K2.5", - "moonshotai/Kimi-K2-Thinking", "MiniMaxAI/MiniMax-M2.5", - "MiniMaxAI/MiniMax-M2.1", - "XiaomiMiMo/MiMo-V2-Flash", "zai-org/GLM-5", - "zai-org/GLM-4.7", - "zai-org/GLM-4.7-Flash", + "XiaomiMiMo/MiMo-V2-Flash", + "moonshotai/Kimi-K2-Thinking", ], } diff --git a/tests/test_api_key_providers.py b/tests/test_api_key_providers.py index 42f74784e..ad1676e65 100644 --- a/tests/test_api_key_providers.py +++ b/tests/test_api_key_providers.py @@ -744,13 +744,13 @@ class TestHuggingFaceModels: from hermes_cli.main import _PROVIDER_MODELS assert "huggingface" in _PROVIDER_MODELS models = _PROVIDER_MODELS["huggingface"] - assert len(models) >= 10, "Expected at least 10 curated HF models" + assert len(models) >= 6, "Expected at least 6 curated HF models" def test_models_py_has_huggingface(self): from hermes_cli.models import _PROVIDER_MODELS assert "huggingface" in _PROVIDER_MODELS models = _PROVIDER_MODELS["huggingface"] - assert len(models) >= 10 + assert len(models) >= 6 def test_model_lists_match(self): """Model lists in main.py and models.py should be identical."""