feat: add Hugging Face as a first-class inference provider (#3419)
Salvage of PR #1747 (original PR #1171 by @davanstrien) onto current main. Registers Hugging Face Inference Providers (router.huggingface.co/v1) as a named provider: - hermes chat --provider huggingface (or --provider hf) - 18 curated open models via hermes model picker - HF_TOKEN in ~/.hermes/.env - OpenAI-compatible endpoint with automatic failover (Groq, Together, SambaNova, etc.) Files: auth.py, models.py, main.py, setup.py, config.py, model_metadata.py, .env.example, 5 docs pages, 17 new tests. Co-authored-by: Daniel van Strien <davanstrien@gmail.com>
This commit is contained in:
@@ -212,6 +212,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=("KILOCODE_API_KEY",),
|
||||
base_url_env_var="KILOCODE_BASE_URL",
|
||||
),
|
||||
"huggingface": ProviderConfig(
|
||||
id="huggingface",
|
||||
name="Hugging Face",
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://router.huggingface.co/v1",
|
||||
api_key_env_vars=("HF_TOKEN",),
|
||||
base_url_env_var="HF_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -685,6 +693,7 @@ def resolve_provider(
|
||||
"github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp",
|
||||
"aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway",
|
||||
"opencode": "opencode-zen", "zen": "opencode-zen",
|
||||
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
||||
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||
}
|
||||
|
||||
@@ -593,6 +593,21 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"HF_TOKEN": {
|
||||
"description": "Hugging Face token for Inference Providers (20+ open models via router.huggingface.co)",
|
||||
"prompt": "Hugging Face Token",
|
||||
"url": "https://huggingface.co/settings/tokens",
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
},
|
||||
"HF_BASE_URL": {
|
||||
"description": "Hugging Face Inference Providers base URL override",
|
||||
"prompt": "HF base URL (leave empty for default)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Tool API keys ──
|
||||
"PARALLEL_API_KEY": {
|
||||
|
||||
@@ -795,6 +795,7 @@ def cmd_model(args):
|
||||
"ai-gateway": "AI Gateway",
|
||||
"kilocode": "Kilo Code",
|
||||
"alibaba": "Alibaba Cloud (DashScope)",
|
||||
"huggingface": "Hugging Face",
|
||||
"custom": "Custom endpoint",
|
||||
}
|
||||
active_label = provider_labels.get(active, active)
|
||||
@@ -821,6 +822,7 @@ def cmd_model(args):
|
||||
("opencode-go", "OpenCode Go (open models, $10/month subscription)"),
|
||||
("ai-gateway", "AI Gateway (Vercel — 200+ models, pay-per-use)"),
|
||||
("alibaba", "Alibaba Cloud / DashScope (Qwen models, Anthropic-compatible)"),
|
||||
("huggingface", "Hugging Face Inference Providers (20+ open models)"),
|
||||
]
|
||||
|
||||
# Add user-defined custom providers from config.yaml
|
||||
@@ -893,7 +895,7 @@ def cmd_model(args):
|
||||
_model_flow_anthropic(config, current_model)
|
||||
elif selected_provider == "kimi-coding":
|
||||
_model_flow_kimi(config, current_model)
|
||||
elif selected_provider in ("zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba"):
|
||||
elif selected_provider in ("zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface"):
|
||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||
|
||||
|
||||
@@ -1502,6 +1504,27 @@ _PROVIDER_MODELS = {
|
||||
"google/gemini-3-pro-preview",
|
||||
"google/gemini-3-flash-preview",
|
||||
],
|
||||
# Curated model list sourced from https://models.dev (huggingface provider)
|
||||
"huggingface": [
|
||||
"Qwen/Qwen3.5-397B-A17B",
|
||||
"Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||
"Qwen/Qwen3-Coder-480B-A35B-Instruct",
|
||||
"Qwen/Qwen3-Coder-Next",
|
||||
"Qwen/Qwen3-Next-80B-A3B-Instruct",
|
||||
"Qwen/Qwen3-Next-80B-A3B-Thinking",
|
||||
"deepseek-ai/DeepSeek-R1-0528",
|
||||
"deepseek-ai/DeepSeek-V3.2",
|
||||
"moonshotai/Kimi-K2-Instruct",
|
||||
"moonshotai/Kimi-K2-Instruct-0905",
|
||||
"moonshotai/Kimi-K2.5",
|
||||
"moonshotai/Kimi-K2-Thinking",
|
||||
"MiniMaxAI/MiniMax-M2.5",
|
||||
"MiniMaxAI/MiniMax-M2.1",
|
||||
"XiaomiMiMo/MiMo-V2-Flash",
|
||||
"zai-org/GLM-5",
|
||||
"zai-org/GLM-4.7",
|
||||
"zai-org/GLM-4.7-Flash",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@@ -3122,7 +3145,7 @@ For more help on a command:
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--provider",
|
||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"],
|
||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"],
|
||||
default=None,
|
||||
help="Inference provider (default: auto)"
|
||||
)
|
||||
|
||||
@@ -217,6 +217,28 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"qwen3.5-flash",
|
||||
"qwen-vl-max",
|
||||
],
|
||||
# Curated model list for Hugging Face Inference Providers
|
||||
# sourced from https://models.dev (huggingface provider)
|
||||
"huggingface": [
|
||||
"Qwen/Qwen3.5-397B-A17B",
|
||||
"Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||
"Qwen/Qwen3-Coder-480B-A35B-Instruct",
|
||||
"Qwen/Qwen3-Coder-Next",
|
||||
"Qwen/Qwen3-Next-80B-A3B-Instruct",
|
||||
"Qwen/Qwen3-Next-80B-A3B-Thinking",
|
||||
"deepseek-ai/DeepSeek-R1-0528",
|
||||
"deepseek-ai/DeepSeek-V3.2",
|
||||
"moonshotai/Kimi-K2-Instruct",
|
||||
"moonshotai/Kimi-K2-Instruct-0905",
|
||||
"moonshotai/Kimi-K2.5",
|
||||
"moonshotai/Kimi-K2-Thinking",
|
||||
"MiniMaxAI/MiniMax-M2.5",
|
||||
"MiniMaxAI/MiniMax-M2.1",
|
||||
"XiaomiMiMo/MiMo-V2-Flash",
|
||||
"zai-org/GLM-5",
|
||||
"zai-org/GLM-4.7",
|
||||
"zai-org/GLM-4.7-Flash",
|
||||
],
|
||||
}
|
||||
|
||||
_PROVIDER_LABELS = {
|
||||
@@ -236,6 +258,7 @@ _PROVIDER_LABELS = {
|
||||
"ai-gateway": "AI Gateway",
|
||||
"kilocode": "Kilo Code",
|
||||
"alibaba": "Alibaba Cloud (DashScope)",
|
||||
"huggingface": "Hugging Face",
|
||||
"custom": "Custom endpoint",
|
||||
}
|
||||
|
||||
@@ -271,6 +294,9 @@ _PROVIDER_ALIASES = {
|
||||
"aliyun": "alibaba",
|
||||
"qwen": "alibaba",
|
||||
"alibaba-cloud": "alibaba",
|
||||
"hf": "huggingface",
|
||||
"hugging-face": "huggingface",
|
||||
"huggingface-hub": "huggingface",
|
||||
}
|
||||
|
||||
|
||||
@@ -304,7 +330,7 @@ def list_available_providers() -> list[dict[str, str]]:
|
||||
# Canonical providers in display order
|
||||
_PROVIDER_ORDER = [
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
|
||||
"huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
|
||||
"opencode-zen", "opencode-go",
|
||||
"ai-gateway", "deepseek", "custom",
|
||||
]
|
||||
|
||||
@@ -80,6 +80,11 @@ _DEFAULT_PROVIDER_MODELS = {
|
||||
"minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
|
||||
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
|
||||
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
|
||||
"huggingface": [
|
||||
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
|
||||
"deepseek-ai/DeepSeek-V3.2", "moonshotai/Kimi-K2.5",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@@ -884,6 +889,7 @@ def setup_model_provider(config: dict):
|
||||
"OpenCode Go (open models, $10/month subscription)",
|
||||
"GitHub Copilot (uses GITHUB_TOKEN or gh auth token)",
|
||||
"GitHub Copilot ACP (spawns `copilot --acp --stdio`)",
|
||||
"Hugging Face Inference Providers (20+ open models)",
|
||||
]
|
||||
if keep_label:
|
||||
provider_choices.append(keep_label)
|
||||
@@ -1528,7 +1534,26 @@ def setup_model_provider(config: dict):
|
||||
_set_model_provider(config, "copilot-acp", pconfig.inference_base_url)
|
||||
selected_base_url = pconfig.inference_base_url
|
||||
|
||||
# else: provider_idx == 16 (Keep current) — only shown when a provider already exists
|
||||
elif provider_idx == 16: # Hugging Face Inference Providers
|
||||
selected_provider = "huggingface"
|
||||
print()
|
||||
print_header("Hugging Face API Token")
|
||||
pconfig = PROVIDER_REGISTRY["huggingface"]
|
||||
print_info(f"Provider: {pconfig.name}")
|
||||
print_info("Get your token at: https://huggingface.co/settings/tokens")
|
||||
print_info("Required permission: 'Make calls to Inference Providers'")
|
||||
print()
|
||||
|
||||
api_key = prompt(" HF Token", password=True)
|
||||
if api_key:
|
||||
save_env_value("HF_TOKEN", api_key)
|
||||
# Clear OpenRouter env vars to prevent routing confusion
|
||||
save_env_value("OPENAI_BASE_URL", "")
|
||||
save_env_value("OPENAI_API_KEY", "")
|
||||
_set_model_provider(config, "huggingface", pconfig.inference_base_url)
|
||||
selected_base_url = pconfig.inference_base_url
|
||||
|
||||
# else: provider_idx == 17 (Keep current) — only shown when a provider already exists
|
||||
# Normalize "keep current" to an explicit provider so downstream logic
|
||||
# doesn't fall back to the generic OpenRouter/static-model path.
|
||||
if selected_provider is None:
|
||||
|
||||
Reference in New Issue
Block a user