diff --git a/.env.example b/.env.example
index d273a6966..515c00160 100644
--- a/.env.example
+++ b/.env.example
@@ -59,6 +59,15 @@ OPENCODE_ZEN_API_KEY=
 # OpenCode Go provides access to open models (GLM-5, Kimi K2.5, MiniMax M2.5)
 # $10/month subscription. Get your key at: https://opencode.ai/auth
 OPENCODE_GO_API_KEY=
 # OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1 # Override default base URL
+
+# =============================================================================
+# LLM PROVIDER (Hugging Face Inference Providers)
+# =============================================================================
+# Hugging Face routes to 20+ open models via unified OpenAI-compatible endpoint.
+# Free tier included ($0.10/month), no markup on provider rates.
+# Get your token at: https://huggingface.co/settings/tokens
+# Required permission: "Make calls to Inference Providers"
+HF_TOKEN=
 
 # =============================================================================
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 2e87498e0..c70c8368d 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -113,6 +113,25 @@ DEFAULT_CONTEXT_LENGTHS = {
     "glm": 202752,
     # Kimi
     "kimi": 262144,
+    # Hugging Face Inference Providers — model IDs use org/name format
+    "Qwen/Qwen3.5-397B-A17B": 131072,
+    "Qwen/Qwen3-235B-A22B-Thinking-2507": 131072,
+    "Qwen/Qwen3-Coder-480B-A35B-Instruct": 131072,
+    "Qwen/Qwen3-Coder-Next": 131072,
+    "Qwen/Qwen3-Next-80B-A3B-Instruct": 131072,
+    "Qwen/Qwen3-Next-80B-A3B-Thinking": 131072,
+    "deepseek-ai/DeepSeek-R1-0528": 65536,
+    "deepseek-ai/DeepSeek-V3.2": 65536,
+    "moonshotai/Kimi-K2-Instruct": 262144,
+    "moonshotai/Kimi-K2-Instruct-0905": 262144,
+    "moonshotai/Kimi-K2.5": 262144,
+    "moonshotai/Kimi-K2-Thinking": 262144,
+    "MiniMaxAI/MiniMax-M2.5": 204800,
+    "MiniMaxAI/MiniMax-M2.1": 204800,
+    "XiaomiMiMo/MiMo-V2-Flash": 32768,
+    "zai-org/GLM-5": 202752,
+    "zai-org/GLM-4.7": 202752,
+    "zai-org/GLM-4.7-Flash": 202752,
 }
_CONTEXT_LENGTH_KEYS = ( diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 493e5a1d8..f83a29ddf 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -212,6 +212,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("KILOCODE_API_KEY",), base_url_env_var="KILOCODE_BASE_URL", ), + "huggingface": ProviderConfig( + id="huggingface", + name="Hugging Face", + auth_type="api_key", + inference_base_url="https://router.huggingface.co/v1", + api_key_env_vars=("HF_TOKEN",), + base_url_env_var="HF_BASE_URL", + ), } @@ -685,6 +693,7 @@ def resolve_provider( "github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp", "aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway", "opencode": "opencode-zen", "zen": "opencode-zen", + "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", "go": "opencode-go", "opencode-go-sub": "opencode-go", "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode", } diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 2ab681ed6..6ad19a6f6 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -593,6 +593,21 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "HF_TOKEN": { + "description": "Hugging Face token for Inference Providers (20+ open models via router.huggingface.co)", + "prompt": "Hugging Face Token", + "url": "https://huggingface.co/settings/tokens", + "password": True, + "category": "provider", + }, + "HF_BASE_URL": { + "description": "Hugging Face Inference Providers base URL override", + "prompt": "HF base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, # ── Tool API keys ── "PARALLEL_API_KEY": { diff --git a/hermes_cli/main.py b/hermes_cli/main.py index d19b99b60..a1afd0f07 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -795,6 +795,7 @@ def cmd_model(args): "ai-gateway": "AI Gateway", "kilocode": 
"Kilo Code", "alibaba": "Alibaba Cloud (DashScope)", + "huggingface": "Hugging Face", "custom": "Custom endpoint", } active_label = provider_labels.get(active, active) @@ -821,6 +822,7 @@ def cmd_model(args): ("opencode-go", "OpenCode Go (open models, $10/month subscription)"), ("ai-gateway", "AI Gateway (Vercel — 200+ models, pay-per-use)"), ("alibaba", "Alibaba Cloud / DashScope (Qwen models, Anthropic-compatible)"), + ("huggingface", "Hugging Face Inference Providers (20+ open models)"), ] # Add user-defined custom providers from config.yaml @@ -893,7 +895,7 @@ def cmd_model(args): _model_flow_anthropic(config, current_model) elif selected_provider == "kimi-coding": _model_flow_kimi(config, current_model) - elif selected_provider in ("zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba"): + elif selected_provider in ("zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface"): _model_flow_api_key_provider(config, selected_provider, current_model) @@ -1502,6 +1504,27 @@ _PROVIDER_MODELS = { "google/gemini-3-pro-preview", "google/gemini-3-flash-preview", ], + # Curated model list sourced from https://models.dev (huggingface provider) + "huggingface": [ + "Qwen/Qwen3.5-397B-A17B", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "Qwen/Qwen3-Coder-Next", + "Qwen/Qwen3-Next-80B-A3B-Instruct", + "Qwen/Qwen3-Next-80B-A3B-Thinking", + "deepseek-ai/DeepSeek-R1-0528", + "deepseek-ai/DeepSeek-V3.2", + "moonshotai/Kimi-K2-Instruct", + "moonshotai/Kimi-K2-Instruct-0905", + "moonshotai/Kimi-K2.5", + "moonshotai/Kimi-K2-Thinking", + "MiniMaxAI/MiniMax-M2.5", + "MiniMaxAI/MiniMax-M2.1", + "XiaomiMiMo/MiMo-V2-Flash", + "zai-org/GLM-5", + "zai-org/GLM-4.7", + "zai-org/GLM-4.7-Flash", + ], } @@ -3122,7 +3145,7 @@ For more help on a command: ) chat_parser.add_argument( "--provider", - choices=["auto", "openrouter", "nous", "openai-codex", 
"copilot-acp", "copilot", "anthropic", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"], + choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"], default=None, help="Inference provider (default: auto)" ) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 4aa2a3d21..5506d6475 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -217,6 +217,28 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "qwen3.5-flash", "qwen-vl-max", ], + # Curated model list for Hugging Face Inference Providers + # sourced from https://models.dev (huggingface provider) + "huggingface": [ + "Qwen/Qwen3.5-397B-A17B", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "Qwen/Qwen3-Coder-Next", + "Qwen/Qwen3-Next-80B-A3B-Instruct", + "Qwen/Qwen3-Next-80B-A3B-Thinking", + "deepseek-ai/DeepSeek-R1-0528", + "deepseek-ai/DeepSeek-V3.2", + "moonshotai/Kimi-K2-Instruct", + "moonshotai/Kimi-K2-Instruct-0905", + "moonshotai/Kimi-K2.5", + "moonshotai/Kimi-K2-Thinking", + "MiniMaxAI/MiniMax-M2.5", + "MiniMaxAI/MiniMax-M2.1", + "XiaomiMiMo/MiMo-V2-Flash", + "zai-org/GLM-5", + "zai-org/GLM-4.7", + "zai-org/GLM-4.7-Flash", + ], } _PROVIDER_LABELS = { @@ -236,6 +258,7 @@ _PROVIDER_LABELS = { "ai-gateway": "AI Gateway", "kilocode": "Kilo Code", "alibaba": "Alibaba Cloud (DashScope)", + "huggingface": "Hugging Face", "custom": "Custom endpoint", } @@ -271,6 +294,9 @@ _PROVIDER_ALIASES = { "aliyun": "alibaba", "qwen": "alibaba", "alibaba-cloud": "alibaba", + "hf": "huggingface", + "hugging-face": "huggingface", + "huggingface-hub": "huggingface", } @@ -304,7 +330,7 @@ def list_available_providers() -> list[dict[str, str]]: # Canonical providers in display order _PROVIDER_ORDER = [ "openrouter", "nous", "openai-codex", "copilot", "copilot-acp", - "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba", + 
"huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba", "opencode-zen", "opencode-go", "ai-gateway", "deepseek", "custom", ] diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index fadccb58e..88d629701 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -80,6 +80,11 @@ _DEFAULT_PROVIDER_MODELS = { "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], + "huggingface": [ + "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507", + "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528", + "deepseek-ai/DeepSeek-V3.2", "moonshotai/Kimi-K2.5", + ], } @@ -884,6 +889,7 @@ def setup_model_provider(config: dict): "OpenCode Go (open models, $10/month subscription)", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)", + "Hugging Face Inference Providers (20+ open models)", ] if keep_label: provider_choices.append(keep_label) @@ -1528,7 +1534,26 @@ def setup_model_provider(config: dict): _set_model_provider(config, "copilot-acp", pconfig.inference_base_url) selected_base_url = pconfig.inference_base_url - # else: provider_idx == 16 (Keep current) — only shown when a provider already exists + elif provider_idx == 16: # Hugging Face Inference Providers + selected_provider = "huggingface" + print() + print_header("Hugging Face API Token") + pconfig = PROVIDER_REGISTRY["huggingface"] + print_info(f"Provider: {pconfig.name}") + print_info("Get your token at: https://huggingface.co/settings/tokens") + print_info("Required permission: 'Make calls to Inference Providers'") + print() + + api_key = prompt(" HF Token", 
password=True) + if api_key: + save_env_value("HF_TOKEN", api_key) + # Clear OpenRouter env vars to prevent routing confusion + save_env_value("OPENAI_BASE_URL", "") + save_env_value("OPENAI_API_KEY", "") + _set_model_provider(config, "huggingface", pconfig.inference_base_url) + selected_base_url = pconfig.inference_base_url + + # else: provider_idx == 17 (Keep current) — only shown when a provider already exists # Normalize "keep current" to an explicit provider so downstream logic # doesn't fall back to the generic OpenRouter/static-model path. if selected_provider is None: diff --git a/tests/test_api_key_providers.py b/tests/test_api_key_providers.py index 95d18bdd8..42f74784e 100644 --- a/tests/test_api_key_providers.py +++ b/tests/test_api_key_providers.py @@ -38,6 +38,7 @@ class TestProviderRegistry: @pytest.mark.parametrize("provider_id,name,auth_type", [ ("copilot-acp", "GitHub Copilot ACP", "external_process"), ("copilot", "GitHub Copilot", "api_key"), + ("huggingface", "Hugging Face", "api_key"), ("zai", "Z.AI / GLM", "api_key"), ("kimi-coding", "Kimi / Moonshot", "api_key"), ("minimax", "MiniMax", "api_key"), @@ -87,6 +88,11 @@ class TestProviderRegistry: assert pconfig.api_key_env_vars == ("KILOCODE_API_KEY",) assert pconfig.base_url_env_var == "KILOCODE_BASE_URL" + def test_huggingface_env_vars(self): + pconfig = PROVIDER_REGISTRY["huggingface"] + assert pconfig.api_key_env_vars == ("HF_TOKEN",) + assert pconfig.base_url_env_var == "HF_BASE_URL" + def test_base_urls(self): assert PROVIDER_REGISTRY["copilot"].inference_base_url == "https://api.githubcopilot.com" assert PROVIDER_REGISTRY["copilot-acp"].inference_base_url == "acp://copilot" @@ -96,6 +102,7 @@ class TestProviderRegistry: assert PROVIDER_REGISTRY["minimax-cn"].inference_base_url == "https://api.minimaxi.com/anthropic" assert PROVIDER_REGISTRY["ai-gateway"].inference_base_url == "https://ai-gateway.vercel.sh/v1" assert PROVIDER_REGISTRY["kilocode"].inference_base_url == 
"https://api.kilo.ai/api/gateway" + assert PROVIDER_REGISTRY["huggingface"].inference_base_url == "https://router.huggingface.co/v1" def test_oauth_providers_unchanged(self): """Ensure we didn't break the existing OAuth providers.""" @@ -199,6 +206,18 @@ class TestResolveProvider: assert resolve_provider("github-copilot-acp") == "copilot-acp" assert resolve_provider("copilot-acp-agent") == "copilot-acp" + def test_explicit_huggingface(self): + assert resolve_provider("huggingface") == "huggingface" + + def test_alias_hf(self): + assert resolve_provider("hf") == "huggingface" + + def test_alias_hugging_face(self): + assert resolve_provider("hugging-face") == "huggingface" + + def test_alias_huggingface_hub(self): + assert resolve_provider("huggingface-hub") == "huggingface" + def test_unknown_provider_raises(self): with pytest.raises(AuthError): resolve_provider("nonexistent-provider-xyz") @@ -235,6 +254,10 @@ class TestResolveProvider: monkeypatch.setenv("KILOCODE_API_KEY", "test-kilo-key") assert resolve_provider("auto") == "kilocode" + def test_auto_detects_hf_token(self, monkeypatch): + monkeypatch.setenv("HF_TOKEN", "hf_test_token") + assert resolve_provider("auto") == "huggingface" + def test_openrouter_takes_priority_over_glm(self, monkeypatch): """OpenRouter API key should win over GLM in auto-detection.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") @@ -708,3 +731,55 @@ class TestKimiMoonshotModelListIsolation: coding_models = _PROVIDER_MODELS["kimi-coding"] assert "kimi-for-coding" in coding_models assert "kimi-k2-thinking-turbo" in coding_models + + +# ============================================================================= +# Hugging Face provider model list tests +# ============================================================================= + +class TestHuggingFaceModels: + """Verify Hugging Face model lists are consistent across all locations.""" + + def test_main_provider_models_has_huggingface(self): + from hermes_cli.main import 
_PROVIDER_MODELS + assert "huggingface" in _PROVIDER_MODELS + models = _PROVIDER_MODELS["huggingface"] + assert len(models) >= 10, "Expected at least 10 curated HF models" + + def test_models_py_has_huggingface(self): + from hermes_cli.models import _PROVIDER_MODELS + assert "huggingface" in _PROVIDER_MODELS + models = _PROVIDER_MODELS["huggingface"] + assert len(models) >= 10 + + def test_model_lists_match(self): + """Model lists in main.py and models.py should be identical.""" + from hermes_cli.main import _PROVIDER_MODELS as main_models + from hermes_cli.models import _PROVIDER_MODELS as models_models + assert main_models["huggingface"] == models_models["huggingface"] + + def test_model_metadata_has_context_lengths(self): + """Every HF model should have a context length entry.""" + from hermes_cli.models import _PROVIDER_MODELS + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + hf_models = _PROVIDER_MODELS["huggingface"] + for model in hf_models: + assert model in DEFAULT_CONTEXT_LENGTHS, ( + f"HF model {model!r} missing from DEFAULT_CONTEXT_LENGTHS" + ) + + def test_models_use_org_name_format(self): + """HF models should use org/name format (e.g. 
Qwen/Qwen3-235B).""" + from hermes_cli.models import _PROVIDER_MODELS + for model in _PROVIDER_MODELS["huggingface"]: + assert "/" in model, f"HF model {model!r} missing org/ prefix" + + def test_provider_aliases_in_models_py(self): + from hermes_cli.models import _PROVIDER_ALIASES + assert _PROVIDER_ALIASES.get("hf") == "huggingface" + assert _PROVIDER_ALIASES.get("hugging-face") == "huggingface" + + def test_provider_label(self): + from hermes_cli.models import _PROVIDER_LABELS + assert "huggingface" in _PROVIDER_LABELS + assert _PROVIDER_LABELS["huggingface"] == "Hugging Face" diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index 24068d895..27cee7084 100644 --- a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -50,6 +50,7 @@ hermes setup # Or configure everything at once | **MiniMax** | International MiniMax endpoint | Set `MINIMAX_API_KEY` | | **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` | | **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` | +| **Hugging Face** | 20+ open models via unified router (Qwen, DeepSeek, Kimi, etc.) | Set `HF_TOKEN` | | **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` | | **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` | | **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` | diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index d527b61e2..9155793e4 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -66,7 +66,7 @@ Common options: | `-q`, `--query "..."` | One-shot, non-interactive prompt. | | `-m`, `--model ` | Override the model for this run. | | `-t`, `--toolsets ` | Enable a comma-separated set of toolsets. 
| -| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`. | +| `--provider ` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`. | | `-s`, `--skills ` | Preload one or more skills for the session (can be repeated or comma-separated). | | `-v`, `--verbose` | Verbose output. | | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. | diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 39fb0b83a..939a02132 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -37,6 +37,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `MINIMAX_CN_BASE_URL` | Override MiniMax China base URL (default: `https://api.minimaxi.com/v1`) | | `KILOCODE_API_KEY` | Kilo Code API key ([kilo.ai](https://kilo.ai)) | | `KILOCODE_BASE_URL` | Override Kilo Code base URL (default: `https://api.kilo.ai/api/gateway`) | +| `HF_TOKEN` | Hugging Face token for Inference Providers ([huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)) | +| `HF_BASE_URL` | Override Hugging Face base URL (default: `https://router.huggingface.co/v1`) | | `ANTHROPIC_API_KEY` | Anthropic Console API key ([console.anthropic.com](https://console.anthropic.com/)) | | `ANTHROPIC_TOKEN` | Manual or legacy Anthropic OAuth/setup-token override | | `DASHSCOPE_API_KEY` | Alibaba Cloud DashScope API key for Qwen models ([modelstudio.console.alibabacloud.com](https://modelstudio.console.alibabacloud.com/)) | @@ -61,7 +63,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | Variable | Description | |----------|-------------| -| `HERMES_INFERENCE_PROVIDER` 
| Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode` (default: `auto`) | +| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`, `alibaba` (default: `auto`) | | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) | | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL | | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) | diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 6e4cfa98e..9c5f5d179 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -92,6 +92,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | **Kilo Code** | `KILOCODE_API_KEY` in `~/.hermes/.env` (provider: `kilocode`) | | **OpenCode Zen** | `OPENCODE_ZEN_API_KEY` in `~/.hermes/.env` (provider: `opencode-zen`) | | **OpenCode Go** | `OPENCODE_GO_API_KEY` in `~/.hermes/.env` (provider: `opencode-go`) | +| **Hugging Face** | `HF_TOKEN` in `~/.hermes/.env` (provider: `huggingface`, aliases: `hf`) | | **Custom Endpoint** | `hermes model` (saved in `config.yaml`) or `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | :::info Codex Note @@ -224,6 +225,32 @@ model: Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, or `DASHSCOPE_BASE_URL` environment variables. +### Hugging Face Inference Providers + +[Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). 
Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover. + +```bash +# Use any available model +hermes chat --provider huggingface --model Qwen/Qwen3-235B-A22B-Thinking-2507 +# Requires: HF_TOKEN in ~/.hermes/.env + +# Short alias +hermes chat --provider hf --model deepseek-ai/DeepSeek-V3.2 +``` + +Or set it permanently in `config.yaml`: +```yaml +model: + provider: "huggingface" + default: "Qwen/Qwen3-235B-A22B-Thinking-2507" +``` + +Get your token at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) — make sure to enable the "Make calls to Inference Providers" permission. Free tier included ($0.10/month credit, no markup on provider rates). + +You can append routing suffixes to model names: `:fastest` (default), `:cheapest`, or `:provider_name` to force a specific backend. + +The base URL can be overridden with `HF_BASE_URL`. + ## Custom & Self-Hosted LLM Providers Hermes Agent works with **any OpenAI-compatible API endpoint**. If a server implements `/v1/chat/completions`, you can point Hermes at it. This means you can use local models, GPU inference servers, multi-provider routers, or any third-party API. @@ -627,7 +654,7 @@ fallback_model: When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session. -Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `anthropic`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `custom`. +Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `custom`. :::tip Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers). 
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index 63e9337e4..c149eee94 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -44,6 +44,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback | MiniMax | `minimax` | `MINIMAX_API_KEY` | | MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` | | Kilo Code | `kilocode` | `KILOCODE_API_KEY` | +| Hugging Face | `huggingface` | `HF_TOKEN` | | Custom endpoint | `custom` | `base_url` + `api_key_env` (see below) | ### Custom Endpoint Fallback @@ -161,7 +162,7 @@ When a task's provider is set to `"auto"` (the default), Hermes tries providers ```text OpenRouter → Nous Portal → Custom endpoint → Codex OAuth → -API-key providers (z.ai, Kimi, MiniMax, Anthropic) → give up +API-key providers (z.ai, Kimi, MiniMax, Hugging Face, Anthropic) → give up ``` **For vision tasks:**