feat: add Hugging Face as a first-class inference provider (#3419)

Salvaged from PR #1747 (originally opened as PR #1171 by @davanstrien) and rebased onto current main.

Registers Hugging Face Inference Providers (router.huggingface.co/v1) as a named provider:
- hermes chat --provider huggingface (or --provider hf)
- 18 curated open models via hermes model picker
- HF_TOKEN in ~/.hermes/.env
- OpenAI-compatible endpoint with automatic failover (Groq, Together, SambaNova, etc.)

Files: auth.py, models.py, main.py, setup.py, config.py, model_metadata.py, .env.example, 5 docs pages, 17 new tests.

Co-authored-by: Daniel van Strien <davanstrien@gmail.com>
This commit is contained in:
Teknium
2026-03-27 12:41:59 -07:00
committed by GitHub
parent f57ebf52e9
commit fd8c465e42
13 changed files with 240 additions and 8 deletions

View File

@@ -113,6 +113,25 @@ DEFAULT_CONTEXT_LENGTHS = {
"glm": 202752,
# Kimi
"kimi": 262144,
# Hugging Face Inference Providers — model IDs use org/name format
"Qwen/Qwen3.5-397B-A17B": 131072,
"Qwen/Qwen3-235B-A22B-Thinking-2507": 131072,
"Qwen/Qwen3-Coder-480B-A35B-Instruct": 131072,
"Qwen/Qwen3-Coder-Next": 131072,
"Qwen/Qwen3-Next-80B-A3B-Instruct": 131072,
"Qwen/Qwen3-Next-80B-A3B-Thinking": 131072,
"deepseek-ai/DeepSeek-R1-0528": 65536,
"deepseek-ai/DeepSeek-V3.2": 65536,
"moonshotai/Kimi-K2-Instruct": 262144,
"moonshotai/Kimi-K2-Instruct-0905": 262144,
"moonshotai/Kimi-K2.5": 262144,
"moonshotai/Kimi-K2-Thinking": 262144,
"MiniMaxAI/MiniMax-M2.5": 204800,
"MiniMaxAI/MiniMax-M2.1": 204800,
"XiaomiMiMo/MiMo-V2-Flash": 32768,
"zai-org/GLM-5": 202752,
"zai-org/GLM-4.7": 202752,
"zai-org/GLM-4.7-Flash": 202752,
}
_CONTEXT_LENGTH_KEYS = (