feat: add Hugging Face as a first-class inference provider (#3419)

Salvaged from PR #1747 (originally opened as PR #1171 by @davanstrien) and rebased onto current main.

Registers Hugging Face Inference Providers (router.huggingface.co/v1) as a named provider:
- hermes chat --provider huggingface (or --provider hf)
- 18 curated open models via hermes model picker
- HF_TOKEN in ~/.hermes/.env
- OpenAI-compatible endpoint with automatic failover (Groq, Together, SambaNova, etc.)

Files: auth.py, models.py, main.py, setup.py, config.py, model_metadata.py, .env.example, 5 docs pages, 17 new tests.

Co-authored-by: Daniel van Strien <davanstrien@gmail.com>
This commit is contained in:
Teknium
2026-03-27 12:41:59 -07:00
committed by GitHub
parent f57ebf52e9
commit fd8c465e42
13 changed files with 240 additions and 8 deletions

View File

@@ -113,6 +113,25 @@ DEFAULT_CONTEXT_LENGTHS = {
"glm": 202752,
# Kimi
"kimi": 262144,
# Hugging Face Inference Providers — model IDs use org/name format
"Qwen/Qwen3.5-397B-A17B": 131072,
"Qwen/Qwen3-235B-A22B-Thinking-2507": 131072,
"Qwen/Qwen3-Coder-480B-A35B-Instruct": 131072,
"Qwen/Qwen3-Coder-Next": 131072,
"Qwen/Qwen3-Next-80B-A3B-Instruct": 131072,
"Qwen/Qwen3-Next-80B-A3B-Thinking": 131072,
"deepseek-ai/DeepSeek-R1-0528": 65536,
"deepseek-ai/DeepSeek-V3.2": 65536,
"moonshotai/Kimi-K2-Instruct": 262144,
"moonshotai/Kimi-K2-Instruct-0905": 262144,
"moonshotai/Kimi-K2.5": 262144,
"moonshotai/Kimi-K2-Thinking": 262144,
"MiniMaxAI/MiniMax-M2.5": 204800,
"MiniMaxAI/MiniMax-M2.1": 204800,
"XiaomiMiMo/MiMo-V2-Flash": 32768,
"zai-org/GLM-5": 202752,
"zai-org/GLM-4.7": 202752,
"zai-org/GLM-4.7-Flash": 202752,
}
_CONTEXT_LENGTH_KEYS = (