From 3576f44a577fcbc03a65e5fc3193b0d51dae45ea Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 17 Mar 2026 00:12:16 -0700 Subject: [PATCH] feat: add Vercel AI Gateway provider (#1628) * feat: add Vercel AI Gateway as a first-class provider Adds AI Gateway (ai-gateway.vercel.sh) as a new inference provider with AI_GATEWAY_API_KEY authentication, live model discovery, and reasoning support via extra_body.reasoning. Based on PR #1492 by jerilynzheng. * feat: add AI Gateway to setup wizard, doctor, and fallback providers * test: add AI Gateway to api_key_providers test suite * feat: add AI Gateway to hermes model CLI and model metadata Wire AI Gateway into the interactive model selection menu and add context lengths for AI Gateway model IDs in model_metadata.py. * feat: use claude-haiku-4.5 as AI Gateway auxiliary model * revert: use gemini-3-flash as AI Gateway auxiliary model * fix: move AI Gateway below established providers in selection order --------- Co-authored-by: jerilynzheng Co-authored-by: jerilynzheng --- agent/auxiliary_client.py | 1 + agent/model_metadata.py | 9 ++++ hermes_cli/auth.py | 9 ++++ hermes_cli/doctor.py | 1 + hermes_cli/main.py | 4 +- hermes_cli/models.py | 53 ++++++++++++++++++- hermes_cli/setup.py | 39 +++++++++++++- hermes_constants.py | 4 ++ run_agent.py | 2 + tests/test_api_key_providers.py | 39 +++++++++++++- tests/test_provider_parity.py | 34 ++++++++++++ tests/test_runtime_provider_resolution.py | 14 +++++ .../docs/developer-guide/provider-runtime.md | 19 +++++-- .../docs/reference/environment-variables.md | 2 + website/docs/user-guide/configuration.md | 1 + .../user-guide/features/fallback-providers.md | 1 + 16 files changed, 223 insertions(+), 9 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index ff542a113..cf740bc89 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -57,6 +57,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "minimax": "MiniMax-M2.5-highspeed", "minimax-cn": "MiniMax-M2.5-highspeed", "anthropic": "claude-haiku-4-5-20251001", + "ai-gateway": "google/gemini-3-flash", } # OpenRouter app attribution headers diff --git a/agent/model_metadata.py b/agent/model_metadata.py index a609ea030..755bc81b0 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -40,6 +40,8 @@ DEFAULT_CONTEXT_LENGTHS = { "anthropic/claude-opus-4.6": 200000, "anthropic/claude-sonnet-4": 200000, "anthropic/claude-sonnet-4-20250514": 200000, + "anthropic/claude-sonnet-4.5": 200000, + "anthropic/claude-sonnet-4.6": 200000, "anthropic/claude-haiku-4.5": 200000, # Bare Anthropic model IDs (for native API provider) "claude-opus-4-6": 200000, @@ -50,11 +52,18 @@ DEFAULT_CONTEXT_LENGTHS = { "claude-opus-4-20250514": 200000, "claude-sonnet-4-20250514": 200000, "claude-haiku-4-5-20251001": 200000, + "openai/gpt-5": 128000, + "openai/gpt-4.1": 1047576, + "openai/gpt-4.1-mini": 1047576, "openai/gpt-4o": 128000, "openai/gpt-4-turbo": 128000, "openai/gpt-4o-mini": 128000, + "google/gemini-3-pro-preview": 1048576, + "google/gemini-3-flash": 1048576, + "google/gemini-2.5-flash": 1048576, "google/gemini-2.0-flash": 1048576, "google/gemini-2.5-pro": 1048576, + "deepseek/deepseek-v3.2": 65536, "meta-llama/llama-3.3-70b-instruct": 131072, "deepseek/deepseek-chat-v3": 65536, "qwen/qwen-2.5-72b-instruct": 32768, diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 1863f0bb8..c5d20082b 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -155,6 +155,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("DEEPSEEK_API_KEY",), base_url_env_var="DEEPSEEK_BASE_URL", ), + "ai-gateway": ProviderConfig( + id="ai-gateway", + name="AI Gateway", + auth_type="api_key", + inference_base_url="https://ai-gateway.vercel.sh/v1", + api_key_env_vars=("AI_GATEWAY_API_KEY",), + base_url_env_var="AI_GATEWAY_BASE_URL", + ), } @@ -532,6 +540,7 @@ def resolve_provider( "kimi": "kimi-coding", "moonshot": "kimi-coding", "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", "claude": "anthropic", "claude-code": "anthropic", + "aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway", } normalized = _PROVIDER_ALIASES.get(normalized, normalized) diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 9cd0a8a9e..33900b7cc 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -570,6 +570,7 @@ def run_doctor(args): # MiniMax APIs don't support /models endpoint — https://github.com/NousResearch/hermes-agent/issues/811 ("MiniMax", ("MINIMAX_API_KEY",), None, "MINIMAX_BASE_URL", False), ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), None, "MINIMAX_CN_BASE_URL", False), + ("AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True), ] for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers: _key = "" diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 15f546cb1..876bc38c8 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -768,6 +768,7 @@ def cmd_model(args): "kimi-coding": "Kimi / Moonshot", "minimax": "MiniMax", "minimax-cn": "MiniMax (China)", + "ai-gateway": "AI Gateway", "custom": "Custom endpoint", } active_label = provider_labels.get(active, active) @@ -787,6 +788,7 @@ def cmd_model(args): ("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"), ("minimax", "MiniMax (global direct API)"), ("minimax-cn", "MiniMax China (domestic direct API)"), + ("ai-gateway", "AI Gateway (Vercel — 200+ models, pay-per-use)"), ] # Add user-defined custom providers from config.yaml @@ -855,7 +857,7 @@ def cmd_model(args): _model_flow_anthropic(config, current_model) elif selected_provider == "kimi-coding": _model_flow_kimi(config, current_model) - elif selected_provider in ("zai", "minimax", "minimax-cn"): + elif selected_provider in ("zai", "minimax", "minimax-cn", "ai-gateway"): _model_flow_api_key_provider(config, selected_provider, current_model) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 13373afa9..528273f95 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -8,6 +8,7 @@ Add, remove, or reorder entries here — both `hermes setup` and from __future__ import annotations import json +import os import urllib.request import urllib.error from difflib import get_close_matches @@ -82,6 +83,20 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "deepseek-chat", "deepseek-reasoner", ], + "ai-gateway": [ + "anthropic/claude-opus-4.6", + "anthropic/claude-sonnet-4.6", + "anthropic/claude-sonnet-4.5", + "anthropic/claude-haiku-4.5", + "openai/gpt-5", + "openai/gpt-4.1", + "openai/gpt-4.1-mini", + "google/gemini-3-pro-preview", + "google/gemini-3-flash", + "google/gemini-2.5-pro", + "google/gemini-2.5-flash", + "deepseek/deepseek-v3.2", + ], } _PROVIDER_LABELS = { @@ -94,6 +109,7 @@ _PROVIDER_LABELS = { "minimax-cn": "MiniMax (China)", "anthropic": "Anthropic", "deepseek": "DeepSeek", + "ai-gateway": "AI Gateway", "custom": "Custom endpoint", } @@ -109,6 +125,9 @@ _PROVIDER_ALIASES = { "claude": "anthropic", "claude-code": "anthropic", "deep-seek": "deepseek", + "aigateway": "ai-gateway", + "vercel": "ai-gateway", + "vercel-ai-gateway": "ai-gateway", } @@ -142,7 +161,8 @@ def list_available_providers() -> list[dict[str, str]]: # Canonical providers in display order _PROVIDER_ORDER = [ "openrouter", "nous", "openai-codex", - "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek", + "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", + "ai-gateway", "deepseek", ] # Build reverse alias map aliases_for: dict[str, list[str]] = {} @@ -372,6 +392,10 @@ def provider_model_ids(provider: Optional[str]) -> list[str]: live = _fetch_anthropic_models() if live: return live + if normalized == "ai-gateway": + live = _fetch_ai_gateway_models() + if live: + return live return list(_PROVIDER_MODELS.get(normalized, [])) @@ -475,6 +499,33 @@ def probe_api_models( } +def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]: + """Fetch available language models with tool-use from AI Gateway.""" + api_key = os.getenv("AI_GATEWAY_API_KEY", "").strip() + if not api_key: + return None + base_url = os.getenv("AI_GATEWAY_BASE_URL", "").strip() + if not base_url: + from hermes_constants import AI_GATEWAY_BASE_URL + base_url = AI_GATEWAY_BASE_URL + + url = base_url.rstrip("/") + "/models" + headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"} + req = urllib.request.Request(url, headers=headers) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + return [ + m["id"] + for m in data.get("data", []) + if m.get("id") + and m.get("type") == "language" + and "tool-use" in (m.get("tags") or []) + ] + except Exception: + return None + + def fetch_api_models( api_key: Optional[str], base_url: Optional[str], diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index e751811a1..c567dc700 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -59,6 +59,7 @@ _DEFAULT_PROVIDER_MODELS = { "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], "minimax": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], "minimax-cn": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], + "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], } @@ -724,6 +725,7 @@ def setup_model_provider(config: dict): "MiniMax (global endpoint)", "MiniMax China (mainland China endpoint)", "Anthropic (Claude models — API key or Claude Code subscription)", + "AI Gateway (Vercel — 200+ models, pay-per-use)", ] if keep_label: provider_choices.append(keep_label) @@ -1232,7 +1234,39 @@ def setup_model_provider(config: dict): _set_model_provider(config, "anthropic") selected_base_url = "" - # else: provider_idx == 9 (Keep current) — only shown when a provider already exists + elif provider_idx == 9: # AI Gateway + selected_provider = "ai-gateway" + print() + print_header("AI Gateway API Key") + pconfig = PROVIDER_REGISTRY["ai-gateway"] + print_info(f"Provider: {pconfig.name}") + print_info("Get your API key at: https://vercel.com/docs/ai-gateway") + print() + + existing_key = get_env_value("AI_GATEWAY_API_KEY") + if existing_key: + print_info(f"Current: {existing_key[:8]}... (configured)") + if prompt_yes_no("Update API key?", False): + api_key = prompt(" AI Gateway API key", password=True) + if api_key: + save_env_value("AI_GATEWAY_API_KEY", api_key) + print_success("AI Gateway API key updated") + else: + api_key = prompt(" AI Gateway API key", password=True) + if api_key: + save_env_value("AI_GATEWAY_API_KEY", api_key) + print_success("AI Gateway API key saved") + else: + print_warning("Skipped - agent won't work without an API key") + + # Clear custom endpoint vars if switching + if existing_custom: + save_env_value("OPENAI_BASE_URL", "") + save_env_value("OPENAI_API_KEY", "") + _update_config_for_provider("ai-gateway", pconfig.inference_base_url, default_model="anthropic/claude-opus-4.6") + _set_model_provider(config, "ai-gateway", pconfig.inference_base_url) + + # else: provider_idx == 10 (Keep current) — only shown when a provider already exists # Normalize "keep current" to an explicit provider so downstream logic # doesn't fall back to the generic OpenRouter/static-model path. if selected_provider is None: @@ -1269,6 +1303,7 @@ def setup_model_provider(config: dict): "minimax": "MiniMax", "minimax-cn": "MiniMax CN", "anthropic": "Anthropic", + "ai-gateway": "AI Gateway", "custom": "your custom endpoint", } _prov_display = _prov_names.get(selected_provider, selected_provider or "your provider") @@ -1402,7 +1437,7 @@ def setup_model_provider(config: dict): _set_default_model(config, custom) _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL) - elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"): + elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn", "ai-gateway"): _setup_provider_model_selection( config, selected_provider, current_model, prompt_choice, prompt, diff --git a/hermes_constants.py b/hermes_constants.py index a81af04d3..6a11fb37a 100644 --- a/hermes_constants.py +++ b/hermes_constants.py @@ -8,5 +8,9 @@ OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1" OPENROUTER_MODELS_URL = f"{OPENROUTER_BASE_URL}/models" OPENROUTER_CHAT_URL = f"{OPENROUTER_BASE_URL}/chat/completions" +AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh/v1" +AI_GATEWAY_MODELS_URL = f"{AI_GATEWAY_BASE_URL}/models" +AI_GATEWAY_CHAT_URL = f"{AI_GATEWAY_BASE_URL}/chat/completions" + NOUS_API_BASE_URL = "https://inference-api.nousresearch.com/v1" NOUS_API_CHAT_URL = f"{NOUS_API_BASE_URL}/chat/completions" diff --git a/run_agent.py b/run_agent.py index 6ae8170db..afee105e8 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3523,6 +3523,8 @@ class AIAgent: base_url = (self.base_url or "").lower() if "nousresearch" in base_url: return True + if "ai-gateway.vercel.sh" in base_url: + return True if "openrouter" not in base_url: return False if "api.mistral.ai" in base_url: diff --git a/tests/test_api_key_providers.py b/tests/test_api_key_providers.py index 01378569d..3ff377fbe 100644 --- a/tests/test_api_key_providers.py +++ b/tests/test_api_key_providers.py @@ -1,4 +1,4 @@ -"""Tests for API-key provider support (z.ai/GLM, Kimi, MiniMax).""" +"""Tests for API-key provider support (z.ai/GLM, Kimi, MiniMax, AI Gateway).""" import os import sys @@ -37,6 +37,7 @@ class TestProviderRegistry: ("kimi-coding", "Kimi / Moonshot", "api_key"), ("minimax", "MiniMax", "api_key"), ("minimax-cn", "MiniMax (China)", "api_key"), + ("ai-gateway", "AI Gateway", "api_key"), ]) def test_provider_registered(self, provider_id, name, auth_type): assert provider_id in PROVIDER_REGISTRY @@ -65,11 +66,17 @@ class TestProviderRegistry: assert pconfig.api_key_env_vars == ("MINIMAX_CN_API_KEY",) assert pconfig.base_url_env_var == "MINIMAX_CN_BASE_URL" + def test_ai_gateway_env_vars(self): + pconfig = PROVIDER_REGISTRY["ai-gateway"] + assert pconfig.api_key_env_vars == ("AI_GATEWAY_API_KEY",) + assert pconfig.base_url_env_var == "AI_GATEWAY_BASE_URL" + def test_base_urls(self): assert PROVIDER_REGISTRY["zai"].inference_base_url == "https://api.z.ai/api/paas/v4" assert PROVIDER_REGISTRY["kimi-coding"].inference_base_url == "https://api.moonshot.ai/v1" assert PROVIDER_REGISTRY["minimax"].inference_base_url == "https://api.minimax.io/v1" assert PROVIDER_REGISTRY["minimax-cn"].inference_base_url == "https://api.minimaxi.com/v1" + assert PROVIDER_REGISTRY["ai-gateway"].inference_base_url == "https://ai-gateway.vercel.sh/v1" def test_oauth_providers_unchanged(self): """Ensure we didn't break the existing OAuth providers.""" @@ -87,6 +94,7 @@ PROVIDER_ENV_VARS = ( "OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY", "KIMI_API_KEY", "KIMI_BASE_URL", "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY", + "AI_GATEWAY_API_KEY", "AI_GATEWAY_BASE_URL", "OPENAI_BASE_URL", ) @@ -112,6 +120,9 @@ class TestResolveProvider: def test_explicit_minimax_cn(self): assert resolve_provider("minimax-cn") == "minimax-cn" + def test_explicit_ai_gateway(self): + assert resolve_provider("ai-gateway") == "ai-gateway" + def test_alias_glm(self): assert resolve_provider("glm") == "zai" @@ -130,6 +141,12 @@ class TestResolveProvider: def test_alias_minimax_underscore(self): assert resolve_provider("minimax_cn") == "minimax-cn" + def test_alias_aigateway(self): + assert resolve_provider("aigateway") == "ai-gateway" + + def test_alias_vercel(self): + assert resolve_provider("vercel") == "ai-gateway" + def test_alias_case_insensitive(self): assert resolve_provider("GLM") == "zai" assert resolve_provider("Z-AI") == "zai" @@ -163,6 +180,10 @@ class TestResolveProvider: monkeypatch.setenv("MINIMAX_CN_API_KEY", "test-mm-cn-key") assert resolve_provider("auto") == "minimax-cn" + def test_auto_detects_ai_gateway_key(self, monkeypatch): + monkeypatch.setenv("AI_GATEWAY_API_KEY", "test-gw-key") + assert resolve_provider("auto") == "ai-gateway" + def test_openrouter_takes_priority_over_glm(self, monkeypatch): """OpenRouter API key should win over GLM in auto-detection.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") @@ -248,6 +269,13 @@ class TestResolveApiKeyProviderCredentials: assert creds["api_key"] == "mmcn-secret-key" assert creds["base_url"] == "https://api.minimaxi.com/v1" + def test_resolve_ai_gateway_with_key(self, monkeypatch): + monkeypatch.setenv("AI_GATEWAY_API_KEY", "gw-secret-key") + creds = resolve_api_key_provider_credentials("ai-gateway") + assert creds["provider"] == "ai-gateway" + assert creds["api_key"] == "gw-secret-key" + assert creds["base_url"] == "https://ai-gateway.vercel.sh/v1" + def test_resolve_with_custom_base_url(self, monkeypatch): monkeypatch.setenv("GLM_API_KEY", "glm-key") monkeypatch.setenv("GLM_BASE_URL", "https://custom.glm.example/v4") @@ -309,6 +337,15 @@ class TestRuntimeProviderResolution: assert result["provider"] == "minimax" assert result["api_key"] == "mm-key" + def test_runtime_ai_gateway(self, monkeypatch): + monkeypatch.setenv("AI_GATEWAY_API_KEY", "gw-key") + from hermes_cli.runtime_provider import resolve_runtime_provider + result = resolve_runtime_provider(requested="ai-gateway") + assert result["provider"] == "ai-gateway" + assert result["api_mode"] == "chat_completions" + assert result["api_key"] == "gw-key" + assert "ai-gateway.vercel.sh" in result["base_url"] + def test_runtime_auto_detects_api_key_provider(self, monkeypatch): monkeypatch.setenv("KIMI_API_KEY", "auto-kimi-key") from hermes_cli.runtime_provider import resolve_runtime_provider diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py index dc976b8f1..e6d885604 100644 --- a/tests/test_provider_parity.py +++ b/tests/test_provider_parity.py @@ -137,6 +137,40 @@ class TestBuildApiKwargsOpenRouter: assert "codex_reasoning_items" in messages[1] +class TestBuildApiKwargsAIGateway: + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "model" in kwargs + assert kwargs["messages"][-1]["content"] == "hi" + + def test_no_responses_api_fields(self, monkeypatch): + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "input" not in kwargs + assert "instructions" not in kwargs + assert "store" not in kwargs + + def test_includes_reasoning_in_extra_body(self, monkeypatch): + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert "reasoning" in extra + assert extra["reasoning"]["enabled"] is True + + def test_includes_tools(self, monkeypatch): + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "tools" in kwargs + tool_names = [t["function"]["name"] for t in kwargs["tools"]] + assert "web_search" in tool_names + + class TestBuildApiKwargsNousPortal: def test_includes_nous_product_tags(self, monkeypatch): agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py index 52d4a1d4f..c02fb3cdc 100644 --- a/tests/test_runtime_provider_resolution.py +++ b/tests/test_runtime_provider_resolution.py @@ -26,6 +26,20 @@ def test_resolve_runtime_provider_codex(monkeypatch): assert resolved["requested_provider"] == "openai-codex" +def test_resolve_runtime_provider_ai_gateway(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "ai-gateway") + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.setenv("AI_GATEWAY_API_KEY", "test-ai-gw-key") + + resolved = rp.resolve_runtime_provider(requested="ai-gateway") + + assert resolved["provider"] == "ai-gateway" + assert resolved["api_mode"] == "chat_completions" + assert resolved["base_url"] == "https://ai-gateway.vercel.sh/v1" + assert resolved["api_key"] == "test-ai-gw-key" + assert resolved["requested_provider"] == "ai-gateway" + + def test_resolve_runtime_provider_openrouter_explicit(monkeypatch): monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") monkeypatch.setattr(rp, "_get_model_config", lambda: {}) diff --git a/website/docs/developer-guide/provider-runtime.md b/website/docs/developer-guide/provider-runtime.md index bf3c95090..faa84d5f6 100644 --- a/website/docs/developer-guide/provider-runtime.md +++ b/website/docs/developer-guide/provider-runtime.md @@ -37,6 +37,7 @@ That ordering matters because Hermes treats the saved model/provider choice as t Current provider families include: +- AI Gateway (Vercel) - OpenRouter - Nous Portal - OpenAI Codex @@ -68,11 +69,21 @@ This resolver is the main reason Hermes can share auth/runtime logic between: - ACP editor sessions - auxiliary model tasks -## OpenRouter vs custom OpenAI-compatible base URLs +## AI Gateway -Hermes contains logic to avoid leaking the wrong API key to a custom endpoint when both `OPENROUTER_API_KEY` and `OPENAI_API_KEY` exist. +Set `AI_GATEWAY_API_KEY` in `~/.hermes/.env` and run with `--provider ai-gateway`. Hermes fetches available models from the gateway's `/models` endpoint, filtering to language models with tool-use support. -It also distinguishes between: +## OpenRouter, AI Gateway, and custom OpenAI-compatible base URLs + +Hermes contains logic to avoid leaking the wrong API key to a custom endpoint when multiple provider keys exist (e.g. `OPENROUTER_API_KEY`, `AI_GATEWAY_API_KEY`, and `OPENAI_API_KEY`). + +Each provider's API key is scoped to its own base URL: + +- `OPENROUTER_API_KEY` is only sent to `openrouter.ai` endpoints +- `AI_GATEWAY_API_KEY` is only sent to `ai-gateway.vercel.sh` endpoints +- `OPENAI_API_KEY` is used for custom endpoints and as a fallback + +Hermes also distinguishes between: - a real custom endpoint selected by the user - the OpenRouter fallback path used when no custom endpoint is configured @@ -80,7 +91,7 @@ It also distinguishes between: That distinction is especially important for: - local model servers -- non-OpenRouter OpenAI-compatible APIs +- non-OpenRouter/non-AI Gateway OpenAI-compatible APIs - switching providers without re-running setup - config-saved custom endpoints that should keep working even when `OPENAI_BASE_URL` is not exported in the current shell diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index daaad87bc..d10d66c1f 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -14,6 +14,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config |----------|-------------| | `OPENROUTER_API_KEY` | OpenRouter API key (recommended for flexibility) | | `OPENROUTER_BASE_URL` | Override the OpenRouter-compatible base URL | +| `AI_GATEWAY_API_KEY` | Vercel AI Gateway API key ([ai-gateway.vercel.sh](https://ai-gateway.vercel.sh)) | +| `AI_GATEWAY_BASE_URL` | Override AI Gateway base URL (default: `https://ai-gateway.vercel.sh/v1`) | | `OPENAI_API_KEY` | API key for custom OpenAI-compatible endpoints (used with `OPENAI_BASE_URL`) | | `OPENAI_BASE_URL` | Base URL for custom endpoint (VLLM, SGLang, etc.) | | `GLM_API_KEY` | z.ai / ZhipuAI GLM API key ([z.ai](https://z.ai)) | diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index f55a65181..abaabbad4 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -65,6 +65,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) | | **Anthropic** | `hermes model` (Claude Pro/Max via Claude Code auth, Anthropic API key, or manual setup-token) | | **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` | +| **AI Gateway** | `AI_GATEWAY_API_KEY` in `~/.hermes/.env` (provider: `ai-gateway`) | | **z.ai / GLM** | `GLM_API_KEY` in `~/.hermes/.env` (provider: `zai`) | | **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) | | **MiniMax** | `MINIMAX_API_KEY` in `~/.hermes/.env` (provider: `minimax`) | diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index f94e380d4..5df658e8e 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -34,6 +34,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback | Provider | Value | Requirements | |----------|-------|-------------| +| AI Gateway | `ai-gateway` | `AI_GATEWAY_API_KEY` | | OpenRouter | `openrouter` | `OPENROUTER_API_KEY` | | Nous Portal | `nous` | `hermes login` (OAuth) | | OpenAI Codex | `openai-codex` | `hermes model` (ChatGPT OAuth) |