fix(auth): stop silently falling back to OpenRouter when no provider is configured (#3862)
Previously, when no API keys or provider credentials were found, Hermes silently defaulted to OpenRouter + Claude Opus. This caused confusion when users configured local servers (LM Studio, Ollama, etc.) with a typo or unrecognized provider name — the system would silently route to OpenRouter instead of telling them something was wrong.

Changes:
- resolve_provider() now raises AuthError when no credentials are found instead of returning 'openrouter' as a silent fallback
- Added local server aliases: lmstudio, ollama, vllm, llamacpp → custom
- Removed hardcoded 'anthropic/claude-opus-4.6' fallback from gateway and cron scheduler (they read from config.yaml instead)
- Updated cli-config.yaml.example with complete provider documentation including all supported providers, aliases, and local server setup
This commit is contained in:
@@ -11,14 +11,29 @@ model:
|
|||||||
default: "anthropic/claude-opus-4.6"
|
default: "anthropic/claude-opus-4.6"
|
||||||
|
|
||||||
# Inference provider selection:
|
# Inference provider selection:
|
||||||
# "auto" - Use Nous Portal if logged in, otherwise OpenRouter/env vars (default)
|
# "auto" - Auto-detect from credentials (default)
|
||||||
# "nous-api" - Use Nous Portal via API key (requires: NOUS_API_KEY)
|
# "openrouter" - OpenRouter (requires: OPENROUTER_API_KEY or OPENAI_API_KEY)
|
||||||
# "openrouter" - Always use OpenRouter API key from OPENROUTER_API_KEY
|
# "nous" - Nous Portal OAuth (requires: hermes login)
|
||||||
# "nous" - Always use Nous Portal (requires: hermes login)
|
# "nous-api" - Nous Portal API key (requires: NOUS_API_KEY)
|
||||||
# "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
|
# "anthropic" - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
|
||||||
# "kimi-coding"- Use Kimi / Moonshot AI models (requires: KIMI_API_KEY)
|
# "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
|
||||||
# "minimax" - Use MiniMax global endpoint (requires: MINIMAX_API_KEY)
|
# "copilot" - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
|
||||||
# "minimax-cn" - Use MiniMax China endpoint (requires: MINIMAX_CN_API_KEY)
|
# "zai" - z.ai / ZhipuAI GLM (requires: GLM_API_KEY)
|
||||||
|
# "kimi-coding" - Kimi / Moonshot AI (requires: KIMI_API_KEY)
|
||||||
|
# "minimax" - MiniMax global (requires: MINIMAX_API_KEY)
|
||||||
|
# "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY)
|
||||||
|
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
|
||||||
|
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
|
||||||
|
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
|
||||||
|
#
|
||||||
|
# Local servers (LM Studio, Ollama, vLLM, llama.cpp):
|
||||||
|
# "custom" - Any OpenAI-compatible endpoint. Set base_url below.
|
||||||
|
# Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
|
||||||
|
# Example for LM Studio:
|
||||||
|
# provider: "lmstudio"
|
||||||
|
# base_url: "http://localhost:1234/v1"
|
||||||
|
# No API key needed — local servers typically ignore auth.
|
||||||
|
#
|
||||||
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
||||||
provider: "auto"
|
provider: "auto"
|
||||||
|
|
||||||
|
|||||||
10
cli.py
10
cli.py
@@ -1087,10 +1087,10 @@ class HermesCLI:
|
|||||||
# env vars would stomp each other.
|
# env vars would stomp each other.
|
||||||
_model_config = CLI_CONFIG.get("model", {})
|
_model_config = CLI_CONFIG.get("model", {})
|
||||||
_config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
|
_config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
|
||||||
_FALLBACK_MODEL = "anthropic/claude-opus-4.6"
|
_DEFAULT_CONFIG_MODEL = "anthropic/claude-opus-4.6"
|
||||||
self.model = model or _config_model or _FALLBACK_MODEL
|
self.model = model or _config_model or _DEFAULT_CONFIG_MODEL
|
||||||
# Auto-detect model from local server if still on fallback
|
# Auto-detect model from local server if still on default
|
||||||
if self.model == _FALLBACK_MODEL:
|
if self.model == _DEFAULT_CONFIG_MODEL:
|
||||||
_base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else ""
|
_base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else ""
|
||||||
if "localhost" in _base_url or "127.0.0.1" in _base_url:
|
if "localhost" in _base_url or "127.0.0.1" in _base_url:
|
||||||
from hermes_cli.runtime_provider import _auto_detect_local_model
|
from hermes_cli.runtime_provider import _auto_detect_local_model
|
||||||
@@ -1104,7 +1104,7 @@ class HermesCLI:
|
|||||||
# explicit choice — the user just never changed it. But a config model
|
# explicit choice — the user just never changed it. But a config model
|
||||||
# like "gpt-5.3-codex" IS explicit and must be preserved.
|
# like "gpt-5.3-codex" IS explicit and must be preserved.
|
||||||
self._model_is_default = not model and (
|
self._model_is_default = not model and (
|
||||||
not _config_model or _config_model == _FALLBACK_MODEL
|
not _config_model or _config_model == _DEFAULT_CONFIG_MODEL
|
||||||
)
|
)
|
||||||
|
|
||||||
self._explicit_api_key = api_key
|
self._explicit_api_key = api_key
|
||||||
|
|||||||
@@ -321,7 +321,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
|||||||
if delivery_target.get("thread_id") is not None:
|
if delivery_target.get("thread_id") is not None:
|
||||||
os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
|
os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
|
||||||
|
|
||||||
model = job.get("model") or os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
|
model = job.get("model") or os.getenv("HERMES_MODEL") or ""
|
||||||
|
|
||||||
# Load config.yaml for model, reasoning, prefill, toolsets, provider routing
|
# Load config.yaml for model, reasoning, prefill, toolsets, provider routing
|
||||||
_cfg = {}
|
_cfg = {}
|
||||||
|
|||||||
@@ -323,10 +323,10 @@ def _resolve_gateway_model(config: dict | None = None) -> str:
|
|||||||
"""Read model from env/config — mirrors the resolution in _run_agent_sync.
|
"""Read model from env/config — mirrors the resolution in _run_agent_sync.
|
||||||
|
|
||||||
Without this, temporary AIAgent instances (memory flush, /compress) fall
|
Without this, temporary AIAgent instances (memory flush, /compress) fall
|
||||||
back to the hardcoded default ("anthropic/claude-opus-4.6") which fails
|
back to the hardcoded default which fails when the active provider is
|
||||||
when the active provider is openai-codex.
|
openai-codex.
|
||||||
"""
|
"""
|
||||||
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
|
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or ""
|
||||||
cfg = config if config is not None else _load_gateway_config()
|
cfg = config if config is not None else _load_gateway_config()
|
||||||
model_cfg = cfg.get("model", {})
|
model_cfg = cfg.get("model", {})
|
||||||
if isinstance(model_cfg, str):
|
if isinstance(model_cfg, str):
|
||||||
|
|||||||
@@ -696,6 +696,10 @@ def resolve_provider(
|
|||||||
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
||||||
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
||||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||||
|
# Local server aliases — route through the generic custom provider
|
||||||
|
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
|
||||||
|
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
|
||||||
|
"llama.cpp": "custom", "llama-cpp": "custom",
|
||||||
}
|
}
|
||||||
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
||||||
|
|
||||||
@@ -742,7 +746,12 @@ def resolve_provider(
|
|||||||
if has_usable_secret(os.getenv(env_var, "")):
|
if has_usable_secret(os.getenv(env_var, "")):
|
||||||
return pid
|
return pid
|
||||||
|
|
||||||
return "openrouter"
|
raise AuthError(
|
||||||
|
"No inference provider configured. Run 'hermes model' to choose a "
|
||||||
|
"provider and model, or set an API key (OPENROUTER_API_KEY, "
|
||||||
|
"OPENAI_API_KEY, etc.) in ~/.hermes/.env.",
|
||||||
|
code="no_provider_configured",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
@@ -266,7 +266,8 @@ class TestResolveProvider:
|
|||||||
|
|
||||||
def test_auto_does_not_select_copilot_from_github_token(self, monkeypatch):
|
def test_auto_does_not_select_copilot_from_github_token(self, monkeypatch):
|
||||||
monkeypatch.setenv("GITHUB_TOKEN", "gh-test-token")
|
monkeypatch.setenv("GITHUB_TOKEN", "gh-test-token")
|
||||||
assert resolve_provider("auto") == "openrouter"
|
with pytest.raises(AuthError, match="No inference provider configured"):
|
||||||
|
resolve_provider("auto")
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user