fix(auth): stop silently falling back to OpenRouter when no provider is configured (#3862)
Previously, when no API keys or provider credentials were found, Hermes silently defaulted to OpenRouter + Claude Opus. This caused confusion when users configured local servers (LM Studio, Ollama, etc.) with a typo or unrecognized provider name — the system would silently route to OpenRouter instead of telling them something was wrong. Changes: - resolve_provider() now raises AuthError when no credentials are found instead of returning 'openrouter' as a silent fallback - Added local server aliases: lmstudio, ollama, vllm, llamacpp → custom - Removed hardcoded 'anthropic/claude-opus-4.6' fallback from gateway and cron scheduler (they read from config.yaml instead) - Updated cli-config.yaml.example with complete provider documentation including all supported providers, aliases, and local server setup
This commit is contained in:
@@ -11,14 +11,29 @@ model:
|
||||
default: "anthropic/claude-opus-4.6"
|
||||
|
||||
# Inference provider selection:
|
||||
# "auto" - Use Nous Portal if logged in, otherwise OpenRouter/env vars (default)
|
||||
# "nous-api" - Use Nous Portal via API key (requires: NOUS_API_KEY)
|
||||
# "openrouter" - Always use OpenRouter API key from OPENROUTER_API_KEY
|
||||
# "nous" - Always use Nous Portal (requires: hermes login)
|
||||
# "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
|
||||
# "kimi-coding"- Use Kimi / Moonshot AI models (requires: KIMI_API_KEY)
|
||||
# "minimax" - Use MiniMax global endpoint (requires: MINIMAX_API_KEY)
|
||||
# "minimax-cn" - Use MiniMax China endpoint (requires: MINIMAX_CN_API_KEY)
|
||||
# "auto" - Auto-detect from credentials (default)
|
||||
# "openrouter" - OpenRouter (requires: OPENROUTER_API_KEY or OPENAI_API_KEY)
|
||||
# "nous" - Nous Portal OAuth (requires: hermes login)
|
||||
# "nous-api" - Nous Portal API key (requires: NOUS_API_KEY)
|
||||
# "anthropic" - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
|
||||
# "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
|
||||
# "copilot" - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
|
||||
# "zai" - z.ai / ZhipuAI GLM (requires: GLM_API_KEY)
|
||||
# "kimi-coding" - Kimi / Moonshot AI (requires: KIMI_API_KEY)
|
||||
# "minimax" - MiniMax global (requires: MINIMAX_API_KEY)
|
||||
# "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY)
|
||||
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
|
||||
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
|
||||
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
|
||||
#
|
||||
# Local servers (LM Studio, Ollama, vLLM, llama.cpp):
|
||||
# "custom" - Any OpenAI-compatible endpoint. Set base_url below.
|
||||
# Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
|
||||
# Example for LM Studio:
|
||||
# provider: "lmstudio"
|
||||
# base_url: "http://localhost:1234/v1"
|
||||
# No API key needed — local servers typically ignore auth.
|
||||
#
|
||||
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
||||
provider: "auto"
|
||||
|
||||
|
||||
10
cli.py
10
cli.py
@@ -1087,10 +1087,10 @@ class HermesCLI:
|
||||
# env vars would stomp each other.
|
||||
_model_config = CLI_CONFIG.get("model", {})
|
||||
_config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
|
||||
_FALLBACK_MODEL = "anthropic/claude-opus-4.6"
|
||||
self.model = model or _config_model or _FALLBACK_MODEL
|
||||
# Auto-detect model from local server if still on fallback
|
||||
if self.model == _FALLBACK_MODEL:
|
||||
_DEFAULT_CONFIG_MODEL = "anthropic/claude-opus-4.6"
|
||||
self.model = model or _config_model or _DEFAULT_CONFIG_MODEL
|
||||
# Auto-detect model from local server if still on default
|
||||
if self.model == _DEFAULT_CONFIG_MODEL:
|
||||
_base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else ""
|
||||
if "localhost" in _base_url or "127.0.0.1" in _base_url:
|
||||
from hermes_cli.runtime_provider import _auto_detect_local_model
|
||||
@@ -1104,7 +1104,7 @@ class HermesCLI:
|
||||
# explicit choice — the user just never changed it. But a config model
|
||||
# like "gpt-5.3-codex" IS explicit and must be preserved.
|
||||
self._model_is_default = not model and (
|
||||
not _config_model or _config_model == _FALLBACK_MODEL
|
||||
not _config_model or _config_model == _DEFAULT_CONFIG_MODEL
|
||||
)
|
||||
|
||||
self._explicit_api_key = api_key
|
||||
|
||||
@@ -321,7 +321,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
if delivery_target.get("thread_id") is not None:
|
||||
os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
|
||||
|
||||
model = job.get("model") or os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
|
||||
model = job.get("model") or os.getenv("HERMES_MODEL") or ""
|
||||
|
||||
# Load config.yaml for model, reasoning, prefill, toolsets, provider routing
|
||||
_cfg = {}
|
||||
|
||||
@@ -323,10 +323,10 @@ def _resolve_gateway_model(config: dict | None = None) -> str:
|
||||
"""Read model from env/config — mirrors the resolution in _run_agent_sync.
|
||||
|
||||
Without this, temporary AIAgent instances (memory flush, /compress) fall
|
||||
back to the hardcoded default ("anthropic/claude-opus-4.6") which fails
|
||||
when the active provider is openai-codex.
|
||||
back to the hardcoded default which fails when the active provider is
|
||||
openai-codex.
|
||||
"""
|
||||
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
|
||||
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or ""
|
||||
cfg = config if config is not None else _load_gateway_config()
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, str):
|
||||
|
||||
@@ -696,6 +696,10 @@ def resolve_provider(
|
||||
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
||||
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||
# Local server aliases — route through the generic custom provider
|
||||
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
|
||||
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
|
||||
"llama.cpp": "custom", "llama-cpp": "custom",
|
||||
}
|
||||
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
||||
|
||||
@@ -742,7 +746,12 @@ def resolve_provider(
|
||||
if has_usable_secret(os.getenv(env_var, "")):
|
||||
return pid
|
||||
|
||||
return "openrouter"
|
||||
raise AuthError(
|
||||
"No inference provider configured. Run 'hermes model' to choose a "
|
||||
"provider and model, or set an API key (OPENROUTER_API_KEY, "
|
||||
"OPENAI_API_KEY, etc.) in ~/.hermes/.env.",
|
||||
code="no_provider_configured",
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
@@ -266,7 +266,8 @@ class TestResolveProvider:
|
||||
|
||||
def test_auto_does_not_select_copilot_from_github_token(self, monkeypatch):
|
||||
monkeypatch.setenv("GITHUB_TOKEN", "gh-test-token")
|
||||
assert resolve_provider("auto") == "openrouter"
|
||||
with pytest.raises(AuthError, match="No inference provider configured"):
|
||||
resolve_provider("auto")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
Reference in New Issue
Block a user