fix(auth): stop silently falling back to OpenRouter when no provider is configured (#3862)

Previously, when no API keys or provider credentials were found, Hermes silently defaulted to OpenRouter + Claude Opus. This caused confusion when users configured local servers (LM Studio, Ollama, etc.) with a typo or unrecognized provider name — the system would silently route to OpenRouter instead of telling them something was wrong.

Changes:
- resolve_provider() now raises AuthError when no credentials are found instead of returning 'openrouter' as a silent fallback
- Added local server aliases: lmstudio, ollama, vllm, llamacpp (plus the lm-studio, lm_studio, llama.cpp and llama-cpp spellings) → custom
- Removed hardcoded 'anthropic/claude-opus-4.6' fallback from gateway and cron scheduler (they read from config.yaml instead)
- Updated cli-config.yaml.example with complete provider documentation including all supported providers, aliases, and local server setup
2026-03-29 21:06:35 -07:00
parent 3cc50532d1
commit 86ac23c8da
6 changed files with 44 additions and 19 deletions
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -11,14 +11,29 @@ model:
  default: "anthropic/claude-opus-4.6"
  
  # Inference provider selection:
-  #   "auto"       - Use Nous Portal if logged in, otherwise OpenRouter/env vars (default)
-  #   "nous-api"   - Use Nous Portal via API key (requires: NOUS_API_KEY)
-  #   "openrouter" - Always use OpenRouter API key from OPENROUTER_API_KEY
-  #   "nous"       - Always use Nous Portal (requires: hermes login)
-  #   "zai"        - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
-  #   "kimi-coding"- Use Kimi / Moonshot AI models (requires: KIMI_API_KEY)
-  #   "minimax"    - Use MiniMax global endpoint (requires: MINIMAX_API_KEY)
-  #   "minimax-cn" - Use MiniMax China endpoint (requires: MINIMAX_CN_API_KEY)
+  #   "auto"         - Auto-detect from credentials (default)
+  #   "openrouter"   - OpenRouter (requires: OPENROUTER_API_KEY or OPENAI_API_KEY)
+  #   "nous"         - Nous Portal OAuth (requires: hermes login)
+  #   "nous-api"     - Nous Portal API key (requires: NOUS_API_KEY)
+  #   "anthropic"    - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
+  #   "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
+  #   "copilot"      - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
+  #   "zai"          - z.ai / ZhipuAI GLM (requires: GLM_API_KEY)
+  #   "kimi-coding"  - Kimi / Moonshot AI (requires: KIMI_API_KEY)
+  #   "minimax"      - MiniMax global (requires: MINIMAX_API_KEY)
+  #   "minimax-cn"   - MiniMax China (requires: MINIMAX_CN_API_KEY)
+  #   "huggingface"  - Hugging Face Inference (requires: HF_TOKEN)
+  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
+  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
+  #
+  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
+  #   "custom"       - Any OpenAI-compatible endpoint. Set base_url below.
+  #   Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
+  #   Example for LM Studio:
+  #     provider: "lmstudio"
+  #     base_url: "http://localhost:1234/v1"
+  #   No API key needed — local servers typically ignore auth.
+  #
  # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
  provider: "auto"
  
--- a/cli.py
+++ b/cli.py
@@ -1087,10 +1087,10 @@ class HermesCLI:
        # env vars would stomp each other.
        _model_config = CLI_CONFIG.get("model", {})
        _config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
-        _FALLBACK_MODEL = "anthropic/claude-opus-4.6"
-        self.model = model or _config_model or _FALLBACK_MODEL
-        # Auto-detect model from local server if still on fallback
-        if self.model == _FALLBACK_MODEL:
+        _DEFAULT_CONFIG_MODEL = "anthropic/claude-opus-4.6"
+        self.model = model or _config_model or _DEFAULT_CONFIG_MODEL
+        # Auto-detect model from local server if still on default
+        if self.model == _DEFAULT_CONFIG_MODEL:
            _base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else ""
            if "localhost" in _base_url or "127.0.0.1" in _base_url:
                from hermes_cli.runtime_provider import _auto_detect_local_model
@@ -1104,7 +1104,7 @@ class HermesCLI:
        # explicit choice — the user just never changed it.  But a config model
        # like "gpt-5.3-codex" IS explicit and must be preserved.
        self._model_is_default = not model and (
-            not _config_model or _config_model == _FALLBACK_MODEL
+            not _config_model or _config_model == _DEFAULT_CONFIG_MODEL
        )

        self._explicit_api_key = api_key
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -321,7 +321,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            if delivery_target.get("thread_id") is not None:
                os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])

-        model = job.get("model") or os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
+        model = job.get("model") or os.getenv("HERMES_MODEL") or ""

        # Load config.yaml for model, reasoning, prefill, toolsets, provider routing
        _cfg = {}
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -323,10 +323,10 @@ def _resolve_gateway_model(config: dict | None = None) -> str:
    """Read model from env/config — mirrors the resolution in _run_agent_sync.

    Without this, temporary AIAgent instances (memory flush, /compress) fall
-    back to the hardcoded default ("anthropic/claude-opus-4.6") which fails
-    when the active provider is openai-codex.
+    back to the hardcoded default which fails when the active provider is
+    openai-codex.
    """
-    model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
+    model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or ""
    cfg = config if config is not None else _load_gateway_config()
    model_cfg = cfg.get("model", {})
    if isinstance(model_cfg, str):
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -696,6 +696,10 @@ def resolve_provider(
        "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
        "go": "opencode-go", "opencode-go-sub": "opencode-go",
        "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
+        # Local server aliases — route through the generic custom provider
+        "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
+        "ollama": "custom", "vllm": "custom", "llamacpp": "custom",
+        "llama.cpp": "custom", "llama-cpp": "custom",
    }
    normalized = _PROVIDER_ALIASES.get(normalized, normalized)

@@ -742,7 +746,12 @@ def resolve_provider(
            if has_usable_secret(os.getenv(env_var, "")):
                return pid

-    return "openrouter"
+    raise AuthError(
+        "No inference provider configured. Run 'hermes model' to choose a "
+        "provider and model, or set an API key (OPENROUTER_API_KEY, "
+        "OPENAI_API_KEY, etc.) in ~/.hermes/.env.",
+        code="no_provider_configured",
+    )


 # =============================================================================
--- a/tests/test_api_key_providers.py
+++ b/tests/test_api_key_providers.py
@@ -266,7 +266,8 @@ class TestResolveProvider:

    def test_auto_does_not_select_copilot_from_github_token(self, monkeypatch):
        monkeypatch.setenv("GITHUB_TOKEN", "gh-test-token")
-        assert resolve_provider("auto") == "openrouter"
+        with pytest.raises(AuthError, match="No inference provider configured"):
+            resolve_provider("auto")


 # =============================================================================