diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 1f7a515c..504b2178 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -11,14 +11,29 @@ model: default: "anthropic/claude-opus-4.6" # Inference provider selection: - # "auto" - Use Nous Portal if logged in, otherwise OpenRouter/env vars (default) - # "nous-api" - Use Nous Portal via API key (requires: NOUS_API_KEY) - # "openrouter" - Always use OpenRouter API key from OPENROUTER_API_KEY - # "nous" - Always use Nous Portal (requires: hermes login) - # "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY) - # "kimi-coding"- Use Kimi / Moonshot AI models (requires: KIMI_API_KEY) - # "minimax" - Use MiniMax global endpoint (requires: MINIMAX_API_KEY) - # "minimax-cn" - Use MiniMax China endpoint (requires: MINIMAX_CN_API_KEY) + # "auto" - Auto-detect from credentials (default); errors if none are configured + # "openrouter" - OpenRouter (requires: OPENROUTER_API_KEY or OPENAI_API_KEY) + # "nous" - Nous Portal OAuth (requires: hermes login) + # "nous-api" - Nous Portal API key (requires: NOUS_API_KEY) + # "anthropic" - Direct Anthropic API (requires: ANTHROPIC_API_KEY) + # "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex) + # "copilot" - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN) + # "zai" - z.ai / ZhipuAI GLM (requires: GLM_API_KEY) + # "kimi-coding" - Kimi / Moonshot AI (requires: KIMI_API_KEY) + # "minimax" - MiniMax global (requires: MINIMAX_API_KEY) + # "minimax-cn" - MiniMax China (requires: MINIMAX_CN_API_KEY) + # "huggingface" - Hugging Face Inference (requires: HF_TOKEN) + # "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY) + # "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY) + # + # Local servers (LM Studio, Ollama, vLLM, llama.cpp): + # "custom" - Any OpenAI-compatible endpoint. Set base_url below. + # Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom". 
+ # Example for LM Studio: + # provider: "lmstudio" + # base_url: "http://localhost:1234/v1" + # No API key needed — local servers typically ignore auth. + # # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var. provider: "auto" diff --git a/cli.py b/cli.py index 5144b5bb..a601878f 100644 --- a/cli.py +++ b/cli.py @@ -1087,10 +1087,10 @@ class HermesCLI: # env vars would stomp each other. _model_config = CLI_CONFIG.get("model", {}) _config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "") - _FALLBACK_MODEL = "anthropic/claude-opus-4.6" - self.model = model or _config_model or _FALLBACK_MODEL - # Auto-detect model from local server if still on fallback - if self.model == _FALLBACK_MODEL: + _DEFAULT_CONFIG_MODEL = "anthropic/claude-opus-4.6" + self.model = model or _config_model or _DEFAULT_CONFIG_MODEL + # Auto-detect model from local server if still on default + if self.model == _DEFAULT_CONFIG_MODEL: _base_url = (_model_config.get("base_url") or "") if isinstance(_model_config, dict) else "" if "localhost" in _base_url or "127.0.0.1" in _base_url: from hermes_cli.runtime_provider import _auto_detect_local_model @@ -1104,7 +1104,7 @@ class HermesCLI: # explicit choice — the user just never changed it. But a config model # like "gpt-5.3-codex" IS explicit and must be preserved. 
self._model_is_default = not model and ( - not _config_model or _config_model == _FALLBACK_MODEL + not _config_model or _config_model == _DEFAULT_CONFIG_MODEL ) self._explicit_api_key = api_key diff --git a/cron/scheduler.py b/cron/scheduler.py index 0058c1c0..52ae8e31 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -321,7 +321,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: if delivery_target.get("thread_id") is not None: os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"]) - model = job.get("model") or os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6" + model = job.get("model") or os.getenv("HERMES_MODEL") or "" # Load config.yaml for model, reasoning, prefill, toolsets, provider routing _cfg = {} diff --git a/gateway/run.py b/gateway/run.py index ff4f9328..97d85364 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -323,10 +323,10 @@ def _resolve_gateway_model(config: dict | None = None) -> str: """Read model from env/config — mirrors the resolution in _run_agent_sync. Without this, temporary AIAgent instances (memory flush, /compress) fall - back to the hardcoded default ("anthropic/claude-opus-4.6") which fails - when the active provider is openai-codex. + back to the hardcoded default which fails when the active provider is + openai-codex. 
""" - model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" + model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "" cfg = config if config is not None else _load_gateway_config() model_cfg = cfg.get("model", {}) if isinstance(model_cfg, str): diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 6b41ac6e..0c503701 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -696,6 +696,10 @@ def resolve_provider( "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", "go": "opencode-go", "opencode-go-sub": "opencode-go", "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode", + # Local server aliases — route through the generic custom provider + "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom", + "ollama": "custom", "vllm": "custom", "llamacpp": "custom", + "llama.cpp": "custom", "llama-cpp": "custom", } normalized = _PROVIDER_ALIASES.get(normalized, normalized) @@ -742,7 +746,12 @@ def resolve_provider( if has_usable_secret(os.getenv(env_var, "")): return pid - return "openrouter" + raise AuthError( + "No inference provider configured. Run 'hermes model' to choose a " + "provider and model, or set an API key (OPENROUTER_API_KEY, " + "OPENAI_API_KEY, etc.) 
in ~/.hermes/.env.", + code="no_provider_configured", + ) # ============================================================================= diff --git a/tests/test_api_key_providers.py b/tests/test_api_key_providers.py index ad1676e6..0c6337d3 100644 --- a/tests/test_api_key_providers.py +++ b/tests/test_api_key_providers.py @@ -266,7 +266,8 @@ class TestResolveProvider: def test_auto_does_not_select_copilot_from_github_token(self, monkeypatch): monkeypatch.setenv("GITHUB_TOKEN", "gh-test-token") - assert resolve_provider("auto") == "openrouter" + with pytest.raises(AuthError, match="No inference provider configured"): + resolve_provider("auto") # =============================================================================