def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
    """Return ``(base_url, api_key)`` for the active custom endpoint, if any.

    The lookup is delegated to ``resolve_runtime_provider(requested="custom")``
    so auxiliary clients see the same endpoint as the main CLI, whether it
    comes from ``OPENAI_BASE_URL`` in the environment or from a base URL
    saved in config.yaml.

    Returns:
        ``(base_url, api_key)`` with surrounding whitespace removed and any
        trailing ``/`` stripped from the URL, or ``(None, None)`` when
        resolution fails, either value is missing/blank/not a string, or the
        resolved URL points at OpenRouter.
    """
    no_endpoint = (None, None)

    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider

        resolved = resolve_runtime_provider(requested="custom")
    except Exception as exc:
        # Best effort: a failed lookup is treated as "no custom endpoint".
        logger.debug("Auxiliary client: custom runtime resolution failed: %s", exc)
        return no_endpoint

    raw_base = resolved.get("base_url")
    raw_key = resolved.get("api_key")

    base_url = raw_base.strip() if isinstance(raw_base, str) else ""
    api_key = raw_key.strip() if isinstance(raw_key, str) else ""
    if not (base_url and api_key):
        return no_endpoint

    base_url = base_url.rstrip("/")
    # requested='custom' falls back to OpenRouter when no custom endpoint is
    # configured. Treat that as "no custom endpoint" for auxiliary routing.
    if "openrouter.ai" in base_url.lower():
        return no_endpoint

    return base_url, api_key


def _current_custom_base_url() -> str:
    """Return the resolved custom base URL, or ``""`` when there is none."""
    base_url = _resolve_custom_runtime()[0]
    return base_url if base_url else ""
def get_installed_systemd_scopes() -> list[str]:
    """List the systemd scopes that have a gateway unit file on disk.

    Returns:
        A list containing ``"user"`` and/or ``"system"`` (in that order).
        A unit path is counted once even if both scopes resolve to it.
    """
    installed: list[str] = []
    counted_units = set()
    for label, is_system in (("user", False), ("system", True)):
        unit_file = get_systemd_unit_path(system=is_system)
        # Skip a path already recorded so one file never reports two scopes.
        if unit_file not in counted_units and unit_file.exists():
            installed.append(label)
            counted_units.add(unit_file)
    return installed


def has_conflicting_systemd_units() -> bool:
    """Return True when more than one scope has a gateway unit installed."""
    return len(get_installed_systemd_scopes()) >= 2


def print_systemd_scope_conflict_warning() -> None:
    """Warn when user and system gateway units coexist; otherwise print nothing."""
    installed = get_installed_systemd_scopes()
    if len(installed) >= 2:
        print_warning(f"Both user and system gateway services are installed ({' + '.join(installed)}).")
        for hint in (
            " This is confusing and can make start/stop/status behavior ambiguous.",
            " Default gateway commands target the user service unless you pass --system.",
            " Keep one of these:",
            " hermes gateway uninstall",
            " sudo hermes gateway uninstall --system",
        ):
            print_info(hint)
def _default_system_service_user() -> str | None:
    """Pick a non-root account name for the system service from the environment.

    Checks ``SUDO_USER``, ``USER`` and ``LOGNAME`` in that order.

    Returns:
        The first value that is non-blank after stripping and is not
        ``"root"``, or ``None`` when no such value exists.
    """
    for env_name in ("SUDO_USER", "USER", "LOGNAME"):
        candidate = (os.getenv(env_name) or "").strip()
        if candidate and candidate != "root":
            return candidate
    return None


def prompt_linux_gateway_install_scope() -> str | None:
    """Ask how the gateway should run in the background.

    Returns:
        ``"user"``, ``"system"``, or ``None`` when the user chooses to skip
        the service install.
    """
    choice = prompt_choice(
        " Choose how the gateway should run in the background:",
        [
            "User service (no sudo; best for laptops/dev boxes; may need linger after logout)",
            "System service (starts on boot; requires sudo; still runs as your user)",
            "Skip service install for now",
        ],
        default=0,
    )
    # Menu index -> scope name; index 2 is the explicit "skip" entry.
    return {0: "user", 1: "system", 2: None}[choice]


def _system_install_followup_command(run_as_user: str | None) -> str:
    """Build the sudo command a non-root user must run to install the system unit."""
    # Without a known account, show a placeholder instead of leaving
    # --run-as-user dangling with no value (which is not a runnable command).
    target_user = run_as_user or "YOUR_USERNAME"
    return f"sudo hermes gateway install --system --run-as-user {target_user}"


def install_linux_gateway_from_setup(force: bool = False) -> tuple[str | None, bool]:
    """Install the gateway systemd unit interactively from the setup flow.

    Args:
        force: Forwarded unchanged to ``systemd_install``.

    Returns:
        ``(scope, did_install)``. ``scope`` is ``"user"``, ``"system"`` or
        ``None`` (skipped). ``did_install`` is True only when
        ``systemd_install`` was actually called; it is False when the user
        skipped, or picked the system scope without root privileges (in which
        case the follow-up sudo commands are printed instead).
    """
    scope = prompt_linux_gateway_install_scope()
    if scope is None:
        return None, False

    if scope != "system":
        systemd_install(force=force, system=False)
        return scope, True

    run_as_user = _default_system_service_user()
    if os.geteuid() != 0:
        # Not root: the system unit cannot be written from this session, so
        # tell the user exactly what to run afterwards.
        print_warning(" System service install requires sudo, so Hermes can't create it from this user session.")
        print_info(f" After setup, run: {_system_install_followup_command(run_as_user)}")
        print_info(" Then start it with: sudo hermes gateway start --system")
        return scope, False

    if not run_as_user:
        # Running as root with no usable account in the environment: keep
        # asking until a non-root username is supplied.
        while True:
            answer = prompt(" Run the system gateway service as which user?", default="")
            run_as_user = (answer or "").strip()
            if run_as_user and run_as_user != "root":
                break
            print_error(" Enter a non-root username.")

    systemd_install(force=force, system=True, run_as_user=run_as_user)
    return scope, True
get_systemd_linger_status() -> tuple[bool | None, str]: """Return whether systemd user lingering is enabled for the current user. @@ -462,6 +544,8 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str else: _ensure_linger_enabled() + print_systemd_scope_conflict_warning() + def systemd_uninstall(system: bool = False): system = _select_systemd_scope(system) @@ -519,6 +603,10 @@ def systemd_status(deep: bool = False, system: bool = False): print(f" Run: {'sudo ' if system else ''}hermes gateway install{scope_flag}") return + if has_conflicting_systemd_units(): + print_systemd_scope_conflict_warning() + print() + if not systemd_unit_is_current(system=system): print("⚠ Installed gateway service definition is outdated") print(f" Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag} # auto-refreshes the unit") @@ -1025,18 +1113,26 @@ def _is_service_installed() -> bool: def _is_service_running() -> bool: """Check if the gateway service is currently running.""" if is_linux(): - if get_systemd_unit_path(system=False).exists(): + user_unit_exists = get_systemd_unit_path(system=False).exists() + system_unit_exists = get_systemd_unit_path(system=True).exists() + + if user_unit_exists: result = subprocess.run( _systemctl_cmd(False) + ["is-active", SERVICE_NAME], capture_output=True, text=True ) - return result.stdout.strip() == "active" - if get_systemd_unit_path(system=True).exists(): + if result.stdout.strip() == "active": + return True + + if system_unit_exists: result = subprocess.run( _systemctl_cmd(True) + ["is-active", SERVICE_NAME], capture_output=True, text=True ) - return result.stdout.strip() == "active" + if result.stdout.strip() == "active": + return True + + return False elif is_macos() and get_launchd_plist_path().exists(): result = subprocess.run( ["launchctl", "list", "ai.hermes.gateway"], @@ -1178,6 +1274,10 @@ def gateway_setup(): service_installed = _is_service_installed() service_running = _is_service_running() + 
if is_linux() and has_conflicting_systemd_units(): + print_systemd_scope_conflict_warning() + print() + if service_installed and service_running: print_success("Gateway service is installed and running.") elif service_installed: @@ -1259,16 +1359,18 @@ def gateway_setup(): platform_name = "systemd" if is_linux() else "launchd" if prompt_yes_no(f" Install the gateway as a {platform_name} service? (runs in background, starts on boot)", True): try: - force = False + installed_scope = None + did_install = False if is_linux(): - systemd_install(force) + installed_scope, did_install = install_linux_gateway_from_setup(force=False) else: - launchd_install(force) + launchd_install(force=False) + did_install = True print() - if prompt_yes_no(" Start the service now?", True): + if did_install and prompt_yes_no(" Start the service now?", True): try: if is_linux(): - systemd_start() + systemd_start(system=installed_scope == "system") else: launchd_start() except subprocess.CalledProcessError as e: @@ -1278,6 +1380,8 @@ def gateway_setup(): print_info(" You can try manually: hermes gateway install") else: print_info(" You can install later: hermes gateway install") + if is_linux(): + print_info(" Or as a boot-time service: sudo hermes gateway install --system") print_info(" Or run in foreground: hermes gateway") else: print_info(" Service install not supported on this platform.") diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index fead68000..e0535357a 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -144,10 +144,16 @@ def _resolve_openrouter_runtime( env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip() use_config_base_url = False - if requested_norm == "auto": - if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url: + if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url: + if requested_norm == "auto": if not cfg_provider or cfg_provider == "auto": 
use_config_base_url = True + elif requested_norm == "custom": + # Persisted custom endpoints store their base URL in config.yaml. + # If OPENAI_BASE_URL is not currently set in the environment, keep + # honoring that saved endpoint instead of falling back to OpenRouter. + if cfg_provider == "custom": + use_config_base_url = True # When the user explicitly requested the openrouter provider, skip # OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 051de13c1..ef5f0969f 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -2240,7 +2240,9 @@ def setup_gateway(config: dict): from hermes_cli.gateway import ( _is_service_installed, _is_service_running, - systemd_install, + has_conflicting_systemd_units, + install_linux_gateway_from_setup, + print_systemd_scope_conflict_warning, systemd_start, systemd_restart, launchd_install, @@ -2252,6 +2254,10 @@ def setup_gateway(config: dict): service_running = _is_service_running() print() + if _is_linux and has_conflicting_systemd_units(): + print_systemd_scope_conflict_warning() + print() + if service_running: if prompt_yes_no(" Restart the gateway to pick up changes?", True): try: @@ -2277,15 +2283,18 @@ def setup_gateway(config: dict): True, ): try: + installed_scope = None + did_install = False if _is_linux: - systemd_install(force=False) + installed_scope, did_install = install_linux_gateway_from_setup(force=False) else: launchd_install(force=False) + did_install = True print() - if prompt_yes_no(" Start the service now?", True): + if did_install and prompt_yes_no(" Start the service now?", True): try: if _is_linux: - systemd_start() + systemd_start(system=installed_scope == "system") elif _is_macos: launchd_start() except Exception as e: @@ -2295,6 +2304,8 @@ def setup_gateway(config: dict): print_info(" You can try manually: hermes gateway install") else: print_info(" You can install later: hermes gateway install") + if _is_linux: + 
print_info(" Or as a boot-time service: sudo hermes gateway install --system") print_info(" Or run in foreground: hermes gateway") else: print_info("Start the gateway to bring your bots online:") diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index d60e3c813..3ddb6d7c6 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -165,6 +165,29 @@ class TestGetTextAuxiliaryClient: assert model is None mock_openai.assert_not_called() + def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch): + config = { + "model": { + "provider": "custom", + "base_url": "http://localhost:1234/v1", + "default": "my-local-model", + } + } + monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key") + monkeypatch.setattr("hermes_cli.config.load_config", lambda: config) + monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config) + + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \ + patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client() + + assert client is not None + assert model == "my-local-model" + call_kwargs = mock_openai.call_args + assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1" + def test_codex_fallback_when_nothing_else(self, codex_auth_dir): with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ patch("agent.auxiliary_client.OpenAI") as mock_openai: @@ -364,6 +387,27 @@ class TestResolveForcedProvider: client, model = _resolve_forced_provider("main") assert model == "my-local-model" + def test_forced_main_uses_config_saved_custom_endpoint(self, monkeypatch): + config = { + "model": { + "provider": "custom", + "base_url": "http://local:8080/v1", + "default": "my-local-model", + } + } + 
monkeypatch.setenv("OPENAI_API_KEY", "local-key") + monkeypatch.setattr("hermes_cli.config.load_config", lambda: config) + monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config) + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \ + patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = _resolve_forced_provider("main") + assert client is not None + assert model == "my-local-model" + call_kwargs = mock_openai.call_args + assert call_kwargs.kwargs["base_url"] == "http://local:8080/v1" + def test_forced_main_skips_openrouter_nous(self, monkeypatch): """Even if OpenRouter key is set, 'main' skips it.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") diff --git a/tests/hermes_cli/test_gateway.py b/tests/hermes_cli/test_gateway.py index d3f4bb9e8..29da657e2 100644 --- a/tests/hermes_cli/test_gateway.py +++ b/tests/hermes_cli/test_gateway.py @@ -115,3 +115,57 @@ def test_systemd_install_system_scope_skips_linger_and_uses_systemctl(monkeypatc assert helper_calls == [] assert "Configured to run as: alice" not in out # generated test unit has no User= line assert "System service installed and enabled" in out + + +def test_conflicting_systemd_units_warning(monkeypatch, tmp_path, capsys): + user_unit = tmp_path / "user" / "hermes-gateway.service" + system_unit = tmp_path / "system" / "hermes-gateway.service" + user_unit.parent.mkdir(parents=True) + system_unit.parent.mkdir(parents=True) + user_unit.write_text("[Unit]\n", encoding="utf-8") + system_unit.write_text("[Unit]\n", encoding="utf-8") + + monkeypatch.setattr( + gateway, + "get_systemd_unit_path", + lambda system=False: system_unit if system else user_unit, + ) + + gateway.print_systemd_scope_conflict_warning() + + out = capsys.readouterr().out + assert "Both user and system gateway 
def test_install_linux_gateway_from_setup_system_choice_without_root_prints_followup(monkeypatch, capsys):
    """Choosing the system scope without root prints sudo follow-ups and installs nothing."""

    def _unexpected_install(*args, **kwargs):
        raise AssertionError("should not install")

    monkeypatch.setattr(gateway, "prompt_linux_gateway_install_scope", lambda: "system")
    monkeypatch.setattr(gateway.os, "geteuid", lambda: 1000)
    monkeypatch.setattr(gateway, "_default_system_service_user", lambda: "alice")
    monkeypatch.setattr(gateway, "systemd_install", _unexpected_install)

    chosen_scope, installed = gateway.install_linux_gateway_from_setup(force=False)

    printed = capsys.readouterr().out
    assert (chosen_scope, installed) == ("system", False)
    for expected in (
        "sudo hermes gateway install --system --run-as-user alice",
        "sudo hermes gateway start --system",
    ):
        assert expected in printed


def test_install_linux_gateway_from_setup_system_choice_as_root_installs(monkeypatch):
    """Choosing the system scope as root calls systemd_install with the detected user."""
    recorded = []

    def _record_install(force=False, system=False, run_as_user=None):
        recorded.append((force, system, run_as_user))

    monkeypatch.setattr(gateway, "prompt_linux_gateway_install_scope", lambda: "system")
    monkeypatch.setattr(gateway.os, "geteuid", lambda: 0)
    monkeypatch.setattr(gateway, "_default_system_service_user", lambda: "alice")
    monkeypatch.setattr(gateway, "systemd_install", _record_install)

    chosen_scope, installed = gateway.install_linux_gateway_from_setup(force=True)

    assert (chosen_scope, installed) == ("system", True)
    assert recorded == [(True, True, "alice")]
SimpleNamespace(exists=lambda: True) + system_unit = SimpleNamespace(exists=lambda: True) + + monkeypatch.setattr(gateway_cli, "is_linux", lambda: True) + monkeypatch.setattr(gateway_cli, "is_macos", lambda: False) + monkeypatch.setattr( + gateway_cli, + "get_systemd_unit_path", + lambda system=False: system_unit if system else user_unit, + ) + + def fake_run(cmd, capture_output=True, text=True, **kwargs): + if cmd == ["systemctl", "--user", "is-active", gateway_cli.SERVICE_NAME]: + return SimpleNamespace(returncode=0, stdout="inactive\n", stderr="") + if cmd == ["systemctl", "is-active", gateway_cli.SERVICE_NAME]: + return SimpleNamespace(returncode=0, stdout="active\n", stderr="") + raise AssertionError(f"Unexpected command: {cmd}") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + + assert gateway_cli._is_service_running() is True + + class TestGatewaySystemServiceRouting: def test_gateway_install_passes_system_flags(self, monkeypatch): monkeypatch.setattr(gateway_cli, "is_linux", lambda: True) diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py index a53c716a3..52d4a1d4f 100644 --- a/tests/test_runtime_provider_resolution.py +++ b/tests/test_runtime_provider_resolution.py @@ -131,13 +131,36 @@ def test_custom_endpoint_prefers_openai_key(monkeypatch): monkeypatch.setattr(rp, "_get_model_config", lambda: {}) monkeypatch.setenv("OPENAI_BASE_URL", "https://api.z.ai/api/coding/paas/v4") monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) - monkeypatch.setenv("OPENAI_API_KEY", "sk-zai-correct-key") - monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-wrong-key-for-zai") + monkeypatch.setenv("OPENAI_API_KEY", "zai-key") + monkeypatch.setenv("OPENROUTER_API_KEY", "openrouter-key") resolved = rp.resolve_runtime_provider(requested="custom") assert resolved["base_url"] == "https://api.z.ai/api/coding/paas/v4" - assert resolved["api_key"] == "sk-zai-correct-key" + assert resolved["api_key"] == "zai-key" + + 
def test_custom_endpoint_uses_saved_config_base_url_when_env_missing(monkeypatch):
    """A custom endpoint saved in config.yaml must resolve even when
    OPENAI_BASE_URL is not present in the environment."""
    saved_model_cfg = {
        "provider": "custom",
        "base_url": "http://127.0.0.1:1234/v1",
    }
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter")
    # Hand out a fresh copy per call, as the resolver may be invoked repeatedly.
    monkeypatch.setattr(rp, "_get_model_config", lambda: dict(saved_model_cfg))

    for absent in ("OPENAI_BASE_URL", "OPENROUTER_BASE_URL"):
        monkeypatch.delenv(absent, raising=False)
    for env_name, env_value in (
        ("OPENAI_API_KEY", "local-key"),
        ("OPENROUTER_API_KEY", "or-key"),
    ):
        monkeypatch.setenv(env_name, env_value)

    runtime = rp.resolve_runtime_provider(requested="custom")

    assert runtime["base_url"] == "http://127.0.0.1:1234/v1"
    assert runtime["api_key"] == "local-key"
+ ## Providers Current provider families include: @@ -70,11 +72,17 @@ This resolver is the main reason Hermes can share auth/runtime logic between: Hermes contains logic to avoid leaking the wrong API key to a custom endpoint when both `OPENROUTER_API_KEY` and `OPENAI_API_KEY` exist. +It also distinguishes between: + +- a real custom endpoint selected by the user +- the OpenRouter fallback path used when no custom endpoint is configured + That distinction is especially important for: - local model servers - non-OpenRouter OpenAI-compatible APIs - switching providers without re-running setup +- config-saved custom endpoints that should keep working even when `OPENAI_BASE_URL` is not exported in the current shell ## Native Anthropic path @@ -114,6 +122,12 @@ Auxiliary tasks such as: can use their own provider/model routing rather than the main conversational model. +When an auxiliary task is configured with provider `main`, Hermes resolves that through the same shared runtime path as normal chat. In practice that means: + +- env-driven custom endpoints still work +- custom endpoints saved via `hermes model` / `config.yaml` also work +- auxiliary routing can tell the difference between a real saved custom endpoint and the OpenRouter fallback + ## Fallback models Hermes also supports a configured fallback model/provider, allowing runtime failover in supported error paths. diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md index 02a82dce7..4d7be7aa0 100644 --- a/website/docs/reference/faq.md +++ b/website/docs/reference/faq.md @@ -50,6 +50,8 @@ hermes config set OPENAI_API_KEY ollama # Any non-empty va hermes config set HERMES_MODEL llama3.1 ``` +You can also save the endpoint interactively with `hermes model`. Hermes persists that custom endpoint in `config.yaml`, and auxiliary tasks configured with provider `main` follow the same saved endpoint. + This works with Ollama, vLLM, llama.cpp server, SGLang, LocalAI, and others. 
See the [Configuration guide](../user-guide/configuration.md) for details. ### How much does it cost? diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 0a1c50cb0..7e368ecf7 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -69,7 +69,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) | | **MiniMax** | `MINIMAX_API_KEY` in `~/.hermes/.env` (provider: `minimax`) | | **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) | -| **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | +| **Custom Endpoint** | `hermes model` (saved in `config.yaml`) or `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | :::info Codex Note The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Hermes stores the resulting credentials in its own auth store under `~/.hermes/auth.json` and can import existing Codex CLI credentials from `~/.codex/auth.json` when present. No Codex CLI installation is required. @@ -163,10 +163,12 @@ hermes model ```bash # Add to ~/.hermes/.env OPENAI_BASE_URL=http://localhost:8000/v1 -OPENAI_API_KEY=your-key-or-dummy +OPENAI_API_KEY=*** LLM_MODEL=your-model-name ``` +`hermes model` and the manual `.env` approach end up in the same runtime path. If you save a custom endpoint through `hermes model`, Hermes persists the provider + base URL in `config.yaml` so later sessions keep using that endpoint even if `OPENAI_BASE_URL` is not exported in your current shell. + Everything below follows this same pattern — just change the URL, key, and model name. --- @@ -604,7 +606,7 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o | `"openrouter"` | Force OpenRouter — routes to any model (Gemini, GPT-4o, Claude, etc.) 
| `OPENROUTER_API_KEY` | | `"nous"` | Force Nous Portal | `hermes login` | | `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex | -| `"main"` | Use your custom endpoint (`OPENAI_BASE_URL` + `OPENAI_API_KEY`). Works with OpenAI, local models, or any OpenAI-compatible API. | `OPENAI_BASE_URL` + `OPENAI_API_KEY` | +| `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. Works with OpenAI, local models, or any OpenAI-compatible API. | Custom endpoint credentials + base URL | ### Common Setups @@ -651,10 +653,12 @@ auxiliary: ```yaml auxiliary: vision: - provider: "main" # uses your OPENAI_BASE_URL endpoint + provider: "main" # uses your active custom endpoint model: "my-local-model" ``` +`provider: "main"` follows the same custom endpoint Hermes uses for normal chat. That endpoint can be set directly with `OPENAI_BASE_URL`, or saved once through `hermes model` and persisted in `config.yaml`. + :::tip If you use Codex OAuth as your main model provider, vision works automatically — no extra configuration needed. Codex is included in the auto-detection chain for vision. :::