Merge pull request #1373 from NousResearch/hermes/hermes-781f9235

fix: restore config-saved custom endpoint resolution
This commit is contained in:
Teknium
2026-03-14 21:06:41 -07:00
committed by GitHub
4 changed files with 117 additions and 9 deletions

View File

@@ -465,9 +465,44 @@ def _read_main_model() -> str:
return ""
def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
    """Resolve the active custom/main endpoint the same way the main CLI does.

    Covers both env-driven OPENAI_BASE_URL setups and config-saved custom
    endpoints whose base URL lives in config.yaml rather than the live
    environment.

    Returns:
        A ``(base_url, api_key)`` pair with the base URL stripped of any
        trailing slash, or ``(None, None)`` when no usable custom endpoint
        is configured.
    """
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider

        runtime = resolve_runtime_provider(requested="custom")
    except Exception as exc:
        logger.debug("Auxiliary client: custom runtime resolution failed: %s", exc)
        return None, None

    base = runtime.get("base_url")
    key = runtime.get("api_key")
    # Both values must be non-empty strings to be usable.
    for value in (base, key):
        if not (isinstance(value, str) and value.strip()):
            return None, None

    base = base.strip().rstrip("/")
    # requested='custom' falls back to OpenRouter when no custom endpoint is
    # configured. Treat that as "no custom endpoint" for auxiliary routing.
    if "openrouter.ai" in base.lower():
        return None, None
    return base, key.strip()
def _current_custom_base_url() -> str:
    """Return the active custom endpoint base URL, or ``""`` when none exists."""
    base_url = _resolve_custom_runtime()[0]
    return "" if base_url is None else base_url
def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
custom_base = os.getenv("OPENAI_BASE_URL")
custom_key = os.getenv("OPENAI_API_KEY")
custom_base, custom_key = _resolve_custom_runtime()
if not custom_base or not custom_key:
return None, None
model = _read_main_model() or "gpt-4o-mini"
@@ -888,7 +923,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
The Codex adapter translates max_tokens internally, so we use max_tokens
for it as well.
"""
custom_base = os.getenv("OPENAI_BASE_URL", "")
custom_base = _current_custom_base_url()
or_key = os.getenv("OPENROUTER_API_KEY")
# Only use max_completion_tokens for direct OpenAI custom endpoints
if (not or_key
@@ -1009,7 +1044,7 @@ def _build_call_kwargs(
# Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
if provider == "custom":
custom_base = os.getenv("OPENAI_BASE_URL", "")
custom_base = _current_custom_base_url()
if "api.openai.com" in custom_base.lower():
kwargs["max_completion_tokens"] = max_tokens
else:

View File

@@ -144,10 +144,16 @@ def _resolve_openrouter_runtime(
env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
use_config_base_url = False
if requested_norm == "auto":
if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url:
if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url:
if requested_norm == "auto":
if not cfg_provider or cfg_provider == "auto":
use_config_base_url = True
elif requested_norm == "custom":
# Persisted custom endpoints store their base URL in config.yaml.
# If OPENAI_BASE_URL is not currently set in the environment, keep
# honoring that saved endpoint instead of falling back to OpenRouter.
if cfg_provider == "custom":
use_config_base_url = True
# When the user explicitly requested the openrouter provider, skip
# OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter

View File

@@ -142,6 +142,29 @@ class TestGetTextAuxiliaryClient:
call_kwargs = mock_openai.call_args
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
    """A custom endpoint saved only in config.yaml (no OPENAI_BASE_URL in the
    environment) must still be picked up by the auxiliary client."""
    saved = {
        "model": {
            "provider": "custom",
            "base_url": "http://localhost:1234/v1",
            "default": "my-local-model",
        }
    }
    # Both config loaders must serve the persisted custom-endpoint config.
    monkeypatch.setattr("hermes_cli.config.load_config", lambda: saved)
    monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: saved)
    monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
    with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
            patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
            patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
            patch("agent.auxiliary_client.OpenAI") as mock_openai:
        client, model = get_text_auxiliary_client()
        assert client is not None
        assert model == "my-local-model"
        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:1234/v1"
def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
patch("agent.auxiliary_client.OpenAI") as mock_openai:
@@ -320,6 +343,27 @@ class TestResolveForcedProvider:
client, model = _resolve_forced_provider("main")
assert model == "my-local-model"
def test_forced_main_uses_config_saved_custom_endpoint(self, monkeypatch):
    """Forcing provider 'main' must honor a custom endpoint that lives only in
    config.yaml, not just one exported via OPENAI_BASE_URL."""
    saved = {
        "model": {
            "provider": "custom",
            "base_url": "http://local:8080/v1",
            "default": "my-local-model",
        }
    }
    # Both config loaders must serve the persisted custom-endpoint config.
    monkeypatch.setattr("hermes_cli.config.load_config", lambda: saved)
    monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: saved)
    monkeypatch.setenv("OPENAI_API_KEY", "local-key")
    with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
            patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
            patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
            patch("agent.auxiliary_client.OpenAI") as mock_openai:
        client, model = _resolve_forced_provider("main")
        assert client is not None
        assert model == "my-local-model"
        assert mock_openai.call_args.kwargs["base_url"] == "http://local:8080/v1"
def test_forced_main_skips_openrouter_nous(self, monkeypatch):
"""Even if OpenRouter key is set, 'main' skips it."""
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

View File

@@ -131,13 +131,36 @@ def test_custom_endpoint_prefers_openai_key(monkeypatch):
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
monkeypatch.setenv("OPENAI_BASE_URL", "https://api.z.ai/api/coding/paas/v4")
monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
monkeypatch.setenv("OPENAI_API_KEY", "sk-zai-correct-key")
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-wrong-key-for-zai")
monkeypatch.setenv("OPENAI_API_KEY", "zai-key")
monkeypatch.setenv("OPENROUTER_API_KEY", "openrouter-key")
resolved = rp.resolve_runtime_provider(requested="custom")
assert resolved["base_url"] == "https://api.z.ai/api/coding/paas/v4"
assert resolved["api_key"] == "sk-zai-correct-key"
assert resolved["api_key"] == "zai-key"
def test_custom_endpoint_uses_saved_config_base_url_when_env_missing(monkeypatch):
    """Persisted custom endpoints in config.yaml must still resolve when
    OPENAI_BASE_URL is absent from the current environment."""
    saved_model_cfg = {
        "provider": "custom",
        "base_url": "http://127.0.0.1:1234/v1",
    }
    monkeypatch.setattr(rp, "resolve_provider", lambda *args, **kwargs: "openrouter")
    monkeypatch.setattr(rp, "_get_model_config", lambda: saved_model_cfg)
    # Neither base-URL variable is set; only the saved config carries the URL.
    for var in ("OPENAI_BASE_URL", "OPENROUTER_BASE_URL"):
        monkeypatch.delenv(var, raising=False)
    monkeypatch.setenv("OPENAI_API_KEY", "local-key")
    monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
    result = rp.resolve_runtime_provider(requested="custom")
    assert result["base_url"] == "http://127.0.0.1:1234/v1"
    assert result["api_key"] == "local-key"
def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch):