fix: auxiliary client uses main model for custom/local endpoints instead of gpt-4o-mini (#1189)

* fix: prevent model/provider mismatch when switching providers during active gateway When _update_config_for_provider() writes the new provider and base_url to config.yaml, the gateway (which re-reads config per-message) can pick up the change before model selection completes. This causes the old model name (e.g. 'anthropic/claude-opus-4.6') to be sent to the new provider's API (e.g. MiniMax), which fails. Changes: - _update_config_for_provider() now accepts an optional default_model parameter. When provided and the current model.default is empty or uses OpenRouter format (contains '/'), it sets a safe default model for the new provider. - All setup.py callers for direct-API providers (zai, kimi, minimax, minimax-cn, anthropic) now pass a provider-appropriate default model. - _setup_provider_model_selection() now validates the 'Keep current' choice: if the current model uses OpenRouter format and wouldn't work with the new provider, it warns and switches to the provider's first default model instead of silently keeping the incompatible name. Reported by a user on Home Assistant whose gateway started sending 'anthropic/claude-opus-4.6' to MiniMax's API after running hermes setup. * fix: auxiliary client uses main model for custom/local endpoints instead of gpt-4o-mini When a user runs a local server (e.g. Qwen3.5-9B via OPENAI_BASE_URL), the auxiliary client (context compression, vision, session search) would send requests for 'gpt-4o-mini' or 'google/gemini-3-flash-preview' to the local server, which only serves one model — causing 404 errors mid-task. Changes: - _try_custom_endpoint() now reads the user's configured main model via _read_main_model() (checks OPENAI_MODEL → HERMES_MODEL → LLM_MODEL → config.yaml model.default) instead of hardcoding 'gpt-4o-mini'. - resolve_provider_client() auto mode now detects when an OpenRouter- formatted model override (containing '/') would be sent to a non- OpenRouter provider (like a local server) and drops it in favor of the provider's default model. - Test isolation fixes: properly clear env vars in 'nothing available' tests to prevent host environment leakage.
2026-03-13 10:02:16 -07:00
parent 153ccbfd61
commit 11b577671b
2 changed files with 53 additions and 8 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -439,12 +439,37 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]:
    )


+def _read_main_model() -> str:
+    """Read the user's configured main model from config/env.
+
+    Falls back through HERMES_MODEL → LLM_MODEL → config.yaml model.default
+    so the auxiliary client can use the same model as the main agent when no
+    dedicated auxiliary model is available.
+    """
+    from_env = os.getenv("OPENAI_MODEL") or os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL")
+    if from_env:
+        return from_env.strip()
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+        model_cfg = cfg.get("model", {})
+        if isinstance(model_cfg, str) and model_cfg.strip():
+            return model_cfg.strip()
+        if isinstance(model_cfg, dict):
+            default = model_cfg.get("default", "")
+            if isinstance(default, str) and default.strip():
+                return default.strip()
+    except Exception:
+        pass
+    return ""
+
+
 def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
    custom_base = os.getenv("OPENAI_BASE_URL")
    custom_key = os.getenv("OPENAI_API_KEY")
    if not custom_base or not custom_key:
        return None, None
-    model = os.getenv("OPENAI_MODEL") or "gpt-4o-mini"
+    model = _read_main_model() or "gpt-4o-mini"
    logger.debug("Auxiliary client: custom endpoint (%s)", model)
    return OpenAI(api_key=custom_key, base_url=custom_base), model

@@ -575,6 +600,15 @@ def resolve_provider_client(
        client, resolved = _resolve_auto()
        if client is None:
            return None, None
+        # When auto-detection lands on a non-OpenRouter provider (e.g. a
+        # local server), an OpenRouter-formatted model override like
+        # "google/gemini-3-flash-preview" won't work.  Drop it and use
+        # the provider's own default model instead.
+        if model and "/" in model and resolved and "/" not in resolved:
+            logger.debug(
+                "Dropping OpenRouter-format model %r for non-OpenRouter "
+                "auxiliary provider (using %r instead)", model, resolved)
+            model = None
        final_model = model or resolved
        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -129,6 +129,7 @@ class TestGetTextAuxiliaryClient:
    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        # Override the autouse monkeypatch for codex
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
@@ -137,7 +138,7 @@ class TestGetTextAuxiliaryClient:
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"
        call_kwargs = mock_openai.call_args
        assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

@@ -150,9 +151,13 @@ class TestGetTextAuxiliaryClient:
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)

-    def test_returns_none_when_nothing_available(self):
+    def test_returns_none_when_nothing_available(self, monkeypatch):
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
+             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
            client, model = get_text_auxiliary_client()
        assert client is None
        assert model is None
@@ -209,17 +214,21 @@ class TestVisionClientFallback:
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_vision_auxiliary_client()
        assert client is not None
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"

    def test_vision_forced_main_returns_none_without_creds(self, monkeypatch):
        """Forced main with no credentials still returns None."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
+             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
            client, model = get_vision_auxiliary_client()
        assert client is None
        assert model is None
@@ -305,21 +314,23 @@ class TestResolveForcedProvider:
    def test_forced_main_uses_custom(self, monkeypatch):
        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = _resolve_forced_provider("main")
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"

    def test_forced_main_skips_openrouter_nous(self, monkeypatch):
        """Even if OpenRouter key is set, 'main' skips it."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = _resolve_forced_provider("main")
        # Should use custom endpoint, not OpenRouter
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"

    def test_forced_main_falls_to_codex(self, codex_auth_dir, monkeypatch):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \