diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index f9c12e7fb..e274e65ac 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -439,12 +439,37 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]:
     )
 
 
+def _read_main_model() -> str:
+    """Read the user's configured main model from config/env.
+
+    Falls back through HERMES_MODEL → LLM_MODEL → config.yaml model.default
+    so the auxiliary client can use the same model as the main agent when no
+    dedicated auxiliary model is available.
+    """
+    from_env = os.getenv("OPENAI_MODEL") or os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL")
+    if from_env:
+        return from_env.strip()
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+        model_cfg = cfg.get("model", {})
+        if isinstance(model_cfg, str) and model_cfg.strip():
+            return model_cfg.strip()
+        if isinstance(model_cfg, dict):
+            default = model_cfg.get("default", "")
+            if isinstance(default, str) and default.strip():
+                return default.strip()
+    except Exception:
+        pass
+    return ""
+
+
 def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
     custom_base = os.getenv("OPENAI_BASE_URL")
     custom_key = os.getenv("OPENAI_API_KEY")
     if not custom_base or not custom_key:
         return None, None
-    model = os.getenv("OPENAI_MODEL") or "gpt-4o-mini"
+    model = _read_main_model() or "gpt-4o-mini"
     logger.debug("Auxiliary client: custom endpoint (%s)", model)
     return OpenAI(api_key=custom_key, base_url=custom_base), model
 
@@ -575,6 +600,15 @@ def resolve_provider_client(
     client, resolved = _resolve_auto()
     if client is None:
         return None, None
+    # When auto-detection lands on a non-OpenRouter provider (e.g. a
+    # local server), an OpenRouter-formatted model override like
+    # "google/gemini-3-flash-preview" won't work. Drop it and use
+    # the provider's own default model instead.
+    if model and "/" in model and resolved and "/" not in resolved:
+        logger.debug(
+            "Dropping OpenRouter-format model %r for non-OpenRouter "
+            "auxiliary provider (using %r instead)", model, resolved)
+        model = None
     final_model = model or resolved
     return (_to_async_client(client, final_model)
             if async_mode else (client, final_model))
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 299d083f2..57c73eb8b 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -129,6 +129,7 @@ class TestGetTextAuxiliaryClient:
     def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
         monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
         monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
         # Override the autouse monkeypatch for codex
         monkeypatch.setattr(
             "agent.auxiliary_client._read_codex_access_token",
@@ -137,7 +138,7 @@ class TestGetTextAuxiliaryClient:
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                 patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = get_text_auxiliary_client()
-            assert model == "gpt-4o-mini"
+            assert model == "my-local-model"
             call_kwargs = mock_openai.call_args
             assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
 
@@ -150,9 +151,13 @@ class TestGetTextAuxiliaryClient:
         from agent.auxiliary_client import CodexAuxiliaryClient
         assert isinstance(client, CodexAuxiliaryClient)
 
-    def test_returns_none_when_nothing_available(self):
+    def test_returns_none_when_nothing_available(self, monkeypatch):
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-                patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+                patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
+                patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
             client, model = get_text_auxiliary_client()
             assert client is None
             assert model is None
@@ -209,17 +214,21 @@ class TestVisionClientFallback:
         monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
         monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
         monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                 patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = get_vision_auxiliary_client()
             assert client is not None
-            assert model == "gpt-4o-mini"
+            assert model == "my-local-model"
 
     def test_vision_forced_main_returns_none_without_creds(self, monkeypatch):
         """Forced main with no credentials still returns None."""
         monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-                patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+                patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
+                patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
             client, model = get_vision_auxiliary_client()
             assert client is None
             assert model is None
@@ -305,21 +314,23 @@ class TestResolveForcedProvider:
     def test_forced_main_uses_custom(self, monkeypatch):
         monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
         monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                 patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = _resolve_forced_provider("main")
-            assert model == "gpt-4o-mini"
+            assert model == "my-local-model"
 
     def test_forced_main_skips_openrouter_nous(self, monkeypatch):
         """Even if OpenRouter key is set, 'main' skips it."""
         monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
         monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
         monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                 patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = _resolve_forced_provider("main")
             # Should use custom endpoint, not OpenRouter
-            assert model == "gpt-4o-mini"
+            assert model == "my-local-model"
 
     def test_forced_main_falls_to_codex(self, codex_auth_dir, monkeypatch):
         with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
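For reviewers, a quick sketch of the model-resolution order the new `_read_main_model` helper gives the auxiliary client. This is an illustration only: `resolve_aux_model` below is a hypothetical stand-in, not the patched code. It folds in the `or "gpt-4o-mini"` fallback that `_try_custom_endpoint` applies after calling the helper, and assumes `config.yaml` exposes either a `model.default` key or a bare `model: <name>` string, matching what the helper reads via `load_config()`.

```python
import os

def resolve_aux_model(config_default: str = "") -> str:
    """Hypothetical stand-in mirroring _read_main_model's precedence."""
    # 1-3: environment overrides, checked in order; first non-empty wins.
    for var in ("OPENAI_MODEL", "HERMES_MODEL", "LLM_MODEL"):
        value = (os.getenv(var) or "").strip()
        if value:
            return value
    # 4: config.yaml `model.default` (or a bare `model: <name>` string).
    if config_default.strip():
        return config_default.strip()
    # 5: last-resort default applied by _try_custom_endpoint.
    return "gpt-4o-mini"

# With only LLM_MODEL set, the config.yaml value is ignored:
os.environ["LLM_MODEL"] = "qwen2.5-32b-instruct"
assert resolve_aux_model(config_default="my-config-model") == "qwen2.5-32b-instruct"
```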