From 3cc50532d15956af15d520c902efd17a562a0708 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 29 Mar 2026 21:05:36 -0700 Subject: [PATCH] fix: auxiliary client uses placeholder key for local servers without auth (#3842) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Local inference servers (Ollama, llama.cpp, vLLM, LM Studio) don't require API keys, but the auxiliary client's _resolve_custom_runtime() rejected endpoints with empty keys — causing the auto-detection chain to skip the user's local server entirely. This broke compression, summarization, and memory flush for users running local models without an OpenRouter/cloud API key. The main CLI already had this fix (PR #2556, 'no-key-required' placeholder), but the auxiliary client's resolution path was missed. Two fixes: - _resolve_custom_runtime(): use 'no-key-required' placeholder instead of returning None when base_url is present but key is empty - resolve_provider_client() custom branch: same placeholder fallback for explicit_base_url without explicit_api_key Updates 2 tests that expected the old (broken) behavior. --- agent/auxiliary_client.py | 14 ++++++++++---- tests/agent/test_auxiliary_client.py | 19 +++++++++++-------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index fcd2eb12f..c4a5a184e 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -627,8 +627,6 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]: custom_key = runtime.get("api_key") if not isinstance(custom_base, str) or not custom_base.strip(): return None, None - if not isinstance(custom_key, str) or not custom_key.strip(): - return None, None custom_base = custom_base.strip().rstrip("/") if "openrouter.ai" in custom_base.lower(): @@ -636,6 +634,13 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]: # configured. Treat that as "no custom endpoint" for auxiliary routing. return None, None + # Local servers (Ollama, llama.cpp, vLLM, LM Studio) don't require auth. + # Use a placeholder key — the OpenAI SDK requires a non-empty string but + # local servers ignore the Authorization header. Same fix as cli.py + # _ensure_runtime_credentials() (PR #2556). + if not isinstance(custom_key, str) or not custom_key.strip(): + custom_key = "no-key-required" + return custom_base, custom_key.strip() @@ -897,11 +902,12 @@ def resolve_provider_client( custom_key = ( (explicit_api_key or "").strip() or os.getenv("OPENAI_API_KEY", "").strip() + or "no-key-required" # local servers don't need auth ) - if not custom_base or not custom_key: + if not custom_base: logger.warning( "resolve_provider_client: explicit custom endpoint requested " - "but no API key was found (set explicit_api_key or OPENAI_API_KEY)" + "but base_url is empty" ) return None, None final_model = model or _read_main_model() or "gpt-4o-mini" diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 08ed9bc8d..35dcee7ad 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -491,15 +491,17 @@ class TestGetTextAuxiliaryClient: assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1" assert mock_openai.call_args.kwargs["api_key"] == "task-key" - def test_task_direct_endpoint_without_openai_key_does_not_fall_back(self, monkeypatch): + def test_task_direct_endpoint_without_openai_key_uses_placeholder(self, monkeypatch): + """Local endpoints without an API key should use 'no-key-required' placeholder.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1") monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model") with patch("agent.auxiliary_client.OpenAI") as mock_openai: client, model = get_text_auxiliary_client("web_extract") - assert client is None - assert model is None - mock_openai.assert_not_called() + assert client is not None + assert model == "task-model" + assert mock_openai.call_args.kwargs["api_key"] == "no-key-required" + assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1" def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch): config = { @@ -696,15 +698,16 @@ class TestVisionClientFallback: assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1" assert mock_openai.call_args.kwargs["api_key"] == "vision-key" - def test_vision_direct_endpoint_requires_openai_api_key(self, monkeypatch): + def test_vision_direct_endpoint_without_key_uses_placeholder(self, monkeypatch): + """Vision endpoint without API key should use 'no-key-required' placeholder.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1") monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model") with patch("agent.auxiliary_client.OpenAI") as mock_openai: client, model = get_vision_auxiliary_client() - assert client is None - assert model is None - mock_openai.assert_not_called() + assert client is not None + assert model == "vision-model" + assert mock_openai.call_args.kwargs["api_key"] == "no-key-required" def test_vision_uses_openrouter_when_available(self, monkeypatch): monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")