From 3cc50532d15956af15d520c902efd17a562a0708 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 29 Mar 2026 21:05:36 -0700
Subject: [PATCH] fix: auxiliary client uses placeholder key for local servers
 without auth (#3842)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Local inference servers (Ollama, llama.cpp, vLLM, LM Studio) don't
require API keys, but the auxiliary client's _resolve_custom_runtime()
rejected endpoints with empty keys — causing the auto-detection chain
to skip the user's local server entirely.  This broke compression,
summarization, and memory flush for users running local models without
an OpenRouter/cloud API key.

The main CLI already had this fix (PR #2556, 'no-key-required'
placeholder), but the auxiliary client's resolution path was missed.

Two fixes:
- _resolve_custom_runtime(): use 'no-key-required' placeholder instead
  of returning None when base_url is present but key is empty
- resolve_provider_client() custom branch: same placeholder fallback
  when explicit_base_url is given but neither explicit_api_key nor
  the OPENAI_API_KEY environment variable supplies a key

Updates 2 tests that expected the old (broken) behavior.
---
 agent/auxiliary_client.py            | 14 ++++++++++----
 tests/agent/test_auxiliary_client.py | 19 +++++++++++--------
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index fcd2eb12f..c4a5a184e 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -627,8 +627,6 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
     custom_key = runtime.get("api_key")
     if not isinstance(custom_base, str) or not custom_base.strip():
         return None, None
-    if not isinstance(custom_key, str) or not custom_key.strip():
-        return None, None
 
     custom_base = custom_base.strip().rstrip("/")
     if "openrouter.ai" in custom_base.lower():
@@ -636,6 +634,13 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
         # configured. Treat that as "no custom endpoint" for auxiliary routing.
         return None, None
 
+    # Local servers (Ollama, llama.cpp, vLLM, LM Studio) don't require auth.
+    # Use a placeholder key — the OpenAI SDK requires a non-empty string but
+    # local servers ignore the Authorization header.  Same fix as cli.py
+    # _ensure_runtime_credentials() (PR #2556).
+    if not isinstance(custom_key, str) or not custom_key.strip():
+        custom_key = "no-key-required"
+
     return custom_base, custom_key.strip()
 
 
@@ -897,11 +902,12 @@ def resolve_provider_client(
             custom_key = (
                 (explicit_api_key or "").strip()
                 or os.getenv("OPENAI_API_KEY", "").strip()
+                or "no-key-required"  # local servers don't need auth
             )
-            if not custom_base or not custom_key:
+            if not custom_base:
                 logger.warning(
                     "resolve_provider_client: explicit custom endpoint requested "
-                    "but no API key was found (set explicit_api_key or OPENAI_API_KEY)"
+                    "but base_url is empty"
                 )
                 return None, None
             final_model = model or _read_main_model() or "gpt-4o-mini"
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index 08ed9bc8d..35dcee7ad 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -491,15 +491,17 @@ class TestGetTextAuxiliaryClient:
         assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
         assert mock_openai.call_args.kwargs["api_key"] == "task-key"
 
-    def test_task_direct_endpoint_without_openai_key_does_not_fall_back(self, monkeypatch):
+    def test_task_direct_endpoint_without_openai_key_uses_placeholder(self, monkeypatch):
+        """Local endpoints without an API key should use 'no-key-required' placeholder."""
         monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
         monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
         monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
         with patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = get_text_auxiliary_client("web_extract")
-        assert client is None
-        assert model is None
-        mock_openai.assert_not_called()
+        assert client is not None
+        assert model == "task-model"
+        assert mock_openai.call_args.kwargs["api_key"] == "no-key-required"
+        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
 
     def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
         config = {
@@ -696,15 +698,16 @@ class TestVisionClientFallback:
         assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1"
         assert mock_openai.call_args.kwargs["api_key"] == "vision-key"
 
-    def test_vision_direct_endpoint_requires_openai_api_key(self, monkeypatch):
+    def test_vision_direct_endpoint_without_key_uses_placeholder(self, monkeypatch):
+        """Vision endpoint without API key should use 'no-key-required' placeholder."""
         monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
         monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
         monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
         with patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = get_vision_auxiliary_client()
-        assert client is None
-        assert model is None
-        mock_openai.assert_not_called()
+        assert client is not None
+        assert model == "vision-model"
+        assert mock_openai.call_args.kwargs["api_key"] == "no-key-required"
 
     def test_vision_uses_openrouter_when_available(self, monkeypatch):
         monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")