fix: auxiliary client uses placeholder key for local servers without auth (#3842)

Local inference servers (Ollama, llama.cpp, vLLM, LM Studio) don't
require API keys, but the auxiliary client's _resolve_custom_runtime()
rejected endpoints with empty keys — causing the auto-detection chain
to skip the user's local server entirely.  This broke compression,
summarization, and memory flush for users running local models without
an OpenRouter/cloud API key.

The main CLI already had this fix (PR #2556, 'no-key-required'
placeholder), but the auxiliary client's resolution path was missed.

Two fixes:
- _resolve_custom_runtime(): use 'no-key-required' placeholder instead
  of returning None when base_url is present but key is empty
- resolve_provider_client() custom branch: same placeholder fallback
  for explicit_base_url without explicit_api_key
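
Taken together, the fixed resolution logic amounts to the sketch below. It is a minimal, self-contained illustration of the behavior described above, not the actual function (names like `resolve_custom_runtime` and `PLACEHOLDER_KEY` mirror but simplify the real code):

```python
from typing import Optional, Tuple

PLACEHOLDER_KEY = "no-key-required"  # same placeholder as PR #2556

def resolve_custom_runtime(runtime: dict) -> Tuple[Optional[str], Optional[str]]:
    # An empty base_url still means "no custom endpoint".
    base = runtime.get("base_url")
    if not isinstance(base, str) or not base.strip():
        return None, None
    base = base.strip().rstrip("/")
    # OpenRouter endpoints are handled by the cloud path, not here.
    if "openrouter.ai" in base.lower():
        return None, None
    # The fix: an empty key no longer disqualifies the endpoint.
    # Local servers ignore the Authorization header anyway.
    key = runtime.get("api_key")
    if not isinstance(key, str) or not key.strip():
        key = PLACEHOLDER_KEY
    return base, key.strip()

# A local Ollama endpoint with no key now resolves instead of returning None:
print(resolve_custom_runtime({"base_url": "http://localhost:11434/v1/"}))
# → ('http://localhost:11434/v1', 'no-key-required')
```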

Updates 2 tests that expected the old (broken) behavior.
Author: Teknium
Date: 2026-03-29 21:05:36 -07:00
Committed by: GitHub
Parent: 2d607d36f6
Commit: 3cc50532d1

2 changed files with 21 additions and 12 deletions

@@ -627,8 +627,6 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
     custom_key = runtime.get("api_key")
     if not isinstance(custom_base, str) or not custom_base.strip():
         return None, None
-    if not isinstance(custom_key, str) or not custom_key.strip():
-        return None, None
     custom_base = custom_base.strip().rstrip("/")
     if "openrouter.ai" in custom_base.lower():
@@ -636,6 +634,13 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
         # configured. Treat that as "no custom endpoint" for auxiliary routing.
         return None, None
+    # Local servers (Ollama, llama.cpp, vLLM, LM Studio) don't require auth.
+    # Use a placeholder key — the OpenAI SDK requires a non-empty string but
+    # local servers ignore the Authorization header. Same fix as cli.py
+    # _ensure_runtime_credentials() (PR #2556).
+    if not isinstance(custom_key, str) or not custom_key.strip():
+        custom_key = "no-key-required"
     return custom_base, custom_key.strip()
@@ -897,11 +902,12 @@ def resolve_provider_client(
         custom_key = (
             (explicit_api_key or "").strip()
             or os.getenv("OPENAI_API_KEY", "").strip()
+            or "no-key-required"  # local servers don't need auth
        )
-        if not custom_base or not custom_key:
+        if not custom_base:
             logger.warning(
                 "resolve_provider_client: explicit custom endpoint requested "
-                "but no API key was found (set explicit_api_key or OPENAI_API_KEY)"
+                "but base_url is empty"
             )
             return None, None
     final_model = model or _read_main_model() or "gpt-4o-mini"
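
The second hunk reduces to a simple fallback chain for the key. Below is a hedged sketch of just that chain (the standalone helper `resolve_custom_key` is hypothetical, written here only to isolate the behavior): explicit key first, then the `OPENAI_API_KEY` environment variable, then the placeholder.

```python
import os

PLACEHOLDER_KEY = "no-key-required"  # local servers ignore the Authorization header

def resolve_custom_key(explicit_api_key=None):
    # Mirrors the fallback chain in resolve_provider_client's custom branch:
    # the key can no longer end up empty, so a bare base_url is sufficient.
    return (
        (explicit_api_key or "").strip()
        or os.getenv("OPENAI_API_KEY", "").strip()
        or PLACEHOLDER_KEY
    )

os.environ.pop("OPENAI_API_KEY", None)  # ensure a clean environment for the demo
print(resolve_custom_key())            # → no-key-required
print(resolve_custom_key("sk-local"))  # → sk-local
```

With this in place, the only remaining failure mode in the custom branch is a missing base_url, which is exactly what the narrowed warning message now reports.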