fix: vision auto-detection now falls back to custom/local endpoints

Vision auto-mode previously only tried OpenRouter, Nous, and Codex
for multimodal — deliberately skipping custom endpoints with the
assumption they 'may not handle vision input.' This caused silent
failures for users running local multimodal models (Qwen-VL, LLaVA,
Pixtral, etc.) without any cloud API keys.

Now custom endpoints are tried as a last resort in auto mode. If the
model doesn't support vision, the API call fails gracefully — but
users with local vision models no longer need to manually set
auxiliary.vision.provider: main in config.yaml.

Reported by @Spadav and @kotyKD.
This commit is contained in:
teknium1
2026-03-09 15:36:19 -07:00
parent 1a2141d04d
commit ef5d811aba
2 changed files with 16 additions and 8 deletions

View File

@@ -560,12 +560,16 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
     forced = _get_auxiliary_provider("vision")
     if forced != "auto":
        return _resolve_forced_provider(forced)
-    # Auto: only multimodal-capable providers
-    for try_fn in (_try_openrouter, _try_nous, _try_codex):
+    # Auto: try providers known to support multimodal first, then fall
+    # back to the user's custom endpoint. Many local models (Qwen-VL,
+    # LLaVA, Pixtral, etc.) support vision — skipping them entirely
+    # caused silent failures for local-only users.
+    for try_fn in (_try_openrouter, _try_nous, _try_codex,
+                   _try_custom_endpoint):
        client, model = try_fn()
        if client is not None:
            return client, model
-    logger.debug("Auxiliary vision client: none available (auto only tries OpenRouter/Nous/Codex)")
+    logger.debug("Auxiliary vision client: none available")
     return None, None

View File

@@ -176,14 +176,18 @@ class TestVisionClientFallback:
         assert isinstance(client, CodexAuxiliaryClient)
         assert model == "gpt-5.3-codex"

-    def test_vision_auto_skips_custom_endpoint(self, monkeypatch):
-        """Custom endpoint is skipped in vision auto mode."""
+    def test_vision_auto_falls_back_to_custom_endpoint(self, monkeypatch):
+        """Custom endpoint is used as fallback in vision auto mode.
+
+        Many local models (Qwen-VL, LLaVA, etc.) support vision.
+        When no OpenRouter/Nous/Codex is available, try the custom endpoint.
+        """
         monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
         monkeypatch.setenv("OPENAI_API_KEY", "local-key")
-        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai:
             client, model = get_vision_auxiliary_client()
-        assert client is None
-        assert model is None
+        assert client is not None  # Custom endpoint picked up as fallback

     def test_vision_uses_openrouter_when_available(self, monkeypatch):
         monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")