fix: vision auto-detection now falls back to custom/local endpoints
Vision auto-mode previously tried only OpenRouter, Nous, and Codex for multimodal — deliberately skipping custom endpoints on the assumption that they 'may not handle vision input.' This caused silent failures for users running local multimodal models (Qwen-VL, LLaVA, Pixtral, etc.) without any cloud API keys. Custom endpoints are now tried as a last resort in auto mode. If the model doesn't support vision, the API call fails gracefully — and users with local vision models no longer need to manually set `auxiliary.vision.provider: main` in config.yaml. Reported by @Spadav and @kotyKD.
This commit is contained in:
@@ -560,12 +560,16 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||||||
forced = _get_auxiliary_provider("vision")
|
forced = _get_auxiliary_provider("vision")
|
||||||
if forced != "auto":
|
if forced != "auto":
|
||||||
return _resolve_forced_provider(forced)
|
return _resolve_forced_provider(forced)
|
||||||
# Auto: only multimodal-capable providers
|
# Auto: try providers known to support multimodal first, then fall
|
||||||
for try_fn in (_try_openrouter, _try_nous, _try_codex):
|
# back to the user's custom endpoint. Many local models (Qwen-VL,
|
||||||
|
# LLaVA, Pixtral, etc.) support vision — skipping them entirely
|
||||||
|
# caused silent failures for local-only users.
|
||||||
|
for try_fn in (_try_openrouter, _try_nous, _try_codex,
|
||||||
|
_try_custom_endpoint):
|
||||||
client, model = try_fn()
|
client, model = try_fn()
|
||||||
if client is not None:
|
if client is not None:
|
||||||
return client, model
|
return client, model
|
||||||
logger.debug("Auxiliary vision client: none available (auto only tries OpenRouter/Nous/Codex)")
|
logger.debug("Auxiliary vision client: none available")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -176,14 +176,18 @@ class TestVisionClientFallback:
|
|||||||
assert isinstance(client, CodexAuxiliaryClient)
|
assert isinstance(client, CodexAuxiliaryClient)
|
||||||
assert model == "gpt-5.3-codex"
|
assert model == "gpt-5.3-codex"
|
||||||
|
|
||||||
def test_vision_auto_skips_custom_endpoint(self, monkeypatch):
|
def test_vision_auto_falls_back_to_custom_endpoint(self, monkeypatch):
|
||||||
"""Custom endpoint is skipped in vision auto mode."""
|
"""Custom endpoint is used as fallback in vision auto mode.
|
||||||
|
|
||||||
|
Many local models (Qwen-VL, LLaVA, etc.) support vision.
|
||||||
|
When no OpenRouter/Nous/Codex is available, try the custom endpoint.
|
||||||
|
"""
|
||||||
monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
|
monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
|
||||||
monkeypatch.setenv("OPENAI_API_KEY", "local-key")
|
monkeypatch.setenv("OPENAI_API_KEY", "local-key")
|
||||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
|
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||||
|
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||||
client, model = get_vision_auxiliary_client()
|
client, model = get_vision_auxiliary_client()
|
||||||
assert client is None
|
assert client is not None # Custom endpoint picked up as fallback
|
||||||
assert model is None
|
|
||||||
|
|
||||||
def test_vision_uses_openrouter_when_available(self, monkeypatch):
|
def test_vision_uses_openrouter_when_available(self, monkeypatch):
|
||||||
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
||||||
|
|||||||
Reference in New Issue
Block a user