refactor: unify vision backend gating

2026-03-14 20:22:13 -07:00
parent 799114ac8b
commit dc11b86e4b
7 changed files with 292 additions and 171 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -768,48 +768,107 @@ def get_async_text_auxiliary_client(task: str = ""):
    return resolve_provider_client("auto", async_mode=True)


-def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
-    """Return (client, default_model_slug) for vision/multimodal auxiliary tasks.
+# Vision backends considered when the vision provider is "auto", listed in
+# the order they are tried. Names here are also the ones routed through
+# _resolve_strict_vision_backend() when requested explicitly.
+_VISION_AUTO_PROVIDER_ORDER = (
+    "openrouter",
+    "nous",
+    "openai-codex",
+    "custom",
+)

-    Checks AUXILIARY_VISION_PROVIDER for a forced provider, otherwise
-    auto-detects.  Callers may override the returned model with
-    AUXILIARY_VISION_MODEL.

-    In auto mode, only providers known to support multimodal are tried:
-    OpenRouter, Nous Portal, and Codex OAuth (gpt-5.3-codex supports
-    vision via the Responses API).  Custom endpoints and API-key
-    providers are skipped — they may not handle vision input.  To use
-    them, set AUXILIARY_VISION_PROVIDER explicitly.
-    """
-    forced = _get_auxiliary_provider("vision")
-    if forced != "auto":
-        return resolve_provider_client(forced)
-    # Auto: try providers known to support multimodal first, then fall
-    # back to the user's custom endpoint.  Many local models (Qwen-VL,
-    # LLaVA, Pixtral, etc.) support vision — skipping them entirely
-    # caused silent failures for local-only users.
-    for try_fn in (_try_openrouter, _try_nous, _try_codex,
-                   _try_custom_endpoint):
-        client, model = try_fn()
-        if client is not None:
-            return client, model
-    logger.debug("Auxiliary vision client: none available")
+def _normalize_vision_provider(provider: Optional[str]) -> str:
+    """Canonicalize a vision provider name.
+
+    ``None`` or an empty string is treated as ``"auto"``. The value is
+    stripped and lower-cased, then the aliases ``"codex"`` and ``"main"``
+    are mapped to ``"openai-codex"`` and ``"custom"``. Any other name is
+    returned as-is after stripping/lower-casing.
+    """
+    provider = (provider or "auto").strip().lower()
+    if provider == "codex":
+        return "openai-codex"
+    if provider == "main":
+        return "custom"
+    return provider
+
+
+def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
+    """Resolve one of the built-in vision backends by name.
+
+    The name is normalized first, then dispatched to the matching ``_try_*``
+    helper, whose result is returned unchanged (callers unpack it as
+    ``(client, default_model)``). Names outside the four built-in backends
+    yield ``(None, None)``.
+    """
+    provider = _normalize_vision_provider(provider)
+    if provider == "openrouter":
+        return _try_openrouter()
+    if provider == "nous":
+        return _try_nous()
+    if provider == "openai-codex":
+        return _try_codex()
+    if provider == "custom":
+        return _try_custom_endpoint()
    return None, None


-def get_async_vision_auxiliary_client():
-    """Return (async_client, model_slug) for async vision consumers.
+def _strict_vision_backend_available(provider: str) -> bool:
+    """Return True when ``_resolve_strict_vision_backend(provider)`` yields a client."""
+    return _resolve_strict_vision_backend(provider)[0] is not None

-    Properly handles Codex routing — unlike manually constructing
-    AsyncOpenAI from a sync client, this preserves the Responses API
-    adapter for Codex providers.

-    Returns (None, None) when no provider is available.
+def get_available_vision_backends() -> List[str]:
+    """Return the currently available vision backends in auto-selection order.
+
+    This is the single source of truth for setup, tool gating, and runtime
+    auto-routing of vision tasks. Phase 1 keeps the auto list conservative:
+    OpenRouter, Nous Portal, Codex OAuth, then custom OpenAI-compatible
+    endpoints. Explicit provider overrides can still route elsewhere.
+
+    Returns:
+        The names from ``_VISION_AUTO_PROVIDER_ORDER`` for which
+        ``_strict_vision_backend_available`` is true, in that order. The
+        list is empty when none of them resolves to a client.
    """
-    sync_client, model = get_vision_auxiliary_client()
-    if sync_client is None:
-        return None, None
-    return _to_async_client(sync_client, model)
+    return [
+        provider
+        for provider in _VISION_AUTO_PROVIDER_ORDER
+        if _strict_vision_backend_available(provider)
+    ]
+
+
+def resolve_vision_provider_client(
+    provider: Optional[str] = None,
+    model: Optional[str] = None,
+    *,
+    async_mode: bool = False,
+) -> Tuple[Optional[str], Optional[Any], Optional[str]]:
+    """Resolve the client actually used for vision tasks.
+
+    Explicit provider overrides still use the generic provider router for
+    non-standard backends, so users can intentionally force experimental
+    providers. Auto mode stays conservative and only tries vision backends
+    known to work today.
+
+    Args:
+        provider: Provider name or alias. When falsy, the value of
+            ``_get_auxiliary_provider("vision")`` is used instead.
+        model: Optional model override. For the built-in vision backends it
+            replaces the backend's default model; for any other provider it
+            is passed through to ``_get_cached_client``.
+        async_mode: When true, return an async client instead of a sync one.
+
+    Returns:
+        ``(provider, client, model)``. ``client`` and ``model`` are ``None``
+        when the requested provider cannot be resolved; ``provider`` is also
+        ``None`` when auto mode finds no usable backend.
+    """
+    requested = _normalize_vision_provider(provider or _get_auxiliary_provider("vision"))
+
+    def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]):
+        # Apply the caller's model override and, if requested, convert the
+        # sync client into its async counterpart.
+        if sync_client is None:
+            return resolved_provider, None, None
+        final_model = model or default_model
+        if async_mode:
+            async_client, async_model = _to_async_client(sync_client, final_model)
+            return resolved_provider, async_client, async_model
+        return resolved_provider, sync_client, final_model
+
+    if requested == "auto":
+        # Walk the auto order lazily and stop at the first usable backend.
+        # This uses the same order and the same availability test as
+        # get_available_vision_backends(), but avoids resolving every
+        # backend up front and then resolving the chosen one a second time.
+        for candidate in _VISION_AUTO_PROVIDER_ORDER:
+            sync_client, default_model = _resolve_strict_vision_backend(candidate)
+            if sync_client is not None:
+                return _finalize(candidate, sync_client, default_model)
+        logger.debug("Auxiliary vision client: none available")
+        return None, None, None
+
+    if requested in _VISION_AUTO_PROVIDER_ORDER:
+        sync_client, default_model = _resolve_strict_vision_backend(requested)
+        return _finalize(requested, sync_client, default_model)
+
+    # Any other explicitly requested provider goes through the generic router.
+    client, final_model = _get_cached_client(requested, model, async_mode)
+    if client is None:
+        return requested, None, None
+    return requested, client, final_model
+
+
+def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
+    """Return (client, default_model_slug) for vision/multimodal auxiliary tasks.
+
+    Thin wrapper over ``resolve_vision_provider_client(async_mode=False)``
+    that drops the resolved provider name. Both elements are ``None`` when
+    no vision backend can be resolved.
+    """
+    _, client, final_model = resolve_vision_provider_client(async_mode=False)
+    return client, final_model
+
+
+def get_async_vision_auxiliary_client():
+    """Return (async_client, model_slug) for async vision consumers.
+
+    Thin wrapper over ``resolve_vision_provider_client(async_mode=True)``
+    that drops the resolved provider name. Both elements are ``None`` when
+    no vision backend can be resolved.
+    """
+    _, client, final_model = resolve_vision_provider_client(async_mode=True)
+    return client, final_model


 def get_auxiliary_extra_body() -> dict:
@@ -1010,18 +1069,41 @@ def call_llm(
    resolved_provider, resolved_model = _resolve_task_provider_model(
        task, provider, model)

-    client, final_model = _get_cached_client(resolved_provider, resolved_model)
-    if client is None:
-        # Fallback: try openrouter
-        if resolved_provider != "openrouter":
-            logger.warning("Provider %s unavailable, falling back to openrouter",
-                           resolved_provider)
-            client, final_model = _get_cached_client(
-                "openrouter", resolved_model or _OPENROUTER_MODEL)
-    if client is None:
-        raise RuntimeError(
-            f"No LLM provider configured for task={task} provider={resolved_provider}. "
-            f"Run: hermes setup")
+    if task == "vision":
+        effective_provider, client, final_model = resolve_vision_provider_client(
+            provider=resolved_provider,
+            model=resolved_model,
+            async_mode=False,
+        )
+        if client is None and resolved_provider != "auto":
+            logger.warning(
+                "Vision provider %s unavailable, falling back to auto vision backends",
+                resolved_provider,
+            )
+            effective_provider, client, final_model = resolve_vision_provider_client(
+                provider="auto",
+                model=resolved_model,
+                async_mode=False,
+            )
+        if client is None:
+            raise RuntimeError(
+                f"No LLM provider configured for task={task} provider={resolved_provider}. "
+                f"Run: hermes setup"
+            )
+        resolved_provider = effective_provider or resolved_provider
+    else:
+        client, final_model = _get_cached_client(resolved_provider, resolved_model)
+        if client is None:
+            # Fallback: try openrouter
+            if resolved_provider != "openrouter":
+                logger.warning("Provider %s unavailable, falling back to openrouter",
+                               resolved_provider)
+                client, final_model = _get_cached_client(
+                    "openrouter", resolved_model or _OPENROUTER_MODEL)
+        if client is None:
+            raise RuntimeError(
+                f"No LLM provider configured for task={task} provider={resolved_provider}. "
+                f"Run: hermes setup")

    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
@@ -1059,19 +1141,42 @@ async def async_call_llm(
    resolved_provider, resolved_model = _resolve_task_provider_model(
        task, provider, model)

-    client, final_model = _get_cached_client(
-        resolved_provider, resolved_model, async_mode=True)
-    if client is None:
-        if resolved_provider != "openrouter":
-            logger.warning("Provider %s unavailable, falling back to openrouter",
-                           resolved_provider)
-            client, final_model = _get_cached_client(
-                "openrouter", resolved_model or _OPENROUTER_MODEL,
-                async_mode=True)
-    if client is None:
-        raise RuntimeError(
-            f"No LLM provider configured for task={task} provider={resolved_provider}. "
-            f"Run: hermes setup")
+    if task == "vision":
+        effective_provider, client, final_model = resolve_vision_provider_client(
+            provider=resolved_provider,
+            model=resolved_model,
+            async_mode=True,
+        )
+        if client is None and resolved_provider != "auto":
+            logger.warning(
+                "Vision provider %s unavailable, falling back to auto vision backends",
+                resolved_provider,
+            )
+            effective_provider, client, final_model = resolve_vision_provider_client(
+                provider="auto",
+                model=resolved_model,
+                async_mode=True,
+            )
+        if client is None:
+            raise RuntimeError(
+                f"No LLM provider configured for task={task} provider={resolved_provider}. "
+                f"Run: hermes setup"
+            )
+        resolved_provider = effective_provider or resolved_provider
+    else:
+        client, final_model = _get_cached_client(
+            resolved_provider, resolved_model, async_mode=True)
+        if client is None:
+            if resolved_provider != "openrouter":
+                logger.warning("Provider %s unavailable, falling back to openrouter",
+                               resolved_provider)
+                client, final_model = _get_cached_client(
+                    "openrouter", resolved_model or _OPENROUTER_MODEL,
+                    async_mode=True)
+        if client is None:
+            raise RuntimeError(
+                f"No LLM provider configured for task={task} provider={resolved_provider}. "
+                f"Run: hermes setup")

    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -460,33 +460,15 @@ def _print_setup_summary(config: dict, hermes_home):

    tool_status = []

-    # Vision — works with OpenRouter, Nous OAuth, Codex OAuth, or OpenAI endpoint
-    _has_vision = False
-    if get_env_value("OPENROUTER_API_KEY"):
-        _has_vision = True
-    else:
-        try:
-            _vauth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json"
-            if _vauth_path.is_file():
-                import json as _vjson
+    # Vision — use the same runtime resolver as the actual vision tools
+    try:
+        from agent.auxiliary_client import get_available_vision_backends

-                _vauth = _vjson.loads(_vauth_path.read_text())
-                if _vauth.get("active_provider") == "nous":
-                    _np = _vauth.get("providers", {}).get("nous", {})
-                    if _np.get("agent_key") or _np.get("access_token"):
-                        _has_vision = True
-                elif _vauth.get("active_provider") == "openai-codex":
-                    _cp = _vauth.get("providers", {}).get("openai-codex", {})
-                    if _cp.get("tokens", {}).get("access_token"):
-                        _has_vision = True
-        except Exception:
-            pass
-    if not _has_vision:
-        _oai_base = get_env_value("OPENAI_BASE_URL") or ""
-        if get_env_value("OPENAI_API_KEY") and "api.openai.com" in _oai_base.lower():
-            _has_vision = True
+        _vision_backends = get_available_vision_backends()
+    except Exception:
+        _vision_backends = []

-    if _has_vision:
+    if _vision_backends:
        tool_status.append(("Vision (image analysis)", True, None))
    else:
        tool_status.append(("Vision (image analysis)", False, "run 'hermes setup' to configure"))
@@ -1276,58 +1258,22 @@ def setup_model_provider(config: dict):
            selected_provider = "openrouter"

    # ── Vision & Image Analysis Setup ──
-    # Vision requires a multimodal-capable provider. Check whether the user's
-    # chosen provider already covers it — if so, skip the prompt entirely.
-    _vision_needs_setup = True
+    # Keep setup aligned with the actual runtime resolver the vision tools use.
+    try:
+        from agent.auxiliary_client import get_available_vision_backends

-    if selected_provider == "openrouter":
-        # OpenRouter → Gemini for vision, already configured
-        _vision_needs_setup = False
-    elif selected_provider == "nous":
-        # Nous Portal OAuth → Gemini via Nous, already configured
-        _vision_needs_setup = False
-    elif selected_provider == "openai-codex":
-        # Codex OAuth → gpt-5.3-codex supports vision
-        _vision_needs_setup = False
-    elif selected_provider == "custom":
-        _custom_base = (get_env_value("OPENAI_BASE_URL") or "").lower()
-        if "api.openai.com" in _custom_base:
-            # Direct OpenAI endpoint — show vision model picker
-            print()
-            print_header("Vision Model")
-            print_info("Your OpenAI endpoint supports vision. Pick a model for image analysis:")
-            _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
-            _vm_choices = _oai_vision_models + ["Keep default (gpt-4o-mini)"]
-            _vm_idx = prompt_choice("Select vision model:", _vm_choices, len(_vm_choices) - 1)
-            _selected_vision_model = (
-                _oai_vision_models[_vm_idx]
-                if _vm_idx < len(_oai_vision_models)
-                else "gpt-4o-mini"
-            )
-            save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
-            print_success(f"Vision model set to {_selected_vision_model}")
-            _vision_needs_setup = False
+        _vision_backends = set(get_available_vision_backends())
+    except Exception:
+        _vision_backends = set()

-    # Even for providers without native vision, check if existing credentials
-    # from a previous setup already cover it (e.g. user had OpenRouter before
-    # switching to z.ai)
-    if _vision_needs_setup:
-        if get_env_value("OPENROUTER_API_KEY"):
-            _vision_needs_setup = False
-        else:
-            # Check for Nous Portal OAuth in auth.json
-            try:
-                _auth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json"
-                if _auth_path.is_file():
-                    import json as _json
+    _vision_needs_setup = not bool(_vision_backends)

-                    _auth_data = _json.loads(_auth_path.read_text())
-                    if _auth_data.get("active_provider") == "nous":
-                        _nous_p = _auth_data.get("providers", {}).get("nous", {})
-                        if _nous_p.get("agent_key") or _nous_p.get("access_token"):
-                            _vision_needs_setup = False
-            except Exception:
-                pass
+    if selected_provider in {"openrouter", "nous", "openai-codex"}:
+        # If the user just selected one of our known-good vision backends during
+        # setup, treat vision as covered. Auth/setup failure returns earlier.
+        _vision_needs_setup = False
+    elif selected_provider == "custom" and "custom" in _vision_backends:
+        _vision_needs_setup = False

    if _vision_needs_setup:
        _prov_names = {
@@ -1343,44 +1289,54 @@ def setup_model_provider(config: dict):

        print()
        print_header("Vision & Image Analysis (optional)")
-        print_info(f"Vision requires a multimodal-capable provider. {_prov_display}")
-        print_info("doesn't natively support it. Choose how to enable vision,")
-        print_info("or skip to configure later.")
+        print_info(f"Vision uses a separate multimodal backend. {_prov_display}")
+        print_info("doesn't currently provide one Hermes can auto-use for vision,")
+        print_info("so choose a backend now or skip and configure later.")
        print()

        _vision_choices = [
            "OpenRouter — uses Gemini (free tier at openrouter.ai/keys)",
-            "OpenAI — enter API key & choose a vision model",
+            "OpenAI-compatible endpoint — base URL, API key, and vision model",
            "Skip for now",
        ]
        _vision_idx = prompt_choice("Configure vision:", _vision_choices, 2)

        if _vision_idx == 0:  # OpenRouter
-            _or_key = prompt("  OpenRouter API key", password=True)
+            _or_key = prompt("  OpenRouter API key", password=True).strip()
            if _or_key:
                save_env_value("OPENROUTER_API_KEY", _or_key)
                print_success("OpenRouter key saved — vision will use Gemini")
            else:
                print_info("Skipped — vision won't be available")
-        elif _vision_idx == 1:  # OpenAI
-            _oai_key = prompt("  OpenAI API key", password=True)
+        elif _vision_idx == 1:  # OpenAI-compatible endpoint
+            _base_url = prompt("  Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
+            _api_key_label = "  API key"
+            if "api.openai.com" in _base_url.lower():
+                _api_key_label = "  OpenAI API key"
+            _oai_key = prompt(_api_key_label, password=True).strip()
            if _oai_key:
                save_env_value("OPENAI_API_KEY", _oai_key)
-                save_env_value("OPENAI_BASE_URL", "https://api.openai.com/v1")
-                _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
-                _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
-                _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
-                _selected_vision_model = (
-                    _oai_vision_models[_vm_idx]
-                    if _vm_idx < len(_oai_vision_models)
-                    else "gpt-4o-mini"
-                )
+                save_env_value("OPENAI_BASE_URL", _base_url)
+                if "api.openai.com" in _base_url.lower():
+                    _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
+                    _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
+                    _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
+                    _selected_vision_model = (
+                        _oai_vision_models[_vm_idx]
+                        if _vm_idx < len(_oai_vision_models)
+                        else "gpt-4o-mini"
+                    )
+                else:
+                    _selected_vision_model = prompt("  Vision model (blank = use main/custom default)").strip()
                save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
-                print_success(f"Vision configured with OpenAI ({_selected_vision_model})")
+                print_success(
+                    f"Vision configured with {_base_url}"
+                    + (f" ({_selected_vision_model})" if _selected_vision_model else "")
+                )
            else:
                print_info("Skipped — vision won't be available")
        else:
-            print_info("Skipped — add later with 'hermes config set OPENROUTER_API_KEY ...'")
+            print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings")

    # ── Model Selection (adapts based on provider) ──
    if selected_provider != "custom":  # Custom already prompted for model name
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -362,14 +362,21 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[

 def _toolset_has_keys(ts_key: str) -> bool:
    """Check if a toolset's required API keys are configured."""
+    if ts_key == "vision":
+        try:
+            from agent.auxiliary_client import resolve_vision_provider_client
+
+            _provider, client, _model = resolve_vision_provider_client()
+            return client is not None
+        except Exception:
+            return False
+
    # Check TOOL_CATEGORIES first (provider-aware)
    cat = TOOL_CATEGORIES.get(ts_key)
    if cat:
-        for provider in cat["providers"]:
+        for provider in cat.get("providers", []):
            env_vars = provider.get("env_vars", [])
-            if not env_vars:
-                return True  # Free provider (e.g., Edge TTS)
-            if all(get_env_value(v["key"]) for v in env_vars):
+            if env_vars and all(get_env_value(e["key"]) for e in env_vars):
                return True
        return False

@@ -628,6 +635,39 @@ def _configure_provider(provider: dict, config: dict):

 def _configure_simple_requirements(ts_key: str):
    """Simple fallback for toolsets that just need env vars (no provider selection)."""
+    if ts_key == "vision":
+        if _toolset_has_keys("vision"):
+            return
+        print()
+        print(color("  Vision / Image Analysis requires a multimodal backend:", Colors.YELLOW))
+        choices = [
+            "OpenRouter — uses Gemini",
+            "OpenAI-compatible endpoint — base URL, API key, and vision model",
+            "Skip",
+        ]
+        idx = _prompt_choice("  Configure vision backend", choices, 2)
+        if idx == 0:
+            _print_info("  Get key at: https://openrouter.ai/keys")
+            value = _prompt("    OPENROUTER_API_KEY", password=True)
+            if value and value.strip():
+                save_env_value("OPENROUTER_API_KEY", value.strip())
+                _print_success("    Saved")
+            else:
+                _print_warning("    Skipped")
+        elif idx == 1:
+            base_url = _prompt("    OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
+            key_label = "    OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else "    API key"
+            api_key = _prompt(key_label, password=True)
+            if api_key and api_key.strip():
+                save_env_value("OPENAI_BASE_URL", base_url)
+                save_env_value("OPENAI_API_KEY", api_key.strip())
+                if "api.openai.com" in base_url.lower():
+                    save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
+                _print_success("    Saved")
+            else:
+                _print_warning("    Skipped")
+        return
+
    requirements = TOOLSET_ENV_REQUIREMENTS.get(ts_key, [])
    if not requirements:
        return
--- a/tests/hermes_cli/test_setup_model_provider.py
+++ b/tests/hermes_cli/test_setup_model_provider.py
@@ -39,6 +39,8 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
    """Keep-current custom should not fall through to the generic model menu."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)
+    save_env_value("OPENAI_BASE_URL", "https://example.invalid/v1")
+    save_env_value("OPENAI_API_KEY", "custom-key")

    config = load_config()
    config["model"] = {
@@ -55,10 +57,6 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
        if calls["count"] == 1:
            assert choices[-1] == "Keep current (Custom: https://example.invalid/v1)"
            return len(choices) - 1
-        if calls["count"] == 2:
-            assert question == "Configure vision:"
-            assert choices[-1] == "Skip for now"
-            return len(choices) - 1
        raise AssertionError("Model menu should not appear for keep-current custom")

    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
@@ -74,7 +72,7 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
    assert reloaded["model"]["provider"] == "custom"
    assert reloaded["model"]["default"] == "custom/model"
    assert reloaded["model"]["base_url"] == "https://example.invalid/v1"
-    assert calls["count"] == 2
+    assert calls["count"] == 1


 def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tmp_path, monkeypatch):
@@ -214,7 +212,7 @@ def test_setup_summary_marks_codex_auth_as_vision_available(tmp_path, monkeypatc
    _clear_provider_env(monkeypatch)

    (tmp_path / "auth.json").write_text(
-        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token":"tok"}}}}'
+        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token": "***", "refresh_token": "***"}}}}'
    )

    monkeypatch.setattr("shutil.which", lambda _name: None)
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -1,6 +1,6 @@
 """Tests for hermes_cli.tools_config platform tool persistence."""

-from hermes_cli.tools_config import _get_platform_tools, _platform_toolset_summary
+from hermes_cli.tools_config import _get_platform_tools, _platform_toolset_summary, _toolset_has_keys


 def test_get_platform_tools_uses_default_when_platform_not_configured():
@@ -26,3 +26,17 @@ def test_platform_toolset_summary_uses_explicit_platform_list():

    assert set(summary.keys()) == {"cli"}
    assert summary["cli"] == _get_platform_tools(config, "cli")
+
+
+def test_toolset_has_keys_for_vision_accepts_codex_auth(tmp_path, monkeypatch):
+    """Vision toolset gating passes with only Codex OAuth tokens in auth.json."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    (tmp_path / "auth.json").write_text(
+        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token":"codex-access-token","refresh_token":"codex-refresh-token"}}}}'
+    )
+    # Remove API-key and provider-override env vars so only auth.json remains.
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("AUXILIARY_VISION_PROVIDER", raising=False)
+    monkeypatch.delenv("CONTEXT_VISION_PROVIDER", raising=False)
+
+    assert _toolset_has_keys("vision") is True
--- a/tests/tools/test_vision_tools.py
+++ b/tests/tools/test_vision_tools.py
@@ -351,6 +351,19 @@ class TestVisionRequirements:
        result = check_vision_requirements()
        assert isinstance(result, bool)

+    def test_check_requirements_accepts_codex_auth(self, monkeypatch, tmp_path):
+        """check_vision_requirements() passes with only Codex OAuth tokens in auth.json."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        (tmp_path / "auth.json").write_text(
+            '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token":"codex-access-token","refresh_token":"codex-refresh-token"}}}}'
+        )
+        # Remove API-key and provider-override env vars so only auth.json remains.
+        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        monkeypatch.delenv("AUXILIARY_VISION_PROVIDER", raising=False)
+        monkeypatch.delenv("CONTEXT_VISION_PROVIDER", raising=False)
+
+        assert check_vision_requirements() is True
+
    def test_debug_session_info_returns_dict(self):
        info = get_debug_session_info()
        assert isinstance(info, dict)
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -377,16 +377,11 @@ async def vision_analyze_tool(


 def check_vision_requirements() -> bool:
-    """Check if an auxiliary vision model is available."""
+    """Check if the configured runtime vision path can resolve a client.
+
+    Returns:
+        True when a vision client can be resolved, either for the configured
+        provider or, failing that, for one of the auto vision backends.
+        False when nothing resolves or resolution raises.
+    """
     try:
-        from agent.auxiliary_client import resolve_provider_client
-        client, _ = resolve_provider_client("openrouter")
-        if client is not None:
-            return True
-        client, _ = resolve_provider_client("nous")
-        if client is not None:
-            return True
-        client, _ = resolve_provider_client("custom")
+        from agent.auxiliary_client import resolve_vision_provider_client
+
+        _provider, client, _model = resolve_vision_provider_client()
+        if client is None and _provider is not None:
+            # An explicitly configured provider failed to resolve. The
+            # auxiliary call_llm/async_call_llm entry points retry with the
+            # auto vision backends in that case, so mirror that here rather
+            # than reporting vision as unavailable. (A None provider means
+            # auto mode was already tried.)
+            _provider, client, _model = resolve_vision_provider_client(provider="auto")
        return client is not None
    except Exception:
        return False