fix: harden auxiliary model config — gateway bridge, vision safety, tests

Improvements on top of PR #606 (auxiliary model configuration): 1. Gateway bridge: Added auxiliary.* and compression.summary_provider config bridging to gateway/run.py so config.yaml settings work from messaging platforms (not just CLI). Matches the pattern in cli.py. 2. Vision auto-fallback safety: In auto mode, vision now only tries OpenRouter + Nous Portal (known multimodal-capable providers). Custom endpoints, Codex, and API-key providers are skipped to avoid confusing errors from providers that don't support vision input. Explicit provider override (AUXILIARY_VISION_PROVIDER=main) still allows using any provider. 3. Comprehensive tests (46 new): - _get_auxiliary_provider env var resolution (8 tests) - _resolve_forced_provider with all provider types (8 tests) - Per-task provider routing integration (4 tests) - Vision auto-fallback safety (7 tests) - Config bridging logic (11 tests) - Gateway/CLI bridge parity (2 tests) - Vision model override via env var (2 tests) - DEFAULT_CONFIG shape validation (4 tests) 4. Docs: Added auxiliary_client.py to AGENTS.md project structure. Updated module docstring with separate text/vision resolution chains. Tests: 2429 passed (was 2383).
2026-03-08 18:06:40 -07:00
parent d9f373654b
commit 5ae0b731d0
5 changed files with 534 additions and 5 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -16,6 +16,7 @@ source venv/bin/activate  # Before running any Python commands
 ```
 hermes-agent/
 ├── agent/                # Agent internals (extracted from run_agent.py)
+│   ├── auxiliary_client.py   # Shared auxiliary OpenAI client (vision, compression, web extract)
 │   ├── model_metadata.py     # Model context lengths, token estimation
 │   ├── context_compressor.py # Auto context compression
 │   ├── prompt_caching.py     # Anthropic prompt caching
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -4,7 +4,7 @@ Provides a single resolution chain so every consumer (context compression,
 session search, web extraction, vision analysis, browser vision) picks up
 the best available backend without duplicating fallback logic.

-Resolution order (same for text and vision tasks):
+Resolution order for text tasks (auto mode):
  1. OpenRouter  (OPENROUTER_API_KEY)
  2. Nous Portal (~/.hermes/auth.json active provider)
  3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
@@ -14,10 +14,19 @@ Resolution order (same for text and vision tasks):
     — checked via PROVIDER_REGISTRY entries with auth_type='api_key'
  6. None

+Resolution order for vision/multimodal tasks (auto mode):
+  1. OpenRouter
+  2. Nous Portal
+  3. None  (text-chain steps 3-5 are skipped — they may not support multimodal)
+
 Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
 CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task:
 "openrouter", "nous", or "main" (= steps 3-5).
-Default "auto" follows the full chain above.
+Default "auto" follows the chains above.
+
+Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
+AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
+than the provider's default.
 """

 import json
@@ -485,11 +494,23 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    Checks AUXILIARY_VISION_PROVIDER for a forced provider, otherwise
    auto-detects.  Callers may override the returned model with
    AUXILIARY_VISION_MODEL.
+
+    In auto mode, only OpenRouter and Nous Portal are tried because they
+    are known to support multimodal (Gemini).  Custom endpoints, Codex,
+    and API-key providers are skipped — they may not handle vision input
+    and would produce confusing errors.  To use one of those providers
+    for vision, set AUXILIARY_VISION_PROVIDER explicitly.
    """
    forced = _get_auxiliary_provider("vision")
    if forced != "auto":
        return _resolve_forced_provider(forced)
-    return _resolve_auto()
+    # Auto: only multimodal-capable providers
+    for try_fn in (_try_openrouter, _try_nous):
+        client, model = try_fn()
+        if client is not None:
+            return client, model
+    logger.debug("Auxiliary vision client: none available (auto only tries OpenRouter/Nous)")
+    return None, None


 def get_auxiliary_extra_body() -> dict:
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -86,10 +86,29 @@ if _config_path.exists():
                "enabled": "CONTEXT_COMPRESSION_ENABLED",
                "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
                "summary_model": "CONTEXT_COMPRESSION_MODEL",
+                "summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
            }
            for _cfg_key, _env_var in _compression_env_map.items():
                if _cfg_key in _compression_cfg:
                    os.environ[_env_var] = str(_compression_cfg[_cfg_key])
+        # Auxiliary model overrides (vision, web_extract): bridge non-default
+        # provider/model to env vars. `or ""` keeps YAML nulls from becoming "None".
+        _auxiliary_cfg = _cfg.get("auxiliary", {})
+        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
+            _aux_task_env = {
+                "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
+                "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
+            }
+            for _task_key, (_prov_env, _model_env) in _aux_task_env.items():
+                _task_cfg = _auxiliary_cfg.get(_task_key, {})
+                if not isinstance(_task_cfg, dict):
+                    continue
+                _prov = str(_task_cfg.get("provider") or "").strip()
+                _model = str(_task_cfg.get("model") or "").strip()
+                if _prov and _prov != "auto":
+                    os.environ[_prov_env] = _prov
+                if _model:
+                    os.environ[_model_env] = _model
        _agent_cfg = _cfg.get("agent", {})
        if _agent_cfg and isinstance(_agent_cfg, dict):
            if "max_turns" in _agent_cfg:
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -1,4 +1,4 @@
-"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback."""
+"""Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""

 import json
 import os
@@ -12,6 +12,9 @@ from agent.auxiliary_client import (
    get_vision_auxiliary_client,
    auxiliary_max_tokens_param,
    _read_codex_access_token,
+    _get_auxiliary_provider,
+    _resolve_forced_provider,
+    _resolve_auto,
 )


@@ -21,6 +24,10 @@ def _clean_env(monkeypatch):
    for key in (
        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
+        # Per-task provider/model overrides
+        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
+        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
+        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
    ):
        monkeypatch.delenv(key, raising=False)

@@ -152,7 +159,7 @@ class TestGetTextAuxiliaryClient:


 class TestVisionClientFallback:
-    """Vision client uses the same full fallback chain as text."""
+    """Vision client auto mode only tries OpenRouter + Nous (multimodal-capable)."""

    def test_vision_returns_none_without_any_credentials(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
@@ -160,6 +167,195 @@ class TestVisionClientFallback:
        assert client is None
        assert model is None

+    def test_vision_auto_skips_codex(self, codex_auth_dir):
+        """Even with Codex available, vision auto mode returns None (Codex can't do multimodal)."""
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
+            client, model = get_vision_auxiliary_client()
+        assert client is None
+        assert model is None
+
+    def test_vision_auto_skips_custom_endpoint(self, monkeypatch):
+        """Custom endpoint is skipped in vision auto mode."""
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
+            client, model = get_vision_auxiliary_client()
+        assert client is None
+        assert model is None
+
+    def test_vision_uses_openrouter_when_available(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_vision_auxiliary_client()
+        assert model == "google/gemini-3-flash-preview"
+        assert client is not None
+
+    def test_vision_uses_nous_when_available(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
+             patch("agent.auxiliary_client.OpenAI"):
+            mock_nous.return_value = {"access_token": "nous-tok"}
+            client, model = get_vision_auxiliary_client()
+        assert model == "gemini-3-flash"
+        assert client is not None
+
+    def test_vision_forced_main_uses_custom_endpoint(self, monkeypatch):
+        """When explicitly forced to 'main', vision CAN use custom endpoint."""
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_vision_auxiliary_client()
+        assert client is not None
+        assert model == "gpt-4o-mini"
+
+    def test_vision_forced_main_returns_none_without_creds(self, monkeypatch):
+        """Forced main with no credentials still returns None."""
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+            client, model = get_vision_auxiliary_client()
+        assert client is None
+        assert model is None
+
+
+class TestGetAuxiliaryProvider:
+    """Tests for _get_auxiliary_provider env var resolution."""
+
+    def test_no_task_returns_auto(self):
+        assert _get_auxiliary_provider() == "auto"
+        assert _get_auxiliary_provider("") == "auto"
+
+    def test_auxiliary_prefix_takes_priority(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "openrouter")
+        assert _get_auxiliary_provider("vision") == "openrouter"
+
+    def test_context_prefix_fallback(self, monkeypatch):
+        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
+        assert _get_auxiliary_provider("compression") == "nous"
+
+    def test_auxiliary_prefix_over_context_prefix(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_COMPRESSION_PROVIDER", "openrouter")
+        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
+        assert _get_auxiliary_provider("compression") == "openrouter"
+
+    def test_auto_value_treated_as_auto(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "auto")
+        assert _get_auxiliary_provider("vision") == "auto"
+
+    def test_whitespace_stripped(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "  openrouter  ")
+        assert _get_auxiliary_provider("vision") == "openrouter"
+
+    def test_case_insensitive(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "OpenRouter")
+        assert _get_auxiliary_provider("vision") == "openrouter"
+
+    def test_main_provider(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "main")
+        assert _get_auxiliary_provider("web_extract") == "main"
+
+
+class TestResolveForcedProvider:
+    """Tests for _resolve_forced_provider with explicit provider selection."""
+
+    def test_forced_openrouter(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = _resolve_forced_provider("openrouter")
+        assert model == "google/gemini-3-flash-preview"
+        assert client is not None
+
+    def test_forced_openrouter_no_key(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
+            client, model = _resolve_forced_provider("openrouter")
+        assert client is None
+        assert model is None
+
+    def test_forced_nous(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
+             patch("agent.auxiliary_client.OpenAI"):
+            mock_nous.return_value = {"access_token": "nous-tok"}
+            client, model = _resolve_forced_provider("nous")
+        assert model == "gemini-3-flash"
+        assert client is not None
+
+    def test_forced_nous_not_configured(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
+            client, model = _resolve_forced_provider("nous")
+        assert client is None
+        assert model is None
+
+    def test_forced_main_uses_custom(self, monkeypatch):
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = _resolve_forced_provider("main")
+        assert model == "gpt-4o-mini"
+
+    def test_forced_main_skips_openrouter_nous(self, monkeypatch):
+        """Even if OpenRouter key is set, 'main' skips it."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = _resolve_forced_provider("main")
+        # Should use custom endpoint, not OpenRouter
+        assert model == "gpt-4o-mini"
+
+    def test_forced_main_falls_to_codex(self, codex_auth_dir, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client.OpenAI"):
+            client, model = _resolve_forced_provider("main")
+        from agent.auxiliary_client import CodexAuxiliaryClient
+        assert isinstance(client, CodexAuxiliaryClient)
+        assert model == "gpt-5.3-codex"
+
+    def test_forced_unknown_returns_none(self, monkeypatch):
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+            client, model = _resolve_forced_provider("invalid-provider")
+        assert client is None
+        assert model is None
+
+
+class TestTaskSpecificOverrides:
+    """Integration tests for per-task provider routing via get_text_auxiliary_client(task=...)."""
+
+    def test_text_with_vision_provider_override(self, monkeypatch):
+        """AUXILIARY_VISION_PROVIDER should not affect text tasks."""
+        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "nous")
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI"):
+            client, model = get_text_auxiliary_client()  # no task → auto
+        assert model == "google/gemini-3-flash-preview"  # OpenRouter, not Nous
+
+    def test_compression_task_reads_context_prefix(self, monkeypatch):
+        """Compression task should check CONTEXT_COMPRESSION_PROVIDER."""
+        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")  # would win in auto
+        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
+             patch("agent.auxiliary_client.OpenAI"):
+            mock_nous.return_value = {"access_token": "nous-tok"}
+            client, model = get_text_auxiliary_client("compression")
+        assert model == "gemini-3-flash"  # forced to Nous, not OpenRouter
+
+    def test_web_extract_task_override(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "openrouter")
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI"):
+            client, model = get_text_auxiliary_client("web_extract")
+        assert model == "google/gemini-3-flash-preview"
+
+    def test_task_without_override_uses_auto(self, monkeypatch):
+        """A task with no provider env var falls through to auto chain."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI"):
+            client, model = get_text_auxiliary_client("compression")
+        assert model == "google/gemini-3-flash-preview"  # auto → OpenRouter
+

 class TestAuxiliaryMaxTokensParam:
    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
--- a/tests/test_auxiliary_config_bridge.py
+++ b/tests/test_auxiliary_config_bridge.py
@@ -0,0 +1,292 @@
+"""Tests for auxiliary model config bridging — verifies that config.yaml values
+are properly mapped to environment variables by both CLI and gateway loaders.
+
+Also covers the vision_tools model override env var, the DEFAULT_CONFIG shape, and source-level checks of gateway/run.py and cli.py.
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import pytest
+import yaml
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+
+def _run_auxiliary_bridge(config_dict, monkeypatch):
+    """Simulate the auxiliary config → env var bridging logic shared by CLI and gateway.
+
+    Mirrors the code in cli.py load_cli_config() and gateway/run.py, except that
+    values are set via monkeypatch.setenv so pytest restores os.environ afterwards.
+    """
+    # Clear env vars
+    for key in (
+        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
+        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
+        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
+    ):
+        monkeypatch.delenv(key, raising=False)
+
+    # Compression bridge
+    compression_cfg = config_dict.get("compression", {})
+    if compression_cfg and isinstance(compression_cfg, dict):
+        compression_env_map = {
+            "enabled": "CONTEXT_COMPRESSION_ENABLED",
+            "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
+            "summary_model": "CONTEXT_COMPRESSION_MODEL",
+            "summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
+        }
+        for cfg_key, env_var in compression_env_map.items():
+            if cfg_key in compression_cfg:
+                monkeypatch.setenv(env_var, str(compression_cfg[cfg_key]))
+
+    # Auxiliary bridge
+    auxiliary_cfg = config_dict.get("auxiliary", {})
+    if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
+        aux_task_env = {
+            "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
+            "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
+        }
+        for task_key, (prov_env, model_env) in aux_task_env.items():
+            task_cfg = auxiliary_cfg.get(task_key, {})
+            if not isinstance(task_cfg, dict):
+                continue
+            prov = str(task_cfg.get("provider", "")).strip()
+            model = str(task_cfg.get("model", "")).strip()
+            if prov and prov != "auto":
+                monkeypatch.setenv(prov_env, prov)
+            if model:
+                monkeypatch.setenv(model_env, model)
+
+
+# ── Config bridging tests ────────────────────────────────────────────────────
+
+
+class TestAuxiliaryConfigBridge:
+    """Verify the config.yaml → env var bridging logic used by CLI and gateway."""
+
+    def test_vision_provider_bridged(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {"provider": "openrouter", "model": ""},
+                "web_extract": {"provider": "auto", "model": ""},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter"
+        # auto should not be set
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") is None
+
+    def test_vision_model_bridged(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {"provider": "auto", "model": "openai/gpt-4o"},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_MODEL") == "openai/gpt-4o"
+        # auto provider should not be set
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None
+
+    def test_web_extract_bridged(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "web_extract": {"provider": "nous", "model": "gemini-2.5-flash"},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
+
+    def test_compression_provider_bridged(self, monkeypatch):
+        config = {
+            "compression": {
+                "summary_provider": "nous",
+                "summary_model": "gemini-3-flash",
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("CONTEXT_COMPRESSION_PROVIDER") == "nous"
+        assert os.environ.get("CONTEXT_COMPRESSION_MODEL") == "gemini-3-flash"
+
+    def test_empty_values_not_bridged(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {"provider": "auto", "model": ""},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None
+        assert os.environ.get("AUXILIARY_VISION_MODEL") is None
+
+    def test_missing_auxiliary_section_safe(self, monkeypatch):
+        """Config without auxiliary section should not crash."""
+        config = {"model": {"default": "test-model"}}
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None
+
+    def test_non_dict_task_config_ignored(self, monkeypatch):
+        """Malformed task config (e.g. string instead of dict) is safely ignored."""
+        config = {
+            "auxiliary": {
+                "vision": "openrouter",  # should be a dict
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None
+
+    def test_mixed_tasks(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {"provider": "openrouter", "model": ""},
+                "web_extract": {"provider": "auto", "model": "custom-llm"},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter"
+        assert os.environ.get("AUXILIARY_VISION_MODEL") is None
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") is None
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "custom-llm"
+
+    def test_all_tasks_with_overrides(self, monkeypatch):
+        config = {
+            "compression": {
+                "summary_provider": "main",
+                "summary_model": "local-model",
+            },
+            "auxiliary": {
+                "vision": {"provider": "openrouter", "model": "google/gemini-2.5-flash"},
+                "web_extract": {"provider": "nous", "model": "gemini-3-flash"},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("CONTEXT_COMPRESSION_PROVIDER") == "main"
+        assert os.environ.get("CONTEXT_COMPRESSION_MODEL") == "local-model"
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter"
+        assert os.environ.get("AUXILIARY_VISION_MODEL") == "google/gemini-2.5-flash"
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-3-flash"
+
+    def test_whitespace_in_values_stripped(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {"provider": "  openrouter  ", "model": "  my-model  "},
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter"
+        assert os.environ.get("AUXILIARY_VISION_MODEL") == "my-model"
+
+    def test_empty_auxiliary_dict_safe(self, monkeypatch):
+        config = {"auxiliary": {}}
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None
+        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") is None
+
+
+# ── Gateway bridge parity test ───────────────────────────────────────────────
+
+
+class TestGatewayBridgeCodeParity:
+    """Verify the gateway/run.py config bridge contains the auxiliary section."""
+
+    def test_gateway_has_auxiliary_bridge(self):
+        """The gateway config bridge must include auxiliary.* bridging."""
+        gateway_path = Path(__file__).parent.parent / "gateway" / "run.py"
+        content = gateway_path.read_text(encoding="utf-8")  # don't rely on locale encoding
+        # Check for key patterns that indicate the bridge is present
+        assert "AUXILIARY_VISION_PROVIDER" in content
+        assert "AUXILIARY_VISION_MODEL" in content
+        assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
+        assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
+
+    def test_gateway_has_compression_provider(self):
+        """Gateway must bridge compression.summary_provider."""
+        gateway_path = Path(__file__).parent.parent / "gateway" / "run.py"
+        content = gateway_path.read_text(encoding="utf-8")  # don't rely on locale encoding
+        assert "summary_provider" in content
+        assert "CONTEXT_COMPRESSION_PROVIDER" in content
+
+
+# ── Vision model override tests ──────────────────────────────────────────────
+
+
+class TestVisionModelOverride:
+    """Test that AUXILIARY_VISION_MODEL env var overrides the default model in the handler."""
+
+    def test_env_var_overrides_default(self, monkeypatch):
+        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "openai/gpt-4o")
+        from tools.vision_tools import _handle_vision_analyze
+        with patch("tools.vision_tools.vision_analyze_tool", new_callable=MagicMock) as mock_tool:
+            mock_tool.return_value = '{"success": true}'
+            _handle_vision_analyze({"image_url": "http://test.jpg", "question": "test"})
+            call_args = mock_tool.call_args
+            # 3rd positional arg = model
+            assert call_args[0][2] == "openai/gpt-4o"
+
+    def test_default_model_when_no_override(self, monkeypatch):
+        monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False)
+        from tools.vision_tools import _handle_vision_analyze, DEFAULT_VISION_MODEL
+        with patch("tools.vision_tools.vision_analyze_tool", new_callable=MagicMock) as mock_tool:
+            mock_tool.return_value = '{"success": true}'
+            _handle_vision_analyze({"image_url": "http://test.jpg", "question": "test"})
+            call_args = mock_tool.call_args
+            expected = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
+            assert call_args[0][2] == expected
+
+
+# ── DEFAULT_CONFIG shape tests ───────────────────────────────────────────────
+
+
+class TestDefaultConfigShape:
+    """Verify the DEFAULT_CONFIG in hermes_cli/config.py has correct auxiliary structure."""
+
+    def test_auxiliary_section_exists(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+        assert "auxiliary" in DEFAULT_CONFIG
+
+    def test_vision_task_structure(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+        vision = DEFAULT_CONFIG["auxiliary"]["vision"]
+        assert "provider" in vision
+        assert "model" in vision
+        assert vision["provider"] == "auto"
+        assert vision["model"] == ""
+
+    def test_web_extract_task_structure(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+        web = DEFAULT_CONFIG["auxiliary"]["web_extract"]
+        assert "provider" in web
+        assert "model" in web
+        assert web["provider"] == "auto"
+        assert web["model"] == ""
+
+    def test_compression_provider_default(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+        compression = DEFAULT_CONFIG["compression"]
+        assert "summary_provider" in compression
+        assert compression["summary_provider"] == "auto"
+
+
+# ── CLI defaults parity ─────────────────────────────────────────────────────
+
+
+class TestCLIDefaultsHaveAuxiliaryKeys:
+    """Source-level check that cli.py wires up the auxiliary config bridge.
+    (Greps cli.py's text for marker strings; load_cli_config() is not executed.)"""
+
+    def test_cli_defaults_can_merge_auxiliary(self):
+        """Assert cli.py reads the "auxiliary" key from its defaults dict and
+        names both AUXILIARY_VISION_* env vars somewhere in its source."""
+        # Per the original author: cli.py's second-pass merge loop carries
+        # over keys from file_config that aren't in defaults, so auxiliary
+        # config from config.yaml is merged even though cli.py's defaults
+        # dict doesn't define it (not verifiable from this file alone).
+        import cli as _cli_mod
+        source = Path(_cli_mod.__file__).read_text(encoding="utf-8")
+        assert "auxiliary_config = defaults.get(\"auxiliary\"" in source
+        assert "AUXILIARY_VISION_PROVIDER" in source
+        assert "AUXILIARY_VISION_MODEL" in source