"""Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""

import json
import os
from pathlib import Path
from unittest.mock import patch, MagicMock

import pytest

from agent.auxiliary_client import (
    get_text_auxiliary_client,
    get_vision_auxiliary_client,
    auxiliary_max_tokens_param,
    _read_codex_access_token,
    _get_auxiliary_provider,
    _resolve_forced_provider,
    _resolve_auto,
)
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Strip provider env vars so each test starts clean.

    Removes both the base provider credentials/endpoints and the
    per-task override variables so host-environment leakage cannot
    influence the resolution chain under test.
    """
    for key in (
        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
        # Per-task provider/model overrides
        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
    ):
        # raising=False: the variable may legitimately be absent already.
        monkeypatch.delenv(key, raising=False)
@pytest.fixture
def codex_auth_dir(tmp_path, monkeypatch):
    """Provide a writable ~/.codex/ directory with a valid auth.json.

    Also stubs ``_read_codex_access_token`` to return the same token as
    the fixture file, so tests exercising the Codex path don't depend on
    how the module locates the auth store on the host.
    """
    codex_dir = tmp_path / ".codex"
    codex_dir.mkdir()
    auth_file = codex_dir / "auth.json"
    auth_file.write_text(json.dumps({
        "tokens": {
            "access_token": "codex-test-token-abc123",
            "refresh_token": "codex-refresh-xyz",
        }
    }))
    monkeypatch.setattr(
        "agent.auxiliary_client._read_codex_access_token",
        lambda: "codex-test-token-abc123",
    )
    return codex_dir
class TestReadCodexAccessToken:
    """Tests for _read_codex_access_token across auth-store layouts."""

    def test_valid_auth_store(self, tmp_path, monkeypatch):
        """A well-formed Hermes auth store yields the stored access token."""
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": "tok-123", "refresh_token": "r-456"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = _read_codex_access_token()
        assert result == "tok-123"

    def test_missing_returns_none(self, tmp_path, monkeypatch):
        """An auth store with no providers yields None."""
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = _read_codex_access_token()
        assert result is None

    def test_empty_token_returns_none(self, tmp_path, monkeypatch):
        """A whitespace-only access token is treated as absent."""
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": " ", "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        result = _read_codex_access_token()
        assert result is None

    def test_malformed_json_returns_none(self, tmp_path):
        """Unparseable ~/.codex/auth.json yields None rather than raising."""
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text("{bad json")
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
        assert result is None

    def test_missing_tokens_key_returns_none(self, tmp_path):
        """Valid JSON without a 'tokens' key yields None."""
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(json.dumps({"other": "data"}))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            result = _read_codex_access_token()
        assert result is None
class TestGetTextAuxiliaryClient:
    """Test the full resolution chain for get_text_auxiliary_client."""

    def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir):
        """OPENROUTER_API_KEY wins even when Codex credentials exist."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
        assert model == "google/gemini-3-flash-preview"
        mock_openai.assert_called_once()
        call_kwargs = mock_openai.call_args
        assert call_kwargs.kwargs["api_key"] == "or-key"

    def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir):
        """Nous auth outranks the Codex OAuth fallback."""
        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_nous.return_value = {"access_token": "nous-tok"}
            client, model = get_text_auxiliary_client()
        assert model == "gemini-3-flash"

    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
        """A custom OPENAI_BASE_URL endpoint beats Codex and uses the main model."""
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        # Override the autouse monkeypatch for codex
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
            lambda: "codex-test-token-abc123",
        )
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
        assert model == "my-local-model"
        call_kwargs = mock_openai.call_args
        assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
        """With only Codex credentials present, the Codex wrapper is used."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
        assert model == "gpt-5.3-codex"
        # Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)

    def test_returns_none_when_nothing_available(self, monkeypatch):
        """No credentials anywhere resolves to (None, None)."""
        # Explicitly clear endpoint/key vars to prevent host-env leakage.
        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
                patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
            client, model = get_text_auxiliary_client()
        assert client is None
        assert model is None
class TestVisionClientFallback:
    """Vision client auto mode only tries OpenRouter + Nous (multimodal-capable)."""

    def test_vision_returns_none_without_any_credentials(self):
        """No credentials at all resolves to (None, None)."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            client, model = get_vision_auxiliary_client()
        assert client is None
        assert model is None

    def test_vision_auto_includes_codex(self, codex_auth_dir):
        """Codex supports vision (gpt-5.3-codex), so auto mode should use it."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI"):
            client, model = get_vision_auxiliary_client()
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.3-codex"

    def test_vision_auto_falls_back_to_custom_endpoint(self, monkeypatch):
        """Custom endpoint is used as fallback in vision auto mode.

        Many local models (Qwen-VL, LLaVA, etc.) support vision.
        When no OpenRouter/Nous/Codex is available, try the custom endpoint.
        """
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_vision_auxiliary_client()
        assert client is not None  # Custom endpoint picked up as fallback

    def test_vision_uses_openrouter_when_available(self, monkeypatch):
        """OpenRouter serves vision requests when its key is set."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_vision_auxiliary_client()
        assert model == "google/gemini-3-flash-preview"
        assert client is not None

    def test_vision_uses_nous_when_available(self, monkeypatch):
        """Nous auth serves vision requests when present."""
        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
                patch("agent.auxiliary_client.OpenAI"):
            mock_nous.return_value = {"access_token": "nous-tok"}
            client, model = get_vision_auxiliary_client()
        assert model == "gemini-3-flash"
        assert client is not None

    def test_vision_forced_main_uses_custom_endpoint(self, monkeypatch):
        """When explicitly forced to 'main', vision CAN use custom endpoint."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_vision_auxiliary_client()
        assert client is not None
        assert model == "my-local-model"

    def test_vision_forced_main_returns_none_without_creds(self, monkeypatch):
        """Forced main with no credentials still returns None."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
                patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
            client, model = get_vision_auxiliary_client()
        assert client is None
        assert model is None

    def test_vision_forced_codex(self, monkeypatch, codex_auth_dir):
        """When forced to 'codex', vision uses Codex OAuth."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "codex")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI"):
            client, model = get_vision_auxiliary_client()
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.3-codex"
class TestGetAuxiliaryProvider:
    """Env-var resolution behavior of _get_auxiliary_provider.

    Per the tests below, AUXILIARY_<TASK>_PROVIDER wins over
    CONTEXT_<TASK>_PROVIDER, values are stripped and lowercased, and a
    missing/empty task yields "auto".
    """

    def test_no_task_returns_auto(self):
        """No task argument (or an empty one) resolves to 'auto'."""
        assert _get_auxiliary_provider() == "auto"
        assert _get_auxiliary_provider("") == "auto"

    def test_auxiliary_prefix_takes_priority(self, monkeypatch):
        """AUXILIARY_VISION_PROVIDER is read for the 'vision' task."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "openrouter")
        assert _get_auxiliary_provider("vision") == "openrouter"

    def test_context_prefix_fallback(self, monkeypatch):
        """CONTEXT_COMPRESSION_PROVIDER is consulted for 'compression'."""
        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
        assert _get_auxiliary_provider("compression") == "nous"

    def test_auxiliary_prefix_over_context_prefix(self, monkeypatch):
        """When both prefixes are set, the AUXILIARY_ one wins."""
        monkeypatch.setenv("AUXILIARY_COMPRESSION_PROVIDER", "openrouter")
        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
        assert _get_auxiliary_provider("compression") == "openrouter"

    def test_auto_value_treated_as_auto(self, monkeypatch):
        """An explicit 'auto' passes through unchanged."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "auto")
        assert _get_auxiliary_provider("vision") == "auto"

    def test_whitespace_stripped(self, monkeypatch):
        """Surrounding whitespace in the env value is trimmed."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", " openrouter ")
        assert _get_auxiliary_provider("vision") == "openrouter"

    def test_case_insensitive(self, monkeypatch):
        """Provider names are normalized to lowercase."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "OpenRouter")
        assert _get_auxiliary_provider("vision") == "openrouter"

    def test_main_provider(self, monkeypatch):
        """'main' is an accepted provider value for web_extract."""
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "main")
        assert _get_auxiliary_provider("web_extract") == "main"
class TestResolveForcedProvider:
    """Behavior of _resolve_forced_provider for each explicit provider value."""

    def test_forced_openrouter(self, monkeypatch):
        """With an API key present, 'openrouter' yields its default model."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI"):
            client, model = _resolve_forced_provider("openrouter")
        assert model == "google/gemini-3-flash-preview"
        assert client is not None

    def test_forced_openrouter_no_key(self, monkeypatch):
        """Without OPENROUTER_API_KEY, forcing 'openrouter' yields nothing."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            client, model = _resolve_forced_provider("openrouter")
        assert client is None
        assert model is None

    def test_forced_nous(self, monkeypatch):
        """With Nous auth available, 'nous' yields its default model."""
        nous_auth = {"access_token": "nous-tok"}
        with patch("agent.auxiliary_client._read_nous_auth", return_value=nous_auth):
            with patch("agent.auxiliary_client.OpenAI"):
                client, model = _resolve_forced_provider("nous")
        assert model == "gemini-3-flash"
        assert client is not None

    def test_forced_nous_not_configured(self, monkeypatch):
        """Without Nous auth, forcing 'nous' yields nothing."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            client, model = _resolve_forced_provider("nous")
        assert client is None
        assert model is None

    def test_forced_main_uses_custom(self, monkeypatch):
        """'main' with a custom endpoint uses the user's configured model."""
        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            with patch("agent.auxiliary_client.OpenAI"):
                client, model = _resolve_forced_provider("main")
        assert model == "my-local-model"

    def test_forced_main_skips_openrouter_nous(self, monkeypatch):
        """Even if OpenRouter key is set, 'main' skips it."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            with patch("agent.auxiliary_client.OpenAI"):
                client, model = _resolve_forced_provider("main")
        # Should use custom endpoint, not OpenRouter
        assert model == "my-local-model"

    def test_forced_main_falls_to_codex(self, codex_auth_dir, monkeypatch):
        """'main' with no custom endpoint falls through to Codex OAuth."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            with patch("agent.auxiliary_client.OpenAI"):
                client, model = _resolve_forced_provider("main")
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.3-codex"

    def test_forced_codex(self, codex_auth_dir, monkeypatch):
        """'codex' with a valid token yields the Codex client and model."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            with patch("agent.auxiliary_client.OpenAI"):
                client, model = _resolve_forced_provider("codex")
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.3-codex"

    def test_forced_codex_no_token(self, monkeypatch):
        """'codex' with no stored access token yields nothing."""
        with patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            client, model = _resolve_forced_provider("codex")
        assert client is None
        assert model is None

    def test_forced_unknown_returns_none(self, monkeypatch):
        """An unrecognized provider name resolves to (None, None)."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            with patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
                client, model = _resolve_forced_provider("invalid-provider")
        assert client is None
        assert model is None
class TestTaskSpecificOverrides:
    """Integration tests for per-task provider routing via get_text_auxiliary_client(task=...)."""

    def test_text_with_vision_provider_override(self, monkeypatch):
        """AUXILIARY_VISION_PROVIDER should not affect text tasks."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "nous")
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()  # no task → auto
        assert model == "google/gemini-3-flash-preview"  # OpenRouter, not Nous

    def test_compression_task_reads_context_prefix(self, monkeypatch):
        """Compression task should check CONTEXT_COMPRESSION_PROVIDER."""
        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")  # would win in auto
        nous_auth = {"access_token": "nous-tok"}
        with patch("agent.auxiliary_client._read_nous_auth", return_value=nous_auth):
            with patch("agent.auxiliary_client.OpenAI"):
                client, model = get_text_auxiliary_client("compression")
        assert model == "gemini-3-flash"  # forced to Nous, not OpenRouter

    def test_web_extract_task_override(self, monkeypatch):
        """AUXILIARY_WEB_EXTRACT_PROVIDER forces the web_extract task."""
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "openrouter")
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client("web_extract")
        assert model == "google/gemini-3-flash-preview"

    def test_task_without_override_uses_auto(self, monkeypatch):
        """A task with no provider env var falls through to auto chain."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client("compression")
        assert model == "google/gemini-3-flash-preview"  # auto → OpenRouter
class TestAuxiliaryMaxTokensParam:
    """auxiliary_max_tokens_param selects the token-limit kwarg per provider."""

    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
        """Codex adapter translates max_tokens internally, so we return max_tokens."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            with patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"):
                result = auxiliary_max_tokens_param(1024)
        assert result == {"max_tokens": 1024}

    def test_openrouter_uses_max_tokens(self, monkeypatch):
        """OpenRouter takes the plain max_tokens parameter."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        result = auxiliary_max_tokens_param(1024)
        assert result == {"max_tokens": 1024}

    def test_no_provider_uses_max_tokens(self):
        """With no provider configured at all, default to max_tokens."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            with patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
                result = auxiliary_max_tokens_param(1024)
        assert result == {"max_tokens": 1024}