2026-03-08 18:06:40 -07:00
|
|
|
"""Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""
|
2026-02-28 21:47:51 -08:00
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
from agent.auxiliary_client import (
|
|
|
|
|
get_text_auxiliary_client,
|
|
|
|
|
get_vision_auxiliary_client,
|
2026-03-14 21:14:20 -07:00
|
|
|
get_available_vision_backends,
|
2026-03-27 07:49:44 -07:00
|
|
|
resolve_vision_provider_client,
|
2026-03-14 21:14:20 -07:00
|
|
|
resolve_provider_client,
|
2026-02-28 21:47:51 -08:00
|
|
|
auxiliary_max_tokens_param,
|
|
|
|
|
_read_codex_access_token,
|
2026-03-08 18:06:40 -07:00
|
|
|
_get_auxiliary_provider,
|
|
|
|
|
_resolve_forced_provider,
|
|
|
|
|
_resolve_auto,
|
2026-02-28 21:47:51 -08:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Strip provider env vars so each test starts clean."""
    provider_env_vars = (
        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
        "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
        # Per-task provider/model/direct-endpoint overrides
        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
        "AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
        "AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
    )
    for var_name in provider_env_vars:
        monkeypatch.delenv(var_name, raising=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def codex_auth_dir(tmp_path, monkeypatch):
    """Provide a writable ~/.codex/ directory with a valid auth.json."""
    codex_dir = tmp_path / ".codex"
    codex_dir.mkdir()
    auth_payload = {
        "tokens": {
            "access_token": "codex-test-token-abc123",
            "refresh_token": "codex-refresh-xyz",
        }
    }
    (codex_dir / "auth.json").write_text(json.dumps(auth_payload))
    # Pin the token reader so tests do not depend on the real auth store.
    monkeypatch.setattr(
        "agent.auxiliary_client._read_codex_access_token",
        lambda: "codex-test-token-abc123",
    )
    return codex_dir
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestReadCodexAccessToken:
|
2026-03-01 19:59:24 -08:00
|
|
|
def test_valid_auth_store(self, tmp_path, monkeypatch):
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({
|
|
|
|
|
"version": 1,
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai-codex": {
|
|
|
|
|
"tokens": {"access_token": "tok-123", "refresh_token": "r-456"},
|
|
|
|
|
},
|
|
|
|
|
},
|
2026-02-28 21:47:51 -08:00
|
|
|
}))
|
2026-03-01 19:59:24 -08:00
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
result = _read_codex_access_token()
|
2026-02-28 21:47:51 -08:00
|
|
|
assert result == "tok-123"
|
|
|
|
|
|
2026-03-01 19:59:24 -08:00
|
|
|
def test_missing_returns_none(self, tmp_path, monkeypatch):
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
|
|
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
result = _read_codex_access_token()
|
2026-02-28 21:47:51 -08:00
|
|
|
assert result is None
|
|
|
|
|
|
2026-03-01 19:59:24 -08:00
|
|
|
def test_empty_token_returns_none(self, tmp_path, monkeypatch):
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({
|
|
|
|
|
"version": 1,
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai-codex": {
|
|
|
|
|
"tokens": {"access_token": " ", "refresh_token": "r"},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}))
|
|
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
result = _read_codex_access_token()
|
2026-02-28 21:47:51 -08:00
|
|
|
assert result is None
|
|
|
|
|
|
|
|
|
|
def test_malformed_json_returns_none(self, tmp_path):
|
|
|
|
|
codex_dir = tmp_path / ".codex"
|
|
|
|
|
codex_dir.mkdir()
|
|
|
|
|
(codex_dir / "auth.json").write_text("{bad json")
|
|
|
|
|
with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
|
|
|
|
|
result = _read_codex_access_token()
|
|
|
|
|
assert result is None
|
|
|
|
|
|
|
|
|
|
def test_missing_tokens_key_returns_none(self, tmp_path):
|
|
|
|
|
codex_dir = tmp_path / ".codex"
|
|
|
|
|
codex_dir.mkdir()
|
|
|
|
|
(codex_dir / "auth.json").write_text(json.dumps({"other": "data"}))
|
|
|
|
|
with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
|
|
|
|
|
result = _read_codex_access_token()
|
|
|
|
|
assert result is None
|
|
|
|
|
|
|
|
|
|
|
2026-03-21 17:36:25 -07:00
|
|
|
def test_expired_jwt_returns_none(self, tmp_path, monkeypatch):
|
|
|
|
|
"""Expired JWT tokens should be skipped so auto chain continues."""
|
|
|
|
|
import base64
|
|
|
|
|
import time as _time
|
|
|
|
|
|
|
|
|
|
# Build a JWT with exp in the past
|
|
|
|
|
header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
|
|
|
|
|
payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
|
|
|
|
|
payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
|
|
|
|
|
expired_jwt = f"{header}.{payload}.fakesig"
|
|
|
|
|
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({
|
|
|
|
|
"version": 1,
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai-codex": {
|
|
|
|
|
"tokens": {"access_token": expired_jwt, "refresh_token": "r"},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}))
|
|
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
result = _read_codex_access_token()
|
|
|
|
|
assert result is None, "Expired JWT should return None"
|
|
|
|
|
|
|
|
|
|
def test_valid_jwt_returns_token(self, tmp_path, monkeypatch):
|
|
|
|
|
"""Non-expired JWT tokens should be returned."""
|
|
|
|
|
import base64
|
|
|
|
|
import time as _time
|
|
|
|
|
|
|
|
|
|
header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
|
|
|
|
|
payload_data = json.dumps({"exp": int(_time.time()) + 3600}).encode()
|
|
|
|
|
payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
|
|
|
|
|
valid_jwt = f"{header}.{payload}.fakesig"
|
|
|
|
|
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({
|
|
|
|
|
"version": 1,
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai-codex": {
|
|
|
|
|
"tokens": {"access_token": valid_jwt, "refresh_token": "r"},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}))
|
|
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
result = _read_codex_access_token()
|
|
|
|
|
assert result == valid_jwt
|
|
|
|
|
|
|
|
|
|
def test_non_jwt_token_passes_through(self, tmp_path, monkeypatch):
|
|
|
|
|
"""Non-JWT tokens (no dots) should be returned as-is."""
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({
|
|
|
|
|
"version": 1,
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai-codex": {
|
|
|
|
|
"tokens": {"access_token": "plain-token-no-jwt", "refresh_token": "r"},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}))
|
|
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
result = _read_codex_access_token()
|
|
|
|
|
assert result == "plain-token-no-jwt"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestAnthropicOAuthFlag:
    """Test that OAuth tokens get is_oauth=True in auxiliary Anthropic client."""

    def test_oauth_token_sets_flag(self, monkeypatch):
        """OAuth tokens (sk-ant-oat01-*) should create client with is_oauth=True."""
        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-test-token")
        with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
            mock_build.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient

            aux_client, _model = _try_anthropic()
            assert aux_client is not None
            assert isinstance(aux_client, AnthropicAuxiliaryClient)
            # The adapter inside should carry the OAuth flag.
            completions_adapter = aux_client.chat.completions
            assert completions_adapter._is_oauth is True

    def test_api_key_no_oauth_flag(self, monkeypatch):
        """Regular API keys (sk-ant-api-*) should create client with is_oauth=False."""
        with (
            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-testkey1234"),
            patch("agent.anthropic_adapter.build_anthropic_client") as mock_build,
            patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)),
        ):
            mock_build.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient

            aux_client, _model = _try_anthropic()
            assert aux_client is not None
            assert isinstance(aux_client, AnthropicAuxiliaryClient)
            completions_adapter = aux_client.chat.completions
            assert completions_adapter._is_oauth is False

    def test_pool_entry_takes_priority_over_legacy_resolution(self):
        """A pooled credential must win over the legacy token-resolution path."""
        class _FakeEntry:
            access_token = "sk-ant-oat01-pooled"
            base_url = "https://api.anthropic.com"

        class _FakePool:
            def has_credentials(self):
                return True

            def select(self):
                return _FakeEntry()

        with (
            patch("agent.auxiliary_client.load_pool", return_value=_FakePool()),
            patch("agent.anthropic_adapter.resolve_anthropic_token", side_effect=AssertionError("legacy path should not run")),
            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()) as mock_build,
        ):
            from agent.auxiliary_client import _try_anthropic

            pooled_client, pooled_model = _try_anthropic()

            assert pooled_client is not None
            assert pooled_model == "claude-haiku-4-5-20251001"
            assert mock_build.call_args.args[0] == "sk-ant-oat01-pooled"
|
|
|
|
|
|
2026-03-21 17:36:25 -07:00
|
|
|
|
|
|
|
|
class TestExpiredCodexFallback:
|
|
|
|
|
"""Test that expired Codex tokens don't block the auto chain."""
|
|
|
|
|
|
|
|
|
|
def test_expired_codex_falls_through_to_next(self, tmp_path, monkeypatch):
|
|
|
|
|
"""When Codex token is expired, auto chain should skip it and try next provider."""
|
|
|
|
|
import base64
|
|
|
|
|
import time as _time
|
|
|
|
|
|
|
|
|
|
# Expired Codex JWT
|
|
|
|
|
header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
|
|
|
|
|
payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
|
|
|
|
|
payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
|
|
|
|
|
expired_jwt = f"{header}.{payload}.fakesig"
|
|
|
|
|
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({
|
|
|
|
|
"version": 1,
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai-codex": {
|
|
|
|
|
"tokens": {"access_token": expired_jwt, "refresh_token": "r"},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}))
|
|
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
|
|
|
|
|
# Set up Anthropic as fallback
|
|
|
|
|
monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-test-fallback")
|
|
|
|
|
with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
|
|
|
|
|
mock_build.return_value = MagicMock()
|
|
|
|
|
from agent.auxiliary_client import _resolve_auto, AnthropicAuxiliaryClient
|
|
|
|
|
client, model = _resolve_auto()
|
|
|
|
|
# Should NOT be Codex, should be Anthropic (or another available provider)
|
|
|
|
|
assert not isinstance(client, type(None)), "Should find a provider after expired Codex"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_expired_codex_openrouter_wins(self, tmp_path, monkeypatch):
|
|
|
|
|
"""With expired Codex + OpenRouter key, OpenRouter should win (1st in chain)."""
|
|
|
|
|
import base64
|
|
|
|
|
import time as _time
|
|
|
|
|
|
|
|
|
|
header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
|
|
|
|
|
payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
|
|
|
|
|
payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
|
|
|
|
|
expired_jwt = f"{header}.{payload}.fakesig"
|
|
|
|
|
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({
|
|
|
|
|
"version": 1,
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai-codex": {
|
|
|
|
|
"tokens": {"access_token": expired_jwt, "refresh_token": "r"},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}))
|
|
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")
|
|
|
|
|
|
|
|
|
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
|
|
|
|
mock_openai.return_value = MagicMock()
|
|
|
|
|
from agent.auxiliary_client import _resolve_auto
|
|
|
|
|
client, model = _resolve_auto()
|
|
|
|
|
assert client is not None
|
|
|
|
|
# OpenRouter is 1st in chain, should win
|
|
|
|
|
mock_openai.assert_called()
|
|
|
|
|
|
|
|
|
|
def test_expired_codex_custom_endpoint_wins(self, tmp_path, monkeypatch):
|
|
|
|
|
"""With expired Codex + custom endpoint (Ollama), custom should win (3rd in chain)."""
|
|
|
|
|
import base64
|
|
|
|
|
import time as _time
|
|
|
|
|
|
|
|
|
|
header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
|
|
|
|
|
payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
|
|
|
|
|
payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
|
|
|
|
|
expired_jwt = f"{header}.{payload}.fakesig"
|
|
|
|
|
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({
|
|
|
|
|
"version": 1,
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai-codex": {
|
|
|
|
|
"tokens": {"access_token": expired_jwt, "refresh_token": "r"},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}))
|
|
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
|
|
|
|
|
# Simulate Ollama or custom endpoint
|
|
|
|
|
with patch("agent.auxiliary_client._resolve_custom_runtime",
|
|
|
|
|
return_value=("http://localhost:11434/v1", "sk-dummy")):
|
|
|
|
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
|
|
|
|
mock_openai.return_value = MagicMock()
|
|
|
|
|
from agent.auxiliary_client import _resolve_auto
|
|
|
|
|
client, model = _resolve_auto()
|
|
|
|
|
assert client is not None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_hermes_oauth_file_sets_oauth_flag(self, monkeypatch):
|
2026-04-02 08:43:06 -07:00
|
|
|
"""OAuth-style tokens should get is_oauth=*** (token is not sk-ant-api-*)."""
|
2026-03-21 17:36:25 -07:00
|
|
|
# Mock resolve_anthropic_token to return an OAuth-style token
|
2026-03-30 20:36:56 -07:00
|
|
|
with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="hermes-oauth-jwt-token"), \
|
2026-04-02 08:43:06 -07:00
|
|
|
patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
|
|
|
|
|
patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
|
2026-03-21 17:36:25 -07:00
|
|
|
mock_build.return_value = MagicMock()
|
|
|
|
|
from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient
|
|
|
|
|
client, model = _try_anthropic()
|
|
|
|
|
assert client is not None, "Should resolve token"
|
|
|
|
|
adapter = client.chat.completions
|
|
|
|
|
assert adapter._is_oauth is True, "Non-sk-ant-api token should set is_oauth=True"
|
|
|
|
|
|
|
|
|
|
def test_jwt_missing_exp_passes_through(self, tmp_path, monkeypatch):
|
|
|
|
|
"""JWT with valid JSON but no exp claim should pass through."""
|
|
|
|
|
import base64
|
|
|
|
|
header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
|
|
|
|
|
payload_data = json.dumps({"sub": "user123"}).encode() # no exp
|
|
|
|
|
payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
|
|
|
|
|
no_exp_jwt = f"{header}.{payload}.fakesig"
|
|
|
|
|
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({
|
|
|
|
|
"version": 1,
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai-codex": {
|
|
|
|
|
"tokens": {"access_token": no_exp_jwt, "refresh_token": "r"},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}))
|
|
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
result = _read_codex_access_token()
|
|
|
|
|
assert result == no_exp_jwt, "JWT without exp should pass through"
|
|
|
|
|
|
|
|
|
|
def test_jwt_invalid_json_payload_passes_through(self, tmp_path, monkeypatch):
|
|
|
|
|
"""JWT with valid base64 but invalid JSON payload should pass through."""
|
|
|
|
|
import base64
|
|
|
|
|
header = base64.urlsafe_b64encode(b'{"alg":"RS256"}').rstrip(b"=").decode()
|
|
|
|
|
payload = base64.urlsafe_b64encode(b"not-json-content").rstrip(b"=").decode()
|
|
|
|
|
bad_jwt = f"{header}.{payload}.fakesig"
|
|
|
|
|
|
|
|
|
|
hermes_home = tmp_path / "hermes"
|
|
|
|
|
hermes_home.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(hermes_home / "auth.json").write_text(json.dumps({
|
|
|
|
|
"version": 1,
|
|
|
|
|
"providers": {
|
|
|
|
|
"openai-codex": {
|
|
|
|
|
"tokens": {"access_token": bad_jwt, "refresh_token": "r"},
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}))
|
|
|
|
|
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
|
|
|
|
result = _read_codex_access_token()
|
|
|
|
|
assert result == bad_jwt, "JWT with invalid JSON payload should pass through"
|
|
|
|
|
|
|
|
|
|
def test_claude_code_oauth_env_sets_flag(self, monkeypatch):
|
|
|
|
|
"""CLAUDE_CODE_OAUTH_TOKEN env var should get is_oauth=True."""
|
2026-03-30 20:36:56 -07:00
|
|
|
monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "cc-oauth-token-test")
|
2026-03-21 17:36:25 -07:00
|
|
|
monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
|
|
|
|
|
with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
|
|
|
|
|
mock_build.return_value = MagicMock()
|
|
|
|
|
from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient
|
|
|
|
|
client, model = _try_anthropic()
|
|
|
|
|
assert client is not None
|
|
|
|
|
adapter = client.chat.completions
|
|
|
|
|
assert adapter._is_oauth is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestExplicitProviderRouting:
    """Test explicit provider selection bypasses auto chain correctly."""

    def _assert_env_key_provider(self, monkeypatch, env_var, env_value, provider):
        """Shared check for simple API-key providers.

        All of these follow the same pattern: one env var plus an
        OpenAI-compatible endpoint, so resolving the provider must yield a
        non-None client built via the patched OpenAI constructor.
        """
        monkeypatch.setenv(env_var, env_value)
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            client, _model = resolve_provider_client(provider)
            assert client is not None

    def test_explicit_anthropic_oauth(self, monkeypatch):
        """provider='anthropic' + OAuth token should work with is_oauth=True."""
        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-explicit-test")
        # Isolate from host-env/file-backed pool credentials, matching the
        # API-key variant below (pool auto-seeding could otherwise flip the
        # OAuth flag).
        with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
                patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            mock_build.return_value = MagicMock()
            client, model = resolve_provider_client("anthropic")
            assert client is not None
            # Verify OAuth flag propagated
            adapter = client.chat.completions
            assert adapter._is_oauth is True

    def test_explicit_anthropic_api_key(self, monkeypatch):
        """provider='anthropic' + regular API key should work with is_oauth=False."""
        with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api-regular-key"), \
                patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
                patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            mock_build.return_value = MagicMock()
            client, model = resolve_provider_client("anthropic")
            assert client is not None
            adapter = client.chat.completions
            assert adapter._is_oauth is False

    def test_explicit_openrouter(self, monkeypatch):
        """provider='openrouter' should use OPENROUTER_API_KEY."""
        self._assert_env_key_provider(monkeypatch, "OPENROUTER_API_KEY", "or-explicit", "openrouter")

    def test_explicit_kimi(self, monkeypatch):
        """provider='kimi-coding' should use KIMI_API_KEY."""
        self._assert_env_key_provider(monkeypatch, "KIMI_API_KEY", "kimi-test-key", "kimi-coding")

    def test_explicit_minimax(self, monkeypatch):
        """provider='minimax' should use MINIMAX_API_KEY."""
        self._assert_env_key_provider(monkeypatch, "MINIMAX_API_KEY", "mm-test-key", "minimax")

    def test_explicit_deepseek(self, monkeypatch):
        """provider='deepseek' should use DEEPSEEK_API_KEY."""
        self._assert_env_key_provider(monkeypatch, "DEEPSEEK_API_KEY", "ds-test-key", "deepseek")

    def test_explicit_zai(self, monkeypatch):
        """provider='zai' should use GLM_API_KEY."""
        self._assert_env_key_provider(monkeypatch, "GLM_API_KEY", "zai-test-key", "zai")

    def test_explicit_unknown_returns_none(self, monkeypatch):
        """Unknown provider should return None."""
        client, model = resolve_provider_client("nonexistent-provider")
        assert client is None
|
|
|
|
|
|
|
|
|
|
|
2026-02-28 21:47:51 -08:00
|
|
|
class TestGetTextAuxiliaryClient:
    """Test the full resolution chain for get_text_auxiliary_client."""

    def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir):
        """OPENROUTER_API_KEY wins even when codex credentials exist (fixture)."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "google/gemini-3-flash-preview"
            mock_openai.assert_called_once()
            # The OpenRouter key must be passed straight to the client.
            call_kwargs = mock_openai.call_args
            assert call_kwargs.kwargs["api_key"] == "or-key"

    def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir):
        """Nous auth beats codex: the Nous default model is selected."""
        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_nous.return_value = {"access_token": "nous-tok"}
            client, model = get_text_auxiliary_client()
            assert model == "google/gemini-3-flash-preview"

    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
        """A config.yaml custom endpoint outranks available codex credentials."""
        # config.yaml is the single source of truth for endpoint URLs; no
        # OPENAI_BASE_URL env var is involved.
        config = {
            "model": {
                "provider": "custom",
                "base_url": "http://localhost:1234/v1",
                "default": "my-local-model",
            }
        }
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
        # Override the autouse monkeypatch for codex
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
            lambda: "codex-test-token-abc123",
        )
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            # Custom endpoints use the configured main model, not a hardcoded
            # auxiliary default (local servers typically serve one model).
            assert model == "my-local-model"
            call_kwargs = mock_openai.call_args
            assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_task_direct_endpoint_override(self, monkeypatch):
        """Per-task AUXILIARY_*_BASE_URL/API_KEY/MODEL beat the provider chain."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client("web_extract")
            assert model == "task-model"
            assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
            assert mock_openai.call_args.kwargs["api_key"] == "task-key"

    def test_task_direct_endpoint_without_openai_key_uses_placeholder(self, monkeypatch):
        """Local endpoints without an API key should use 'no-key-required' placeholder."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client("web_extract")
            assert client is not None
            assert model == "task-model"
            assert mock_openai.call_args.kwargs["api_key"] == "no-key-required"
            assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"

    def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
        """With every other backend disabled, config.yaml base_url/model are used."""
        config = {
            "model": {
                "provider": "custom",
                "base_url": "http://localhost:1234/v1",
                "default": "my-local-model",
            }
        }
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)

        # Disable every competing backend so only the custom endpoint remains.
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()

            assert client is not None
            assert model == "my-local-model"
            call_kwargs = mock_openai.call_args
            assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
        """Codex credentials (fixture) are the last resort in the chain."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "gpt-5.2-codex"
            # Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client
            from agent.auxiliary_client import CodexAuxiliaryClient
            assert isinstance(client, CodexAuxiliaryClient)

    def test_codex_pool_entry_takes_priority_over_auth_store(self):
        """A credential-pool codex entry is used before the legacy auth store."""

        # Minimal stand-ins for a pool entry and a pool with one credential.
        class _Entry:
            access_token = "pooled-codex-token"
            base_url = "https://chatgpt.com/backend-api/codex"

        class _Pool:
            def has_credentials(self):
                return True

            def select(self):
                return _Entry()

        with (
            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
            patch("agent.auxiliary_client.OpenAI"),
            # Trip an AssertionError if the legacy store is ever consulted.
            patch("hermes_cli.auth._read_codex_tokens", side_effect=AssertionError("legacy codex store should not run")),
        ):
            from agent.auxiliary_client import _try_codex

            client, model = _try_codex()

            from agent.auxiliary_client import CodexAuxiliaryClient

            assert isinstance(client, CodexAuxiliaryClient)
            assert model == "gpt-5.2-codex"

    def test_returns_none_when_nothing_available(self, monkeypatch):
        """With no env keys and every backend disabled, resolution yields (None, None)."""
        # Explicitly clear env vars to prevent host-environment leakage.
        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
            client, model = get_text_auxiliary_client()
            assert client is None
            assert model is None
|
|
|
|
|
|
|
|
|
|
|
2026-03-07 08:52:06 -08:00
|
|
|
class TestVisionClientFallback:
    """Vision client auto mode resolves known-good multimodal backends."""

    def test_vision_returns_none_without_any_credentials(self):
        # With Nous auth absent and the Anthropic probe returning nothing,
        # auto mode has no backend to fall back to.
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
            patch("agent.auxiliary_client._try_anthropic", return_value=(None, None)),
        ):
            client, model = get_vision_auxiliary_client()
            assert client is None
            assert model is None

    def test_vision_auto_includes_anthropic_when_configured(self, monkeypatch):
        # An Anthropic API key makes "anthropic" appear in the vision backend list.
        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
        ):
            backends = get_available_vision_backends()

        assert "anthropic" in backends

    def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch):
        # Forcing "anthropic" yields the native wrapper and its default vision model.
        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
        ):
            client, model = resolve_provider_client("anthropic")

        assert client is not None
        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
        assert model == "claude-haiku-4-5-20251001"
|
|
feat(auth): same-provider credential pools with rotation, custom endpoint support, and interactive CLI (#2647)
* feat(auth): add same-provider credential pools and rotation UX
Add same-provider credential pooling so Hermes can rotate across
multiple credentials for a single provider, recover from exhausted
credentials without jumping providers immediately, and configure
that behavior directly in hermes setup.
- agent/credential_pool.py: persisted per-provider credential pools
- hermes auth add/list/remove/reset CLI commands
- 429/402/401 recovery with pool rotation in run_agent.py
- Setup wizard integration for pool strategy configuration
- Auto-seeding from env vars and existing OAuth state
Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Salvaged from PR #2647
* fix(tests): prevent pool auto-seeding from host env in credential pool tests
Tests for non-pool Anthropic paths and auth remove were failing when
host env vars (ANTHROPIC_API_KEY) or file-backed OAuth credentials
were present. The pool auto-seeding picked these up, causing unexpected
pool entries in tests.
- Mock _select_pool_entry in auxiliary_client OAuth flag tests
- Clear Anthropic env vars and mock _seed_from_singletons in auth remove test
* feat(auth): add thread safety, least_used strategy, and request counting
- Add threading.Lock to CredentialPool for gateway thread safety
(concurrent requests from multiple gateway sessions could race on
pool state mutations without this)
- Add 'least_used' rotation strategy that selects the credential
with the lowest request_count, distributing load more evenly
- Add request_count field to PooledCredential for usage tracking
- Add mark_used() method to increment per-credential request counts
- Wrap select(), mark_exhausted_and_rotate(), and try_refresh_current()
with lock acquisition
- Add tests: least_used selection, mark_used counting, concurrent
thread safety (4 threads × 20 selects with no corruption)
* feat(auth): add interactive mode for bare 'hermes auth' command
When 'hermes auth' is called without a subcommand, it now launches an
interactive wizard that:
1. Shows full credential pool status across all providers
2. Offers a menu: add, remove, reset cooldowns, set strategy
3. For OAuth-capable providers (anthropic, nous, openai-codex), the
add flow explicitly asks 'API key or OAuth login?' — making it
clear that both auth types are supported for the same provider
4. Strategy picker shows all 4 options (fill_first, round_robin,
least_used, random) with the current selection marked
5. Remove flow shows entries with indices for easy selection
The subcommand paths (hermes auth add/list/remove/reset) still work
exactly as before for scripted/non-interactive use.
* fix(tests): update runtime_provider tests for config.yaml source of truth (#4165)
Tests were using OPENAI_BASE_URL env var which is no longer consulted
after #4165. Updated to use model config (provider, base_url, api_key)
which is the new single source of truth for custom endpoint URLs.
* feat(auth): support custom endpoint credential pools keyed by provider name
Custom OpenAI-compatible endpoints all share provider='custom', making
the provider-keyed pool useless. Now pools for custom endpoints are
keyed by 'custom:<normalized_name>' where the name comes from the
custom_providers config list (auto-generated from URL hostname).
- Pool key format: 'custom:together.ai', 'custom:local-(localhost:8080)'
- load_pool('custom:name') seeds from custom_providers api_key AND
model.api_key when base_url matches
- hermes auth add/list now shows custom endpoints alongside registry
providers
- _resolve_openrouter_runtime and _resolve_named_custom_runtime check
pool before falling back to single config key
- 6 new tests covering custom pool keying, seeding, and listing
* docs: add Excalidraw diagram of full credential pool flow
Comprehensive architecture diagram showing:
- Credential sources (env vars, auth.json OAuth, config.yaml, CLI)
- Pool storage and auto-seeding
- Runtime resolution paths (registry, custom, OpenRouter)
- Error recovery (429 retry-then-rotate, 402 immediate, 401 refresh)
- CLI management commands and strategy configuration
Open at: https://excalidraw.com/#json=2Ycqhqpi6f12E_3ITyiwh,c7u9jSt5BwrmiVzHGbm87g
* fix(tests): update setup wizard pool tests for unified select_provider_and_model flow
The setup wizard now delegates to select_provider_and_model() instead
of using its own prompt_choice-based provider picker. Tests needed:
- Mock select_provider_and_model as no-op (provider pre-written to config)
- Call _stub_tts BEFORE custom prompt_choice mock (it overwrites it)
- Pre-write model.provider to config so the pool step is reached
* docs: add comprehensive credential pool documentation
- New page: website/docs/user-guide/features/credential-pools.md
Full guide covering quick start, CLI commands, rotation strategies,
error recovery, custom endpoint pools, auto-discovery, thread safety,
architecture, and storage format.
- Updated fallback-providers.md to reference credential pools as the
first layer of resilience (same-provider rotation before cross-provider)
- Added hermes auth to CLI commands reference with usage examples
- Added credential_pool_strategies to configuration guide
* chore: remove excalidraw diagram from repo (external link only)
* refactor: simplify credential pool code — extract helpers, collapse extras, dedup patterns
- _load_config_safe(): replace 4 identical try/except/import blocks
- _iter_custom_providers(): shared generator for custom provider iteration
- PooledCredential.extra dict: collapse 11 round-trip-only fields
(token_type, scope, client_id, portal_base_url, obtained_at,
expires_in, agent_key_id, agent_key_expires_in, agent_key_reused,
agent_key_obtained_at, tls) into a single extra dict with
__getattr__ for backward-compatible access
- _available_entries(): shared exhaustion-check between select and peek
- Dedup anthropic OAuth seeding (hermes_pkce + claude_code identical)
- SimpleNamespace replaces class _Args boilerplate in auth_commands
- _try_resolve_from_custom_pool(): shared pool-check in runtime_provider
Net -17 lines. All 383 targeted tests pass.
---------
Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
2026-03-31 03:10:01 -07:00
|
|
|
|
|
|
|
|
class TestAuxiliaryPoolAwareness:

    def test_try_nous_uses_pool_entry(self):
        """_try_nous prefers pooled credentials (agent key + pooled base URL)."""

        class _Entry:
            access_token = "pooled-access-token"
            agent_key = "pooled-agent-key"
            inference_base_url = "https://inference.pool.example/v1"

        class _Pool:
            def has_credentials(self):
                return True

            def select(self):
                return _Entry()

        with (
            patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous()

        assert client is not None
        assert model == "gemini-3-flash"
        call_kwargs = mock_openai.call_args.kwargs
        assert call_kwargs["api_key"] == "pooled-agent-key"
        assert call_kwargs["base_url"] == "https://inference.pool.example/v1"
|
|
2026-03-17 23:40:22 -07:00
|
|
|
def test_resolve_provider_client_copilot_uses_runtime_credentials(self, monkeypatch):
|
|
|
|
|
monkeypatch.delenv("GITHUB_TOKEN", raising=False)
|
|
|
|
|
monkeypatch.delenv("GH_TOKEN", raising=False)
|
|
|
|
|
|
|
|
|
|
with (
|
|
|
|
|
patch(
|
|
|
|
|
"hermes_cli.auth.resolve_api_key_provider_credentials",
|
|
|
|
|
return_value={
|
|
|
|
|
"provider": "copilot",
|
|
|
|
|
"api_key": "gh-cli-token",
|
|
|
|
|
"base_url": "https://api.githubcopilot.com",
|
|
|
|
|
"source": "gh auth token",
|
|
|
|
|
},
|
|
|
|
|
),
|
|
|
|
|
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
|
|
|
|
):
|
|
|
|
|
client, model = resolve_provider_client("copilot", model="gpt-5.4")
|
|
|
|
|
|
|
|
|
|
assert client is not None
|
|
|
|
|
assert model == "gpt-5.4"
|
|
|
|
|
call_kwargs = mock_openai.call_args.kwargs
|
|
|
|
|
assert call_kwargs["api_key"] == "gh-cli-token"
|
|
|
|
|
assert call_kwargs["base_url"] == "https://api.githubcopilot.com"
|
|
|
|
|
assert call_kwargs["default_headers"]["Editor-Version"]
|
|
|
|
|
|
2026-03-14 21:14:20 -07:00
|
|
|
def test_vision_auto_uses_anthropic_when_no_higher_priority_backend(self, monkeypatch):
|
|
|
|
|
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
|
|
|
|
|
with (
|
|
|
|
|
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
|
|
|
|
|
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
|
|
|
|
|
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
|
|
|
|
|
):
|
|
|
|
|
client, model = get_vision_auxiliary_client()
|
|
|
|
|
|
|
|
|
|
assert client is not None
|
|
|
|
|
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
|
|
|
|
|
assert model == "claude-haiku-4-5-20251001"
|
|
|
|
|
|
|
|
|
|
def test_selected_anthropic_provider_is_preferred_for_vision_auto(self, monkeypatch):
|
|
|
|
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
|
|
|
|
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
|
|
|
|
|
|
|
|
|
|
def fake_load_config():
|
|
|
|
|
return {"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"}}
|
|
|
|
|
|
|
|
|
|
with (
|
|
|
|
|
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
|
|
|
|
|
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
|
|
|
|
|
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
|
|
|
|
|
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
|
|
|
|
patch("hermes_cli.config.load_config", fake_load_config),
|
|
|
|
|
):
|
|
|
|
|
client, model = get_vision_auxiliary_client()
|
|
|
|
|
|
|
|
|
|
assert client is not None
|
|
|
|
|
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
|
|
|
|
|
assert model == "claude-haiku-4-5-20251001"
|
|
|
|
|
|
2026-03-27 07:49:44 -07:00
|
|
|
def test_selected_codex_provider_short_circuits_vision_auto(self, monkeypatch):
|
|
|
|
|
def fake_load_config():
|
|
|
|
|
return {"model": {"provider": "openai-codex", "default": "gpt-5.2-codex"}}
|
|
|
|
|
|
|
|
|
|
codex_client = MagicMock()
|
|
|
|
|
with (
|
|
|
|
|
patch("hermes_cli.config.load_config", fake_load_config),
|
|
|
|
|
patch("agent.auxiliary_client._try_codex", return_value=(codex_client, "gpt-5.2-codex")) as mock_codex,
|
|
|
|
|
patch("agent.auxiliary_client._try_openrouter") as mock_openrouter,
|
|
|
|
|
patch("agent.auxiliary_client._try_nous") as mock_nous,
|
|
|
|
|
patch("agent.auxiliary_client._try_anthropic") as mock_anthropic,
|
|
|
|
|
patch("agent.auxiliary_client._try_custom_endpoint") as mock_custom,
|
|
|
|
|
):
|
|
|
|
|
provider, client, model = resolve_vision_provider_client()
|
|
|
|
|
|
|
|
|
|
assert provider == "openai-codex"
|
|
|
|
|
assert client is codex_client
|
|
|
|
|
assert model == "gpt-5.2-codex"
|
|
|
|
|
mock_codex.assert_called_once()
|
|
|
|
|
mock_openrouter.assert_not_called()
|
|
|
|
|
mock_nous.assert_not_called()
|
|
|
|
|
mock_anthropic.assert_not_called()
|
|
|
|
|
mock_custom.assert_not_called()
|
|
|
|
|
|
2026-03-08 18:44:25 -07:00
|
|
|
def test_vision_auto_includes_codex(self, codex_auth_dir):
|
|
|
|
|
"""Codex supports vision (gpt-5.3-codex), so auto mode should use it."""
|
|
|
|
|
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
|
|
|
|
patch("agent.auxiliary_client.OpenAI"):
|
2026-03-08 18:06:40 -07:00
|
|
|
client, model = get_vision_auxiliary_client()
|
2026-03-08 18:44:25 -07:00
|
|
|
from agent.auxiliary_client import CodexAuxiliaryClient
|
|
|
|
|
assert isinstance(client, CodexAuxiliaryClient)
|
2026-03-14 23:21:09 -07:00
|
|
|
assert model == "gpt-5.2-codex"
|
2026-03-08 18:06:40 -07:00
|
|
|
|
2026-03-09 15:36:19 -07:00
|
|
|
def test_vision_auto_falls_back_to_custom_endpoint(self, monkeypatch):
|
|
|
|
|
"""Custom endpoint is used as fallback in vision auto mode.
|
|
|
|
|
|
|
|
|
|
Many local models (Qwen-VL, LLaVA, etc.) support vision.
|
|
|
|
|
When no OpenRouter/Nous/Codex is available, try the custom endpoint.
|
|
|
|
|
"""
|
2026-04-02 08:43:06 -07:00
|
|
|
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
|
|
|
|
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
2026-03-09 15:36:19 -07:00
|
|
|
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
2026-04-02 08:43:06 -07:00
|
|
|
patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)), \
|
|
|
|
|
patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
|
|
|
|
|
patch("agent.auxiliary_client._resolve_custom_runtime",
|
|
|
|
|
return_value=("http://localhost:1234/v1", "local-key")), \
|
2026-03-09 15:36:19 -07:00
|
|
|
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
2026-03-08 18:06:40 -07:00
|
|
|
client, model = get_vision_auxiliary_client()
|
2026-03-09 15:36:19 -07:00
|
|
|
assert client is not None # Custom endpoint picked up as fallback
|
2026-03-08 18:06:40 -07:00
|
|
|
|
2026-03-14 20:48:29 -07:00
|
|
|
def test_vision_direct_endpoint_override(self, monkeypatch):
|
|
|
|
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
|
|
|
|
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
|
|
|
|
|
monkeypatch.setenv("AUXILIARY_VISION_API_KEY", "vision-key")
|
|
|
|
|
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
|
|
|
|
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
|
|
|
|
client, model = get_vision_auxiliary_client()
|
|
|
|
|
assert model == "vision-model"
|
|
|
|
|
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1"
|
|
|
|
|
assert mock_openai.call_args.kwargs["api_key"] == "vision-key"
|
|
|
|
|
|
2026-03-29 21:05:36 -07:00
|
|
|
def test_vision_direct_endpoint_without_key_uses_placeholder(self, monkeypatch):
|
|
|
|
|
"""Vision endpoint without API key should use 'no-key-required' placeholder."""
|
2026-03-14 20:48:29 -07:00
|
|
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
|
|
|
|
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
|
|
|
|
|
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
|
|
|
|
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
|
|
|
|
client, model = get_vision_auxiliary_client()
|
2026-03-29 21:05:36 -07:00
|
|
|
assert client is not None
|
|
|
|
|
assert model == "vision-model"
|
|
|
|
|
assert mock_openai.call_args.kwargs["api_key"] == "no-key-required"
|
2026-03-14 20:48:29 -07:00
|
|
|
|
2026-03-08 18:06:40 -07:00
|
|
|
def test_vision_uses_openrouter_when_available(self, monkeypatch):
|
|
|
|
|
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
|
|
|
|
|
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
|
|
|
|
client, model = get_vision_auxiliary_client()
|
|
|
|
|
assert model == "google/gemini-3-flash-preview"
|
|
|
|
|
assert client is not None
|
|
|
|
|
|
|
|
|
|
def test_vision_uses_nous_when_available(self, monkeypatch):
|
|
|
|
|
with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
|
|
|
|
|
patch("agent.auxiliary_client.OpenAI"):
|
|
|
|
|
mock_nous.return_value = {"access_token": "nous-tok"}
|
|
|
|
|
client, model = get_vision_auxiliary_client()
|
2026-03-26 13:49:43 -07:00
|
|
|
assert model == "google/gemini-3-flash-preview"
|
2026-03-08 18:06:40 -07:00
|
|
|
assert client is not None
|
|
|
|
|
|
|
|
|
|
def test_vision_forced_main_uses_custom_endpoint(self, monkeypatch):
|
|
|
|
|
"""When explicitly forced to 'main', vision CAN use custom endpoint."""
|
refactor: make config.yaml the single source of truth for endpoint URLs (#4165)
OPENAI_BASE_URL was written to .env AND config.yaml, creating a dual-source
confusion. Users (especially Docker) would see the URL in .env and assume
that's where all config lives, then wonder why LLM_MODEL in .env didn't work.
Changes:
- Remove all 27 save_env_value("OPENAI_BASE_URL", ...) calls across main.py,
setup.py, and tools_config.py
- Remove OPENAI_BASE_URL env var reading from runtime_provider.py, cli.py,
models.py, and gateway/run.py
- Remove LLM_MODEL/HERMES_MODEL env var reading from gateway/run.py and
auxiliary_client.py — config.yaml model.default is authoritative
- Vision base URL now saved to config.yaml auxiliary.vision.base_url
(both setup wizard and tools_config paths)
- Tests updated to set config values instead of env vars
Convention enforced: .env is for SECRETS only (API keys). All other
configuration (model names, base URLs, provider selection) lives
exclusively in config.yaml.
2026-03-30 22:02:53 -07:00
|
|
|
config = {
|
|
|
|
|
"model": {
|
|
|
|
|
"provider": "custom",
|
|
|
|
|
"base_url": "http://localhost:1234/v1",
|
|
|
|
|
"default": "my-local-model",
|
|
|
|
|
}
|
|
|
|
|
}
|
2026-03-08 18:06:40 -07:00
|
|
|
monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
|
|
|
|
|
monkeypatch.setenv("OPENAI_API_KEY", "local-key")
|
refactor: make config.yaml the single source of truth for endpoint URLs (#4165)
OPENAI_BASE_URL was written to .env AND config.yaml, creating a dual-source
confusion. Users (especially Docker) would see the URL in .env and assume
that's where all config lives, then wonder why LLM_MODEL in .env didn't work.
Changes:
- Remove all 27 save_env_value("OPENAI_BASE_URL", ...) calls across main.py,
setup.py, and tools_config.py
- Remove OPENAI_BASE_URL env var reading from runtime_provider.py, cli.py,
models.py, and gateway/run.py
- Remove LLM_MODEL/HERMES_MODEL env var reading from gateway/run.py and
auxiliary_client.py — config.yaml model.default is authoritative
- Vision base URL now saved to config.yaml auxiliary.vision.base_url
(both setup wizard and tools_config paths)
- Tests updated to set config values instead of env vars
Convention enforced: .env is for SECRETS only (API keys). All other
configuration (model names, base URLs, provider selection) lives
exclusively in config.yaml.
2026-03-30 22:02:53 -07:00
|
|
|
monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
|
|
|
|
|
monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
|
2026-03-08 18:06:40 -07:00
|
|
|
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
|
|
|
|
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
|
|
|
|
client, model = get_vision_auxiliary_client()
|
|
|
|
|
assert client is not None
|
fix: auxiliary client uses main model for custom/local endpoints instead of gpt-4o-mini (#1189)
* fix: prevent model/provider mismatch when switching providers during active gateway
When _update_config_for_provider() writes the new provider and base_url
to config.yaml, the gateway (which re-reads config per-message) can pick
up the change before model selection completes. This causes the old model
name (e.g. 'anthropic/claude-opus-4.6') to be sent to the new provider's
API (e.g. MiniMax), which fails.
Changes:
- _update_config_for_provider() now accepts an optional default_model
parameter. When provided and the current model.default is empty or
uses OpenRouter format (contains '/'), it sets a safe default model
for the new provider.
- All setup.py callers for direct-API providers (zai, kimi, minimax,
minimax-cn, anthropic) now pass a provider-appropriate default model.
- _setup_provider_model_selection() now validates the 'Keep current'
choice: if the current model uses OpenRouter format and wouldn't work
with the new provider, it warns and switches to the provider's first
default model instead of silently keeping the incompatible name.
Reported by a user on Home Assistant whose gateway started sending
'anthropic/claude-opus-4.6' to MiniMax's API after running hermes setup.
* fix: auxiliary client uses main model for custom/local endpoints instead of gpt-4o-mini
When a user runs a local server (e.g. Qwen3.5-9B via OPENAI_BASE_URL),
the auxiliary client (context compression, vision, session search) would
send requests for 'gpt-4o-mini' or 'google/gemini-3-flash-preview' to
the local server, which only serves one model — causing 404 errors
mid-task.
Changes:
- _try_custom_endpoint() now reads the user's configured main model via
_read_main_model() (checks OPENAI_MODEL → HERMES_MODEL → LLM_MODEL →
config.yaml model.default) instead of hardcoding 'gpt-4o-mini'.
- resolve_provider_client() auto mode now detects when an OpenRouter-
formatted model override (containing '/') would be sent to a non-
OpenRouter provider (like a local server) and drops it in favor of
the provider's default model.
- Test isolation fixes: properly clear env vars in 'nothing available'
tests to prevent host environment leakage.
2026-03-13 10:02:16 -07:00
|
|
|
assert model == "my-local-model"
|
2026-03-08 18:06:40 -07:00
|
|
|
|
|
|
|
|
def test_vision_forced_main_returns_none_without_creds(self, monkeypatch):
|
|
|
|
|
"""Forced main with no credentials still returns None."""
|
|
|
|
|
monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
|
fix: auxiliary client uses main model for custom/local endpoints instead of gpt-4o-mini (#1189)
* fix: prevent model/provider mismatch when switching providers during active gateway
When _update_config_for_provider() writes the new provider and base_url
to config.yaml, the gateway (which re-reads config per-message) can pick
up the change before model selection completes. This causes the old model
name (e.g. 'anthropic/claude-opus-4.6') to be sent to the new provider's
API (e.g. MiniMax), which fails.
Changes:
- _update_config_for_provider() now accepts an optional default_model
parameter. When provided and the current model.default is empty or
uses OpenRouter format (contains '/'), it sets a safe default model
for the new provider.
- All setup.py callers for direct-API providers (zai, kimi, minimax,
minimax-cn, anthropic) now pass a provider-appropriate default model.
- _setup_provider_model_selection() now validates the 'Keep current'
choice: if the current model uses OpenRouter format and wouldn't work
with the new provider, it warns and switches to the provider's first
default model instead of silently keeping the incompatible name.
Reported by a user on Home Assistant whose gateway started sending
'anthropic/claude-opus-4.6' to MiniMax's API after running hermes setup.
* fix: auxiliary client uses main model for custom/local endpoints instead of gpt-4o-mini
When a user runs a local server (e.g. Qwen3.5-9B via OPENAI_BASE_URL),
the auxiliary client (context compression, vision, session search) would
send requests for 'gpt-4o-mini' or 'google/gemini-3-flash-preview' to
the local server, which only serves one model — causing 404 errors
mid-task.
Changes:
- _try_custom_endpoint() now reads the user's configured main model via
_read_main_model() (checks OPENAI_MODEL → HERMES_MODEL → LLM_MODEL →
config.yaml model.default) instead of hardcoding 'gpt-4o-mini'.
- resolve_provider_client() auto mode now detects when an OpenRouter-
formatted model override (containing '/') would be sent to a non-
OpenRouter provider (like a local server) and drops it in favor of
the provider's default model.
- Test isolation fixes: properly clear env vars in 'nothing available'
tests to prevent host environment leakage.
2026-03-13 10:02:16 -07:00
|
|
|
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
|
|
|
|
|
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
2026-03-08 18:06:40 -07:00
|
|
|
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
fix: auxiliary client uses main model for custom/local endpoints instead of gpt-4o-mini (#1189)
* fix: prevent model/provider mismatch when switching providers during active gateway
When _update_config_for_provider() writes the new provider and base_url
to config.yaml, the gateway (which re-reads config per-message) can pick
up the change before model selection completes. This causes the old model
name (e.g. 'anthropic/claude-opus-4.6') to be sent to the new provider's
API (e.g. MiniMax), which fails.
Changes:
- _update_config_for_provider() now accepts an optional default_model
parameter. When provided and the current model.default is empty or
uses OpenRouter format (contains '/'), it sets a safe default model
for the new provider.
- All setup.py callers for direct-API providers (zai, kimi, minimax,
minimax-cn, anthropic) now pass a provider-appropriate default model.
- _setup_provider_model_selection() now validates the 'Keep current'
choice: if the current model uses OpenRouter format and wouldn't work
with the new provider, it warns and switches to the provider's first
default model instead of silently keeping the incompatible name.
Reported by a user on Home Assistant whose gateway started sending
'anthropic/claude-opus-4.6' to MiniMax's API after running hermes setup.
* fix: auxiliary client uses main model for custom/local endpoints instead of gpt-4o-mini
When a user runs a local server (e.g. Qwen3.5-9B via OPENAI_BASE_URL),
the auxiliary client (context compression, vision, session search) would
send requests for 'gpt-4o-mini' or 'google/gemini-3-flash-preview' to
the local server, which only serves one model — causing 404 errors
mid-task.
Changes:
- _try_custom_endpoint() now reads the user's configured main model via
_read_main_model() (checks OPENAI_MODEL → HERMES_MODEL → LLM_MODEL →
config.yaml model.default) instead of hardcoding 'gpt-4o-mini'.
- resolve_provider_client() auto mode now detects when an OpenRouter-
formatted model override (containing '/') would be sent to a non-
OpenRouter provider (like a local server) and drops it in favor of
the provider's default model.
- Test isolation fixes: properly clear env vars in 'nothing available'
tests to prevent host environment leakage.
2026-03-13 10:02:16 -07:00
|
|
|
patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
|
|
|
|
|
patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
|
2026-03-08 18:06:40 -07:00
|
|
|
client, model = get_vision_auxiliary_client()
|
|
|
|
|
assert client is None
|
|
|
|
|
assert model is None
|
|
|
|
|
|
2026-03-08 18:50:26 -07:00
|
|
|
def test_vision_forced_codex(self, monkeypatch, codex_auth_dir):
|
|
|
|
|
"""When forced to 'codex', vision uses Codex OAuth."""
|
|
|
|
|
monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "codex")
|
|
|
|
|
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
|
|
|
|
patch("agent.auxiliary_client.OpenAI"):
|
feat: add 'openai' as auxiliary provider option
Users can now set provider: "openai" for auxiliary tasks (vision, web
extract, compression) to use OpenAI's API directly with their
OPENAI_API_KEY. This hits api.openai.com/v1 with gpt-4o-mini as the
default model — supports vision since GPT-4o handles image input.
Provider options are now: auto, openrouter, nous, openai, main.
Changes:
- agent/auxiliary_client.py: added _try_openai(), "openai" case in
_resolve_forced_provider(), updated auxiliary_max_tokens_param()
to use max_completion_tokens for OpenAI
- Updated docs: cli-config.yaml.example, AGENTS.md, and user-facing
configuration.md with Common Setups section showing OpenAI,
OpenRouter, and local model examples
- 3 new tests for OpenAI provider resolution
Tests: 2459 passed (was 2429).
2026-03-08 18:25:30 -07:00
|
|
|
client, model = get_vision_auxiliary_client()
|
2026-03-08 18:50:26 -07:00
|
|
|
from agent.auxiliary_client import CodexAuxiliaryClient
|
|
|
|
|
assert isinstance(client, CodexAuxiliaryClient)
|
2026-03-14 23:21:09 -07:00
|
|
|
assert model == "gpt-5.2-codex"
|
feat: add 'openai' as auxiliary provider option
Users can now set provider: "openai" for auxiliary tasks (vision, web
extract, compression) to use OpenAI's API directly with their
OPENAI_API_KEY. This hits api.openai.com/v1 with gpt-4o-mini as the
default model — supports vision since GPT-4o handles image input.
Provider options are now: auto, openrouter, nous, openai, main.
Changes:
- agent/auxiliary_client.py: added _try_openai(), "openai" case in
_resolve_forced_provider(), updated auxiliary_max_tokens_param()
to use max_completion_tokens for OpenAI
- Updated docs: cli-config.yaml.example, AGENTS.md, and user-facing
configuration.md with Common Setups section showing OpenAI,
OpenRouter, and local model examples
- 3 new tests for OpenAI provider resolution
Tests: 2459 passed (was 2429).
2026-03-08 18:25:30 -07:00
|
|
|
|
2026-03-08 18:06:40 -07:00
|
|
|
|
|
|
|
|
class TestGetAuxiliaryProvider:
    """Tests for _get_auxiliary_provider env var resolution."""

    def test_no_task_returns_auto(self):
        # No task name (or an empty one) means no override: default to auto.
        assert _get_auxiliary_provider() == "auto"
        assert _get_auxiliary_provider("") == "auto"

    def test_auxiliary_prefix_takes_priority(self, monkeypatch):
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "openrouter")
        assert _get_auxiliary_provider("vision") == "openrouter"

    def test_context_prefix_fallback(self, monkeypatch):
        # Legacy CONTEXT_* spelling still resolves for the compression task.
        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
        assert _get_auxiliary_provider("compression") == "nous"

    def test_auxiliary_prefix_over_context_prefix(self, monkeypatch):
        # When both spellings are set, the AUXILIARY_* form wins.
        monkeypatch.setenv("AUXILIARY_COMPRESSION_PROVIDER", "openrouter")
        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
        assert _get_auxiliary_provider("compression") == "openrouter"

    def test_auto_value_treated_as_auto(self, monkeypatch):
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "auto")
        assert _get_auxiliary_provider("vision") == "auto"

    def test_whitespace_stripped(self, monkeypatch):
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", " openrouter ")
        assert _get_auxiliary_provider("vision") == "openrouter"

    def test_case_insensitive(self, monkeypatch):
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "OpenRouter")
        assert _get_auxiliary_provider("vision") == "openrouter"

    def test_main_provider(self, monkeypatch):
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "main")
        assert _get_auxiliary_provider("web_extract") == "main"
|
|
|
|
|
class TestResolveForcedProvider:
    """Tests for _resolve_forced_provider with explicit provider selection."""

    def test_forced_openrouter(self, monkeypatch):
        """With an OpenRouter key present, forcing 'openrouter' yields its default model."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = _resolve_forced_provider("openrouter")
        assert model == "google/gemini-3-flash-preview"
        assert client is not None

    def test_forced_openrouter_no_key(self, monkeypatch):
        """Forcing 'openrouter' without a key yields (None, None) — no silent fallback."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            client, model = _resolve_forced_provider("openrouter")
        assert client is None
        assert model is None

    def test_forced_nous(self, monkeypatch):
        """Forcing 'nous' with valid auth yields a client and the default model."""
        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
                patch("agent.auxiliary_client.OpenAI"):
            mock_nous.return_value = {"access_token": "nous-tok"}
            client, model = _resolve_forced_provider("nous")
        assert model == "google/gemini-3-flash-preview"
        assert client is not None

    def test_forced_nous_not_configured(self, monkeypatch):
        """Forcing 'nous' without stored auth yields (None, None)."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            client, model = _resolve_forced_provider("nous")
        assert client is None
        assert model is None

    def test_forced_main_uses_custom(self, monkeypatch):
        """Forcing 'main' routes to the config.yaml custom endpoint and its model."""
        config = {
            "model": {
                "provider": "custom",
                "base_url": "http://local:8080/v1",
                "default": "my-local-model",
            }
        }
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = _resolve_forced_provider("main")
        assert model == "my-local-model"

    def test_forced_main_uses_config_saved_custom_endpoint(self, monkeypatch):
        """With all other providers unavailable, 'main' builds a client at the config base_url."""
        config = {
            "model": {
                "provider": "custom",
                "base_url": "http://local:8080/v1",
                "default": "my-local-model",
            }
        }
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
                patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = _resolve_forced_provider("main")
        assert client is not None
        assert model == "my-local-model"
        call_kwargs = mock_openai.call_args
        assert call_kwargs.kwargs["base_url"] == "http://local:8080/v1"

    def test_forced_main_skips_openrouter_nous(self, monkeypatch):
        """Even if OpenRouter key is set, 'main' skips it."""
        config = {
            "model": {
                "provider": "custom",
                "base_url": "http://local:8080/v1",
                "default": "my-local-model",
            }
        }
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = _resolve_forced_provider("main")
        # Should use custom endpoint, not OpenRouter
        assert model == "my-local-model"

    def test_forced_main_falls_to_codex(self, codex_auth_dir, monkeypatch):
        """'main' falls through to the Codex adapter when Codex auth exists."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI"):
            client, model = _resolve_forced_provider("main")
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.2-codex"

    def test_forced_codex(self, codex_auth_dir, monkeypatch):
        """Forcing 'codex' with stored auth yields the Codex adapter client."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI"):
            client, model = _resolve_forced_provider("codex")
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
        assert model == "gpt-5.2-codex"

    def test_forced_codex_no_token(self, monkeypatch):
        """Forcing 'codex' without an access token yields (None, None)."""
        with patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            client, model = _resolve_forced_provider("codex")
        assert client is None
        assert model is None

    def test_forced_unknown_returns_none(self, monkeypatch):
        """An unrecognized provider name yields (None, None)."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            client, model = _resolve_forced_provider("invalid-provider")
        assert client is None
        assert model is None
|
|
|
|
|
class TestTaskSpecificOverrides:
    """Integration tests for per-task provider routing via get_text_auxiliary_client(task=...)."""

    def test_text_with_vision_provider_override(self, monkeypatch):
        """AUXILIARY_VISION_PROVIDER should not affect text tasks."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "nous")
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()  # no task → auto
        assert model == "google/gemini-3-flash-preview"  # OpenRouter, not Nous

    def test_compression_task_reads_context_prefix(self, monkeypatch):
        """Compression task should check CONTEXT_COMPRESSION_PROVIDER env var."""
        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")  # would win in auto
        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
                patch("agent.auxiliary_client.OpenAI"):
            mock_nous.return_value = {"access_token": "***"}
            client, model = get_text_auxiliary_client("compression")
        # Config-first: model comes from config.yaml summary_model default,
        # but provider is forced to Nous via env var
        assert client is not None

    def test_web_extract_task_override(self, monkeypatch):
        """AUXILIARY_WEB_EXTRACT_PROVIDER routes the web_extract task."""
        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "openrouter")
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client("web_extract")
        assert model == "google/gemini-3-flash-preview"

    def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
        """auxiliary.<task>.base_url/api_key/model in config.yaml builds a direct client."""
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "config.yaml").write_text(
            """auxiliary:
  web_extract:
    base_url: http://localhost:3456/v1
    api_key: config-key
    model: config-model
"""
        )
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client("web_extract")
        assert model == "config-model"
        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1"
        assert mock_openai.call_args.kwargs["api_key"] == "config-key"

    def test_task_without_override_uses_auto(self, monkeypatch):
        """A task with no provider env var falls through to auto chain."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client("compression")
        assert model == "google/gemini-3-flash-preview"  # auto → OpenRouter

    def test_compression_summary_base_url_from_config(self, monkeypatch, tmp_path):
        """compression.summary_base_url should produce a custom-endpoint client."""
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "config.yaml").write_text(
            """compression:
  summary_provider: custom
  summary_model: glm-4.7
  summary_base_url: https://api.z.ai/api/coding/paas/v4
"""
        )
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        # Custom endpoints need an API key to build the client
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client("compression")
        assert model == "glm-4.7"
        assert mock_openai.call_args.kwargs["base_url"] == "https://api.z.ai/api/coding/paas/v4"
2026-02-28 21:47:51 -08:00
|
|
|
|
|
|
|
|
class TestAuxiliaryMaxTokensParam:
    """Tests for auxiliary_max_tokens_param across resolved providers."""

    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
        """Codex adapter translates max_tokens internally, so we return max_tokens."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"):
            result = auxiliary_max_tokens_param(1024)
        assert result == {"max_tokens": 1024}

    def test_openrouter_uses_max_tokens(self, monkeypatch):
        """OpenRouter accepts the plain max_tokens parameter."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        result = auxiliary_max_tokens_param(1024)
        assert result == {"max_tokens": 1024}

    def test_no_provider_uses_max_tokens(self):
        """With nothing configured, the default parameter name is max_tokens."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            result = auxiliary_max_tokens_param(1024)
        assert result == {"max_tokens": 1024}