2026-03-11 01:33:29 +05:30
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
from hermes_cli.auth import _update_config_for_provider, get_active_provider
|
|
|
|
|
from hermes_cli.config import load_config, save_config
|
|
|
|
|
from hermes_cli.setup import setup_model_provider
|
|
|
|
|
|
|
|
|
|
|
2026-03-17 04:01:37 -07:00
|
|
|
def _maybe_keep_current_tts(question, choices):
|
|
|
|
|
if question != "Select TTS provider:":
|
|
|
|
|
return None
|
|
|
|
|
assert choices[-1].startswith("Keep current (")
|
|
|
|
|
return len(choices) - 1
|
|
|
|
|
|
|
|
|
|
|
2026-03-11 01:33:29 +05:30
|
|
|
def _clear_provider_env(monkeypatch):
|
|
|
|
|
for key in (
|
|
|
|
|
"NOUS_API_KEY",
|
|
|
|
|
"OPENROUTER_API_KEY",
|
|
|
|
|
"OPENAI_BASE_URL",
|
|
|
|
|
"OPENAI_API_KEY",
|
|
|
|
|
"LLM_MODEL",
|
|
|
|
|
):
|
|
|
|
|
monkeypatch.delenv(key, raising=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_nous_oauth_setup_keeps_current_model_when_syncing_disk_provider(
    tmp_path, monkeypatch
):
    """Selecting Nous OAuth syncs the on-disk provider while offering to keep
    the currently configured default model — and honours that choice."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)

    config = load_config()

    def choose(question, choices, default=0):
        # Drive the interactive setup: pick Nous Portal, skip vision config,
        # and keep the current default model when offered.
        if question == "Select your inference provider:":
            return 1  # Nous Portal
        if question == "Configure vision:":
            return len(choices) - 1
        if question == "Select default model:":
            assert choices[-1] == "Keep current (anthropic/claude-opus-4.6)"
            return len(choices) - 1
        keep_idx = _maybe_keep_current_tts(question, choices)
        if keep_idx is None:
            raise AssertionError(f"Unexpected prompt_choice call: {question}")
        return keep_idx

    monkeypatch.setattr("hermes_cli.setup.prompt_choice", choose)
    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])

    def stub_login_nous(*args, **kwargs):
        # Simulate a successful OAuth login: write auth.json and point the
        # config at the Nous inference endpoint.
        auth_path = tmp_path / "auth.json"
        auth_path.write_text(json.dumps({"active_provider": "nous", "providers": {}}))
        _update_config_for_provider("nous", "https://inference.example.com/v1")

    monkeypatch.setattr("hermes_cli.auth._login_nous", stub_login_nous)
    monkeypatch.setattr(
        "hermes_cli.auth.resolve_nous_runtime_credentials",
        lambda *args, **kwargs: {
            "base_url": "https://inference.example.com/v1",
            "api_key": "nous-key",
        },
    )
    monkeypatch.setattr(
        "hermes_cli.auth.fetch_nous_models",
        lambda *args, **kwargs: ["gemini-3-flash"],
    )

    setup_model_provider(config)
    save_config(config)

    reloaded = load_config()

    assert isinstance(reloaded["model"], dict)
    assert reloaded["model"]["provider"] == "nous"
    assert reloaded["model"]["base_url"] == "https://inference.example.com/v1"
    assert reloaded["model"]["default"] == "anthropic/claude-opus-4.6"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_custom_setup_clears_active_oauth_provider(tmp_path, monkeypatch):
    """Configuring a custom endpoint must drop any active OAuth provider."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)

    # Start with Nous recorded on disk as the active OAuth provider.
    (tmp_path / "auth.json").write_text(
        json.dumps({"active_provider": "nous", "providers": {}})
    )

    config = load_config()

    def choose(question, choices, default=0):
        if question == "Select your inference provider:":
            return 3
        keep_idx = _maybe_keep_current_tts(question, choices)
        if keep_idx is None:
            raise AssertionError(f"Unexpected prompt_choice call: {question}")
        return keep_idx

    monkeypatch.setattr("hermes_cli.setup.prompt_choice", choose)

    # _model_flow_custom uses builtins.input (URL, key, model, context_length)
    answers = iter(
        [
            "https://custom.example/v1",
            "custom-api-key",
            "custom/model",
            "",  # context_length (blank = auto-detect)
        ]
    )
    monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))
    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
    monkeypatch.setattr(
        "hermes_cli.main._save_custom_provider", lambda *args, **kwargs: None
    )
    monkeypatch.setattr(
        "hermes_cli.models.probe_api_models",
        lambda api_key, base_url: {"models": ["m"], "probed_url": base_url + "/models"},
    )

    setup_model_provider(config)

    # Core assertion: switching to custom endpoint clears OAuth provider
    assert get_active_provider() is None

    # _model_flow_custom writes config via its own load/save cycle
    reloaded = load_config()
    if isinstance(reloaded.get("model"), dict):
        assert reloaded["model"].get("provider") == "custom"
        assert reloaded["model"].get("default") == "custom/model"
|
2026-03-13 21:06:06 -07:00
|
|
|
|
|
|
|
|
|
2026-03-13 21:18:29 -07:00
|
|
|
def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, monkeypatch):
    """Codex setup should fetch the live model list with the runtime OAuth
    access token (not an environment API key) and persist the Codex config.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)
    # Must be set AFTER _clear_provider_env, which deletes OPENROUTER_API_KEY.
    # (The original set it once before the clear as well — a dead no-op.)
    monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")

    config = load_config()

    def fake_prompt_choice(question, choices, default=0):
        # Drive the interactive setup: pick OpenAI Codex, take the first
        # model offered, and keep the current TTS provider.
        if question == "Select your inference provider:":
            return 2  # OpenAI Codex
        if question == "Select default model:":
            return 0
        tts_idx = _maybe_keep_current_tts(question, choices)
        if tts_idx is not None:
            return tts_idx
        raise AssertionError(f"Unexpected prompt_choice call: {question}")

    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
    monkeypatch.setattr("hermes_cli.auth._login_openai_codex", lambda *args, **kwargs: None)
    monkeypatch.setattr(
        "hermes_cli.auth.resolve_codex_runtime_credentials",
        lambda *args, **kwargs: {
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "codex-access-token",
        },
    )

    # Capture the token the model-list fetch is actually called with.
    captured = {}

    def _fake_get_codex_model_ids(access_token=None):
        captured["access_token"] = access_token
        return ["gpt-5.2-codex", "gpt-5.2"]

    monkeypatch.setattr(
        "hermes_cli.codex_models.get_codex_model_ids",
        _fake_get_codex_model_ids,
    )

    setup_model_provider(config)
    save_config(config)

    reloaded = load_config()

    assert captured["access_token"] == "codex-access-token"
    assert isinstance(reloaded["model"], dict)
    assert reloaded["model"]["provider"] == "openai-codex"
    assert reloaded["model"]["default"] == "gpt-5.2-codex"
    assert reloaded["model"]["base_url"] == "https://chatgpt.com/backend-api/codex"
|