Replace the fragile hardcoded context length system with a multi-source resolution chain that correctly identifies context windows per provider.

Key changes:

- New agent/models_dev.py: Fetches and caches the models.dev registry (3800+ models across 100+ providers with per-provider context windows). In-memory cache (1hr TTL) + disk cache for cold starts.
- Rewritten get_model_context_length() resolution chain:
  0. Config override (model.context_length)
  1. Custom providers per-model context_length
  2. Persistent disk cache
  3. Endpoint /models (local servers)
  4. Anthropic /v1/models API (max_input_tokens, API-key only)
  5. OpenRouter live API (existing, unchanged)
  6. Nous suffix-match via OpenRouter (dot/dash normalization)
  7. models.dev registry lookup (provider-aware)
  8. Thin hardcoded defaults (broad family patterns)
  9. 128K fallback (was 2M)
- Provider-aware context: same model now correctly resolves to different context windows per provider (e.g. claude-opus-4.6: 1M on Anthropic, 128K on GitHub Copilot). Provider name flows through ContextCompressor.
- DEFAULT_CONTEXT_LENGTHS shrunk from 80+ entries to ~16 broad patterns. models.dev replaces the per-model hardcoding.
- CONTEXT_PROBE_TIERS changed from [2M, 1M, 512K, 200K, 128K, 64K, 32K] to [128K, 64K, 32K, 16K, 8K]. Unknown models no longer start at 2M.
- hermes model: prompts for context_length when configuring custom endpoints. Supports shorthand (32k, 128K). Saved to custom_providers per-model config.
- custom_providers schema extended with optional models dict for per-model context_length (backward compatible).
- Nous Portal: suffix-matches bare IDs (claude-opus-4-6) against OpenRouter's prefixed IDs (anthropic/claude-opus-4.6) with dot/dash normalization. Handles all 15 current Nous models.
- Anthropic direct: queries /v1/models for max_input_tokens. Only works with regular API keys (sk-ant-api*), not OAuth tokens. Falls through to models.dev for OAuth users.
Tests: 5574 passed (18 new tests for models_dev + updated probe tiers)
Docs: Updated configuration.md context length section, AGENTS.md

Co-authored-by: Test <test@test.com>
471 lines
18 KiB
Python
471 lines
18 KiB
Python
import importlib
|
|
import sys
|
|
import types
|
|
from contextlib import nullcontext
|
|
from types import SimpleNamespace
|
|
|
|
from hermes_cli.auth import AuthError
|
|
from hermes_cli import main as hermes_main
|
|
|
|
|
|
def _install_prompt_toolkit_stubs():
|
|
class _Dummy:
|
|
def __init__(self, *args, **kwargs):
|
|
pass
|
|
|
|
class _Condition:
|
|
def __init__(self, func):
|
|
self.func = func
|
|
|
|
def __bool__(self):
|
|
return bool(self.func())
|
|
|
|
class _ANSI(str):
|
|
pass
|
|
|
|
root = types.ModuleType("prompt_toolkit")
|
|
history = types.ModuleType("prompt_toolkit.history")
|
|
styles = types.ModuleType("prompt_toolkit.styles")
|
|
patch_stdout = types.ModuleType("prompt_toolkit.patch_stdout")
|
|
application = types.ModuleType("prompt_toolkit.application")
|
|
layout = types.ModuleType("prompt_toolkit.layout")
|
|
processors = types.ModuleType("prompt_toolkit.layout.processors")
|
|
filters = types.ModuleType("prompt_toolkit.filters")
|
|
dimension = types.ModuleType("prompt_toolkit.layout.dimension")
|
|
menus = types.ModuleType("prompt_toolkit.layout.menus")
|
|
widgets = types.ModuleType("prompt_toolkit.widgets")
|
|
key_binding = types.ModuleType("prompt_toolkit.key_binding")
|
|
completion = types.ModuleType("prompt_toolkit.completion")
|
|
formatted_text = types.ModuleType("prompt_toolkit.formatted_text")
|
|
|
|
history.FileHistory = _Dummy
|
|
styles.Style = _Dummy
|
|
patch_stdout.patch_stdout = lambda *args, **kwargs: nullcontext()
|
|
application.Application = _Dummy
|
|
layout.Layout = _Dummy
|
|
layout.HSplit = _Dummy
|
|
layout.Window = _Dummy
|
|
layout.FormattedTextControl = _Dummy
|
|
layout.ConditionalContainer = _Dummy
|
|
processors.Processor = _Dummy
|
|
processors.Transformation = _Dummy
|
|
processors.PasswordProcessor = _Dummy
|
|
processors.ConditionalProcessor = _Dummy
|
|
filters.Condition = _Condition
|
|
dimension.Dimension = _Dummy
|
|
menus.CompletionsMenu = _Dummy
|
|
widgets.TextArea = _Dummy
|
|
key_binding.KeyBindings = _Dummy
|
|
completion.Completer = _Dummy
|
|
completion.Completion = _Dummy
|
|
formatted_text.ANSI = _ANSI
|
|
root.print_formatted_text = lambda *args, **kwargs: None
|
|
|
|
sys.modules.setdefault("prompt_toolkit", root)
|
|
sys.modules.setdefault("prompt_toolkit.history", history)
|
|
sys.modules.setdefault("prompt_toolkit.styles", styles)
|
|
sys.modules.setdefault("prompt_toolkit.patch_stdout", patch_stdout)
|
|
sys.modules.setdefault("prompt_toolkit.application", application)
|
|
sys.modules.setdefault("prompt_toolkit.layout", layout)
|
|
sys.modules.setdefault("prompt_toolkit.layout.processors", processors)
|
|
sys.modules.setdefault("prompt_toolkit.filters", filters)
|
|
sys.modules.setdefault("prompt_toolkit.layout.dimension", dimension)
|
|
sys.modules.setdefault("prompt_toolkit.layout.menus", menus)
|
|
sys.modules.setdefault("prompt_toolkit.widgets", widgets)
|
|
sys.modules.setdefault("prompt_toolkit.key_binding", key_binding)
|
|
sys.modules.setdefault("prompt_toolkit.completion", completion)
|
|
sys.modules.setdefault("prompt_toolkit.formatted_text", formatted_text)
|
|
|
|
|
|
def _import_cli():
    """Import and return the project ``cli`` module.

    If the real ``prompt_toolkit`` package cannot be imported, install the
    lightweight stub modules first so that ``cli``'s top-level imports
    succeed.
    """
    try:
        importlib.import_module("prompt_toolkit")
    except ModuleNotFoundError:
        _install_prompt_toolkit_stubs()
    return importlib.import_module("cli")
|
|
|
|
|
|
def test_hermes_cli_init_does_not_eagerly_resolve_runtime_provider(monkeypatch):
    """HermesCLI.__init__ must not resolve the runtime provider eagerly."""
    cli = _import_cli()
    resolve_calls = []

    def _fail_if_resolved(**kwargs):
        # Record the call before failing so the final assertion is precise.
        resolve_calls.append(kwargs)
        raise AssertionError("resolve_runtime_provider should not be called in HermesCLI.__init__")

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _fail_if_resolved)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)

    assert shell is not None
    assert not resolve_calls
|
|
|
|
|
|
def test_runtime_resolution_failure_is_not_sticky(monkeypatch):
    """A failed runtime resolution must be retried on the next _init_agent call."""
    cli = _import_cli()
    attempts = []

    runtime = {
        "provider": "openrouter",
        "api_mode": "chat_completions",
        "base_url": "https://openrouter.ai/api/v1",
        "api_key": "test-key",
        "source": "env/config",
    }

    def _resolve_flaky(**kwargs):
        # Fail exactly once, then succeed — resolution must not stay broken.
        attempts.append(kwargs)
        if len(attempts) == 1:
            raise RuntimeError("temporary auth failure")
        return dict(runtime)

    class _StubAgent:
        # Minimal AIAgent replacement; just remembers its kwargs.
        def __init__(self, *args, **kwargs):
            self.kwargs = kwargs

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _resolve_flaky)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    monkeypatch.setattr(cli, "AIAgent", _StubAgent)

    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)

    assert shell._init_agent() is False
    assert shell._init_agent() is True
    assert len(attempts) == 2
    assert shell.agent is not None
|
|
|
|
|
|
def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch):
    """Provider/api_mode changes must drop the stale agent even when the
    endpoint URL and API key are unchanged."""
    cli = _import_cli()

    resolved = {
        "provider": "openai-codex",
        "api_mode": "codex_responses",
        "base_url": "https://same-endpoint.example/v1",
        "api_key": "same-key",
        "source": "env/config",
    }
    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        lambda **kwargs: dict(resolved),
    )
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)
    # Simulate an already-built agent on a different routing but the same
    # base URL and key.
    shell.provider = "openrouter"
    shell.api_mode = "chat_completions"
    shell.base_url = "https://same-endpoint.example/v1"
    shell.api_key = "same-key"
    shell.agent = object()

    assert shell._ensure_runtime_credentials() is True
    assert shell.agent is None
    assert shell.provider == "openai-codex"
    assert shell.api_mode == "codex_responses"
|
|
|
|
|
|
def test_cli_turn_routing_uses_primary_when_disabled(monkeypatch):
    """With smart routing disabled, every turn uses the primary model and runtime."""
    cli = _import_cli()

    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)
    shell.provider = "openrouter"
    shell.api_mode = "chat_completions"
    shell.base_url = "https://openrouter.ai/api/v1"
    shell.api_key = "sk-primary"
    shell._smart_model_routing = {"enabled": False}

    turn = shell._resolve_turn_agent_config("what time is it in tokyo?")

    assert turn["model"] == "gpt-5"
    assert turn["runtime"]["provider"] == "openrouter"
    assert turn["label"] is None
|
|
|
|
|
|
def test_cli_turn_routing_uses_cheap_model_when_simple(monkeypatch):
    """A short, simple prompt routes to the configured cheap model/provider."""
    cli = _import_cli()

    cheap_runtime = {
        "provider": "zai",
        "api_mode": "chat_completions",
        "base_url": "https://open.z.ai/api/v1",
        "api_key": "cheap-key",
        "source": "env/config",
    }

    def _resolve_cheap(**kwargs):
        # Routing must ask for the cheap model's provider, not the primary.
        assert kwargs["requested"] == "zai"
        return dict(cheap_runtime)

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _resolve_cheap)

    shell = cli.HermesCLI(model="anthropic/claude-sonnet-4", compact=True, max_turns=1)
    shell.provider = "openrouter"
    shell.api_mode = "chat_completions"
    shell.base_url = "https://openrouter.ai/api/v1"
    shell.api_key = "primary-key"
    shell._smart_model_routing = {
        "enabled": True,
        "cheap_model": {"provider": "zai", "model": "glm-5-air"},
        "max_simple_chars": 160,
        "max_simple_words": 28,
    }

    turn = shell._resolve_turn_agent_config("what time is it in tokyo?")

    assert turn["model"] == "glm-5-air"
    assert turn["runtime"]["provider"] == "zai"
    assert turn["runtime"]["api_key"] == "cheap-key"
    assert turn["label"] is not None
|
|
|
|
|
|
def test_cli_prefers_config_provider_over_stale_env_override(monkeypatch):
    """model.provider from config must win over a stale env provider override."""
    cli = _import_cli()

    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openrouter")
    patched_config = dict(cli.CLI_CONFIG)
    patched_config["model"] = {
        **patched_config.get("model", {}),
        "provider": "custom",
        "base_url": "https://api.fireworks.ai/inference/v1",
    }
    monkeypatch.setattr(cli, "CLI_CONFIG", patched_config)

    shell = cli.HermesCLI(model="fireworks/minimax-m2p5", compact=True, max_turns=1)

    assert shell.requested_provider == "custom"
|
|
|
|
|
|
def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
    """When provider resolves to openai-codex and no model was explicitly
    chosen, the global config default (e.g. anthropic/claude-opus-4.6) must
    be replaced with a Codex-compatible model. Fixes #651."""
    cli = _import_cli()

    for env_var in ("LLM_MODEL", "OPENAI_MODEL"):
        monkeypatch.delenv(env_var, raising=False)
    # Ensure local user config does not leak a model into the test
    monkeypatch.setitem(cli.CLI_CONFIG, "model", {
        "default": "",
        "base_url": "https://openrouter.ai/api/v1",
    })

    codex_runtime = {
        "provider": "openai-codex",
        "api_mode": "codex_responses",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "api_key": "test-key",
        "source": "env/config",
    }
    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        lambda **kwargs: dict(codex_runtime),
    )
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    monkeypatch.setattr(
        "hermes_cli.codex_models.get_codex_model_ids",
        lambda access_token=None: ["gpt-5.2-codex", "gpt-5.1-codex-mini"],
    )

    shell = cli.HermesCLI(compact=True, max_turns=1)

    assert shell._model_is_default is True
    assert shell._ensure_runtime_credentials() is True
    assert shell.provider == "openai-codex"
    assert "anthropic" not in shell.model
    assert "claude" not in shell.model
    assert shell.model == "gpt-5.2-codex"
|
|
|
|
|
|
def test_codex_provider_uses_config_model(monkeypatch):
    """Model comes from config.yaml, not LLM_MODEL env var.
    Config.yaml is the single source of truth to avoid multi-agent conflicts."""
    cli = _import_cli()

    # LLM_MODEL env var should be IGNORED (even if set)
    monkeypatch.setenv("LLM_MODEL", "should-be-ignored")
    monkeypatch.delenv("OPENAI_MODEL", raising=False)

    # Set model via config
    monkeypatch.setitem(cli.CLI_CONFIG, "model", {
        "default": "gpt-5.2-codex",
        "provider": "openai-codex",
        "base_url": "https://chatgpt.com/backend-api/codex",
    })

    codex_runtime = {
        "provider": "openai-codex",
        "api_mode": "codex_responses",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "api_key": "fake-codex-token",
        "source": "env/config",
    }
    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        lambda **kwargs: dict(codex_runtime),
    )
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    # Prevent live API call from overriding the config model
    monkeypatch.setattr(
        "hermes_cli.codex_models.get_codex_model_ids",
        lambda access_token=None: ["gpt-5.2-codex"],
    )

    shell = cli.HermesCLI(compact=True, max_turns=1)

    assert shell._ensure_runtime_credentials() is True
    assert shell.provider == "openai-codex"
    # Model from config (may be normalized by codex provider logic)
    assert "codex" in shell.model.lower()
    # LLM_MODEL env var is NOT used
    assert shell.model != "should-be-ignored"
|
|
|
|
|
|
def test_codex_config_model_not_replaced_by_normalization(monkeypatch):
    """When the user sets model.default in config.yaml to a specific codex
    model, _normalize_model_for_provider must NOT replace it with the latest
    available model from the API. Regression test for #1887."""
    cli = _import_cli()

    for env_var in ("LLM_MODEL", "OPENAI_MODEL"):
        monkeypatch.delenv(env_var, raising=False)

    # User explicitly configured gpt-5.3-codex in config.yaml
    monkeypatch.setitem(cli.CLI_CONFIG, "model", {
        "default": "gpt-5.3-codex",
        "provider": "openai-codex",
        "base_url": "https://chatgpt.com/backend-api/codex",
    })

    codex_runtime = {
        "provider": "openai-codex",
        "api_mode": "codex_responses",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "api_key": "fake-key",
        "source": "env/config",
    }
    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        lambda **kwargs: dict(codex_runtime),
    )
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    # API returns a DIFFERENT model than what the user configured
    monkeypatch.setattr(
        "hermes_cli.codex_models.get_codex_model_ids",
        lambda access_token=None: ["gpt-5.4", "gpt-5.3-codex"],
    )

    shell = cli.HermesCLI(compact=True, max_turns=1)

    # Config model is NOT the global default — user made a deliberate choice
    assert shell._model_is_default is False
    assert shell._ensure_runtime_credentials() is True
    assert shell.provider == "openai-codex"
    # Model must stay as user configured, not replaced by gpt-5.4
    assert shell.model == "gpt-5.3-codex"
|
|
|
|
|
|
def test_codex_provider_preserves_explicit_codex_model(monkeypatch):
    """If the user explicitly passes a Codex-compatible model, it must be
    preserved even when the provider resolves to openai-codex."""
    cli = _import_cli()

    for env_var in ("LLM_MODEL", "OPENAI_MODEL"):
        monkeypatch.delenv(env_var, raising=False)

    codex_runtime = {
        "provider": "openai-codex",
        "api_mode": "codex_responses",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "api_key": "test-key",
        "source": "env/config",
    }
    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        lambda **kwargs: dict(codex_runtime),
    )
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    shell = cli.HermesCLI(model="gpt-5.1-codex-mini", compact=True, max_turns=1)

    assert shell._model_is_default is False
    assert shell._ensure_runtime_credentials() is True
    assert shell.model == "gpt-5.1-codex-mini"
|
|
|
|
|
|
def test_codex_provider_strips_provider_prefix_from_model(monkeypatch):
    """openai/gpt-5.3-codex should become gpt-5.3-codex — the Codex
    Responses API does not accept provider-prefixed model slugs."""
    cli = _import_cli()

    for env_var in ("LLM_MODEL", "OPENAI_MODEL"):
        monkeypatch.delenv(env_var, raising=False)

    codex_runtime = {
        "provider": "openai-codex",
        "api_mode": "codex_responses",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "api_key": "test-key",
        "source": "env/config",
    }
    monkeypatch.setattr(
        "hermes_cli.runtime_provider.resolve_runtime_provider",
        lambda **kwargs: dict(codex_runtime),
    )
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))

    shell = cli.HermesCLI(model="openai/gpt-5.3-codex", compact=True, max_turns=1)

    assert shell._ensure_runtime_credentials() is True
    assert shell.model == "gpt-5.3-codex"
|
|
|
|
|
|
def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys):
    """cmd_model warns and falls back to auto detection on an invalid provider."""
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"model": {"default": "gpt-5", "provider": "invalid-provider"}},
    )
    monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)
    monkeypatch.setattr("hermes_cli.config.get_env_value", lambda key: "")
    monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None)

    def _resolve_provider(requested, **kwargs):
        # Only the configured (bad) provider fails; everything else resolves.
        if requested != "invalid-provider":
            return "openrouter"
        raise AuthError("Unknown provider 'invalid-provider'.", code="invalid_provider")

    monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider)
    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1)

    hermes_main.cmd_model(SimpleNamespace())
    printed = capsys.readouterr().out

    assert "Warning:" in printed
    assert "falling back to auto provider detection" in printed.lower()
    assert "No change." in printed
|
|
|
|
|
|
def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys):
    """_model_flow_custom must persist the probe-verified ``/v1`` base URL
    (not the bare URL the user typed), along with the API key and model.

    Fix: the original ``get_env_value`` stub used a dead conditional
    (``"" if key in {...} else ""``) whose branches were identical; it is
    collapsed to a plain ``""`` with no behavior change.
    """
    # No pre-existing env values for any key.
    monkeypatch.setattr("hermes_cli.config.get_env_value", lambda key: "")

    saved_env = {}
    monkeypatch.setattr(
        "hermes_cli.config.save_env_value",
        lambda key, value: saved_env.__setitem__(key, value),
    )
    monkeypatch.setattr(
        "hermes_cli.auth._save_model_choice",
        lambda model: saved_env.__setitem__("MODEL", model),
    )
    monkeypatch.setattr("hermes_cli.auth.deactivate_provider", lambda: None)
    monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *args, **kwargs: None)
    # Probe reports the typed URL only worked after falling back to /v1.
    monkeypatch.setattr(
        "hermes_cli.models.probe_api_models",
        lambda api_key, base_url: {
            "models": ["llm"],
            "probed_url": "http://localhost:8000/v1/models",
            "resolved_base_url": "http://localhost:8000/v1",
            "suggested_base_url": "http://localhost:8000/v1",
            "used_fallback": True,
        },
    )
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"model": {"default": "", "provider": "custom", "base_url": ""}},
    )
    monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)

    # Scripted interactive answers: base URL, API key, model, then one blank
    # to accept a default (presumably the context-length prompt — verify
    # against the prompt flow if it changes).
    answers = iter(["http://localhost:8000", "local-key", "llm", ""])
    monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))

    hermes_main._model_flow_custom({})
    output = capsys.readouterr().out

    assert "Saving the working base URL instead" in output
    assert saved_env["OPENAI_BASE_URL"] == "http://localhost:8000/v1"
    assert saved_env["OPENAI_API_KEY"] == "local-key"
    assert saved_env["MODEL"] == "llm"