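"""Tests for Hermes CLI runtime provider resolution.

Covers lazy (non-eager, non-sticky) provider resolution in HermesCLI,
per-turn smart model routing, Codex model normalization, and the
interactive model-configuration flows in hermes_cli.main.
"""
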
import importlib
import sys
import types
from contextlib import nullcontext
from types import SimpleNamespace
from hermes_cli.auth import AuthError
from hermes_cli import main as hermes_main
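

# Importing the cli module pulls in prompt_toolkit at import time. When the
# real library is not installed, the helper below registers minimal stand-in
# modules in sys.modules so the import still succeeds; setdefault() ensures a
# real, already-loaded prompt_toolkit is never overwritten.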
def _install_prompt_toolkit_stubs():
    class _Dummy:
        def __init__(self, *args, **kwargs):
            pass

    class _Condition:
        def __init__(self, func):
            self.func = func

        def __bool__(self):
            return bool(self.func())

    class _ANSI(str):
        pass

    root = types.ModuleType("prompt_toolkit")
    history = types.ModuleType("prompt_toolkit.history")
    styles = types.ModuleType("prompt_toolkit.styles")
    patch_stdout = types.ModuleType("prompt_toolkit.patch_stdout")
    application = types.ModuleType("prompt_toolkit.application")
    layout = types.ModuleType("prompt_toolkit.layout")
    processors = types.ModuleType("prompt_toolkit.layout.processors")
    filters = types.ModuleType("prompt_toolkit.filters")
    dimension = types.ModuleType("prompt_toolkit.layout.dimension")
    menus = types.ModuleType("prompt_toolkit.layout.menus")
    widgets = types.ModuleType("prompt_toolkit.widgets")
    key_binding = types.ModuleType("prompt_toolkit.key_binding")
    completion = types.ModuleType("prompt_toolkit.completion")
    formatted_text = types.ModuleType("prompt_toolkit.formatted_text")
    history.FileHistory = _Dummy
    styles.Style = _Dummy
    patch_stdout.patch_stdout = lambda *args, **kwargs: nullcontext()
    application.Application = _Dummy
    layout.Layout = _Dummy
    layout.HSplit = _Dummy
    layout.Window = _Dummy
    layout.FormattedTextControl = _Dummy
    layout.ConditionalContainer = _Dummy
    processors.Processor = _Dummy
    processors.Transformation = _Dummy
    processors.PasswordProcessor = _Dummy
    processors.ConditionalProcessor = _Dummy
    filters.Condition = _Condition
    dimension.Dimension = _Dummy
    menus.CompletionsMenu = _Dummy
    widgets.TextArea = _Dummy
    key_binding.KeyBindings = _Dummy
    completion.Completer = _Dummy
    completion.Completion = _Dummy
    formatted_text.ANSI = _ANSI
    root.print_formatted_text = lambda *args, **kwargs: None
    sys.modules.setdefault("prompt_toolkit", root)
    sys.modules.setdefault("prompt_toolkit.history", history)
    sys.modules.setdefault("prompt_toolkit.styles", styles)
    sys.modules.setdefault("prompt_toolkit.patch_stdout", patch_stdout)
    sys.modules.setdefault("prompt_toolkit.application", application)
    sys.modules.setdefault("prompt_toolkit.layout", layout)
    sys.modules.setdefault("prompt_toolkit.layout.processors", processors)
    sys.modules.setdefault("prompt_toolkit.filters", filters)
    sys.modules.setdefault("prompt_toolkit.layout.dimension", dimension)
    sys.modules.setdefault("prompt_toolkit.layout.menus", menus)
    sys.modules.setdefault("prompt_toolkit.widgets", widgets)
    sys.modules.setdefault("prompt_toolkit.key_binding", key_binding)
    sys.modules.setdefault("prompt_toolkit.completion", completion)
    sys.modules.setdefault("prompt_toolkit.formatted_text", formatted_text)


def _import_cli():
    """Import the cli module, stubbing prompt_toolkit if it is missing."""
    try:
        importlib.import_module("prompt_toolkit")
    except ModuleNotFoundError:
        _install_prompt_toolkit_stubs()
    return importlib.import_module("cli")


def test_hermes_cli_init_does_not_eagerly_resolve_runtime_provider(monkeypatch):
    """HermesCLI.__init__ must not resolve the runtime provider eagerly."""
    cli = _import_cli()
    calls = {"count": 0}

    def _unexpected_runtime_resolve(**kwargs):
        calls["count"] += 1
        raise AssertionError("resolve_runtime_provider should not be called in HermesCLI.__init__")

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _unexpected_runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)
    assert shell is not None
    assert calls["count"] == 0


def test_runtime_resolution_failure_is_not_sticky(monkeypatch):
    """A failed provider resolution must not be cached; a retry can succeed."""
    cli = _import_cli()
    calls = {"count": 0}

    def _runtime_resolve(**kwargs):
        calls["count"] += 1
        if calls["count"] == 1:
            raise RuntimeError("temporary auth failure")
        return {
            "provider": "openrouter",
            "api_mode": "chat_completions",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "test-key",
            "source": "env/config",
        }

    class _DummyAgent:
        def __init__(self, *args, **kwargs):
            self.kwargs = kwargs

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    monkeypatch.setattr(cli, "AIAgent", _DummyAgent)
    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)
    assert shell._init_agent() is False
    assert shell._init_agent() is True
    assert calls["count"] == 2
    assert shell.agent is not None


def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch):
    """A provider/api_mode change must drop the cached agent, even when the
    base URL and API key are unchanged."""
    cli = _import_cli()

    def _runtime_resolve(**kwargs):
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://same-endpoint.example/v1",
            "api_key": "same-key",
            "source": "env/config",
        }

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)
    shell.provider = "openrouter"
    shell.api_mode = "chat_completions"
    shell.base_url = "https://same-endpoint.example/v1"
    shell.api_key = "same-key"
    shell.agent = object()
    assert shell._ensure_runtime_credentials() is True
    assert shell.agent is None
    assert shell.provider == "openai-codex"
fix: hermes update causes dual gateways on macOS (launchd) (#1567) * feat: add optional smart model routing Add a conservative cheap-vs-strong routing option that can send very short/simple turns to a cheaper model across providers while keeping the primary model for complex work. Wire it through CLI, gateway, and cron, and document the config.yaml workflow. * fix(gateway): remove recursive ExecStop from systemd units, extend TimeoutStopSec to 60s * fix(gateway): avoid recursive ExecStop in user systemd unit * fix: extend ExecStop removal and TimeoutStopSec=60 to system unit The cherry-picked PR #1448 fix only covered the user systemd unit. The system unit had the same TimeoutStopSec=15 and could benefit from the same 60s timeout for clean shutdown. Also adds a regression test for the system unit. --------- Co-authored-by: Ninja <ninja@local> * feat(skills): add blender-mcp optional skill for 3D modeling Control a running Blender instance from Hermes via socket connection to the blender-mcp addon (port 9876). Supports creating 3D objects, materials, animations, and running arbitrary bpy code. Placed in optional-skills/ since it requires Blender 4.3+ desktop with a third-party addon manually started each session. * feat(acp): support slash commands in ACP adapter (#1532) Adds /help, /model, /tools, /context, /reset, /compact, /version to the ACP adapter (VS Code, Zed, JetBrains). Commands are handled directly in the server without instantiating the TUI — each command queries agent/session state and returns plain text. Unrecognized /commands fall through to the LLM as normal messages. /model uses detect_provider_for_model() for auto-detection when switching models, matching the CLI and gateway behavior. Fixes #1402 * fix(logging): improve error logging in session search tool (#1533) * fix(gateway): restart on retryable startup failures (#1517) * feat(email): add skip_attachments option via config.yaml * feat(email): add skip_attachments option via config.yaml Adds a config.yaml-driven option to skip email attachments in the gateway email adapter. Useful for malware protection and bandwidth savings. Configure in config.yaml: platforms: email: skip_attachments: true Based on PR #1521 by @an420eth, changed from env var to config.yaml (via PlatformConfig.extra) to match the project's config-first pattern. * docs: document skip_attachments option for email adapter * fix(telegram): retry on transient TLS failures during connect and send Add exponential-backoff retry (3 attempts) around initialize() to handle transient TLS resets during gateway startup. Also catches TimedOut and OSError in addition to NetworkError. Add exponential-backoff retry (3 attempts) around send_message() for NetworkError during message delivery, wrapping the existing Markdown fallback logic. Both imports are guarded with try/except ImportError for test environments where telegram is mocked. Based on PR #1527 by cmd8. Closes #1526. * feat: permissive block_anchor thresholds and unicode normalization (#1539) Salvaged from PR #1528 by an420eth. Closes #517. 
Improves _strategy_block_anchor in fuzzy_match.py: - Add unicode normalization (smart quotes, em/en-dashes, ellipsis, non-breaking spaces → ASCII) so LLM-produced unicode artifacts don't break anchor line matching - Lower thresholds: 0.10 for unique matches (was 0.70), 0.30 for multiple candidates — if first/last lines match exactly, the block is almost certainly correct - Use original (non-normalized) content for offset calculation to preserve correct character positions Tested: 3 new scenarios fixed (em-dash anchors, non-breaking space anchors, very-low-similarity unique matches), zero regressions on all 9 existing fuzzy match tests. Co-authored-by: an420eth <an420eth@users.noreply.github.com> * feat(cli): add file path autocomplete in the input prompt (#1545) When typing a path-like token (./ ../ ~/ / or containing /), the CLI now shows filesystem completions in the dropdown menu. Directories show a trailing slash and 'dir' label; files show their size. Completions are case-insensitive and capped at 30 entries. Triggered by tokens like: edit ./src/ma → shows ./src/main.py, ./src/manifest.json, ... check ~/doc → shows ~/docs/, ~/documents/, ... read /etc/hos → shows /etc/hosts, /etc/hostname, ... open tools/reg → shows tools/registry.py Slash command autocomplete (/help, /model, etc.) is unaffected — it still triggers when the input starts with /. Inspired by OpenCode PR #145 (file path completion menu). Implementation: - hermes_cli/commands.py: _extract_path_word() detects path-like tokens, _path_completions() yields filesystem Completions with size labels, get_completions() routes to paths vs slash commands - tests/hermes_cli/test_path_completion.py: 26 tests covering path extraction, prefix filtering, directory markers, home expansion, case-insensitivity, integration with slash commands * feat(privacy): redact PII from LLM context when privacy.redact_pii is enabled Add privacy.redact_pii config option (boolean, default false). When enabled, the gateway redacts personally identifiable information from the system prompt before sending it to the LLM provider: - Phone numbers (user IDs on WhatsApp/Signal) → hashed to user_<sha256> - User IDs → hashed to user_<sha256> - Chat IDs → numeric portion hashed, platform prefix preserved - Home channel IDs → hashed - Names/usernames → NOT affected (user-chosen, publicly visible) Hashes are deterministic (same user → same hash) so the model can still distinguish users in group chats. Routing and delivery use the original values internally — redaction only affects LLM context. Inspired by OpenClaw PR #47959. * fix(privacy): skip PII redaction on Discord/Slack (mentions need real IDs) Discord uses <@user_id> for mentions and Slack uses <@U12345> — the LLM needs the real ID to tag users. Redaction now only applies to WhatsApp, Signal, and Telegram where IDs are pure routing metadata. Add 4 platform-specific tests covering Discord, WhatsApp, Signal, Slack. * feat: smart approvals + /stop command (inspired by OpenAI Codex) * feat: smart approvals — LLM-based risk assessment for dangerous commands Adds a 'smart' approval mode that uses the auxiliary LLM to assess whether a flagged command is genuinely dangerous or a false positive, auto-approving low-risk commands without prompting the user. Inspired by OpenAI Codex's Smart Approvals guardian subagent (openai/codex#13860). 
Config (config.yaml): approvals: mode: manual # manual (default), smart, off Modes: - manual — current behavior, always prompt the user - smart — aux LLM evaluates risk: APPROVE (auto-allow), DENY (block), or ESCALATE (fall through to manual prompt) - off — skip all approval prompts (equivalent to --yolo) When smart mode auto-approves, the pattern gets session-level approval so subsequent uses of the same pattern don't trigger another LLM call. When it denies, the command is blocked without user prompt. When uncertain, it escalates to the normal manual approval flow. The LLM prompt is carefully scoped: it sees only the command text and the flagged reason, assesses actual risk vs false positive, and returns a single-word verdict. * feat: make smart approval model configurable via config.yaml Adds auxiliary.approval section to config.yaml with the same provider/model/base_url/api_key pattern as other aux tasks (vision, web_extract, compression, etc.). Config: auxiliary: approval: provider: auto model: '' # fast/cheap model recommended base_url: '' api_key: '' Bridged to env vars in both CLI and gateway paths so the aux client picks them up automatically. * feat: add /stop command to kill all background processes Adds a /stop slash command that kills all running background processes at once. Currently users have to process(list) then process(kill) for each one individually. Inspired by OpenAI Codex's separation of interrupt (Ctrl+C stops current turn) from /stop (cleans up background processes). See openai/codex#14602. Ctrl+C continues to only interrupt the active agent turn — background dev servers, watchers, etc. are preserved. /stop is the explicit way to clean them all up. * feat: first-class plugin architecture + hide status bar cost by default (#1544) The persistent status bar now shows context %, token counts, and duration but NOT $ cost by default. Cost display is opt-in via: display: show_cost: true in config.yaml, or: hermes config set display.show_cost true The /usage command still shows full cost breakdown since the user explicitly asked for it — this only affects the always-visible bar. Status bar without cost: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ 15m Status bar with show_cost: true: ⚕ claude-sonnet-4 │ 12K/200K │ 6% │ $0.06 │ 15m * feat: improve memory prioritization + aggressive skill updates (inspired by OpenAI Codex) * feat: improve memory prioritization — user preferences over procedural knowledge Inspired by OpenAI Codex's memory prompt improvements (openai/codex#14493) which focus memory writes on user preferences and recurring patterns rather than procedural task details. Key insight: 'Optimize for reducing future user steering — the most valuable memory prevents the user from having to repeat themselves.' Changes: - MEMORY_GUIDANCE (prompt_builder.py): added prioritization hierarchy and the core principle about reducing user steering - MEMORY_SCHEMA (memory_tool.py): reordered WHEN TO SAVE list to put corrections first, added explicit PRIORITY guidance - Memory nudge (run_agent.py): now asks specifically about preferences, corrections, and workflow patterns instead of generic 'anything' - Memory flush (run_agent.py): now instructs to prioritize user preferences and corrections over task-specific details * feat: more aggressive skill creation and update prompting Press harder on skill updates — the agent should proactively patch skills when it encounters issues during use, not wait to be asked. 
Changes: - SKILLS_GUIDANCE: 'consider saving' → 'save'; added explicit instruction to patch skills immediately when found outdated/wrong - Skills header: added instruction to update loaded skills before finishing if they had missing steps or wrong commands - Skill nudge: more assertive ('save the approach' not 'consider saving'), now also prompts for updating existing skills used in the task - Skill nudge interval: lowered default from 15 to 10 iterations - skill_manage schema: added 'patch it immediately' to update triggers * feat: first-class plugin architecture (#1555) Plugin system for extending Hermes with custom tools, hooks, and integrations — no source code changes required. Core system (hermes_cli/plugins.py): - Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and pip entry_points (hermes_agent.plugins group) - PluginContext with register_tool() and register_hook() - 6 lifecycle hooks: pre/post tool_call, pre/post llm_call, on_session_start/end - Namespace package handling for relative imports in plugins - Graceful error isolation — broken plugins never crash the agent Integration (model_tools.py): - Plugin discovery runs after built-in + MCP tools - Plugin tools bypass toolset filter via get_plugin_tool_names() - Pre/post tool call hooks fire in handle_function_call() CLI: - /plugins command shows loaded plugins, tool counts, status - Added to COMMANDS dict for autocomplete Docs: - Getting started guide (build-a-hermes-plugin.md) — full tutorial building a calculator plugin step by step - Reference page (features/plugins.md) — quick overview + tables - Covers: file structure, schemas, handlers, hooks, data files, bundled skills, env var gating, pip distribution, common mistakes Tests: 16 tests covering discovery, loading, hooks, tool visibility. * fix: hermes update causes dual gateways on macOS (launchd) Three bugs worked together to create the dual-gateway problem: 1. cmd_update only checked systemd for gateway restart, completely ignoring launchd on macOS. After killing the PID it would print 'Restart it with: hermes gateway run' even when launchd was about to auto-respawn the process. 2. launchd's KeepAlive.SuccessfulExit=false respawns the gateway after SIGTERM (non-zero exit), so the user's manual restart created a second instance. 3. The launchd plist lacked --replace (systemd had it), so the respawned gateway didn't kill stale instances on startup. Fixes: - Add --replace to launchd ProgramArguments (matches systemd) - Add launchd detection to cmd_update's auto-restart logic - Print 'auto-restart via launchd' instead of manual restart hint * fix: add launchd plist auto-refresh + explicit restart in cmd_update Two integration issues with the initial fix: 1. Existing macOS users with old plist (no --replace) would never get the fix until manual uninstall/reinstall. Added refresh_launchd_plist_if_needed() — mirrors the existing refresh_systemd_unit_if_needed(). Called from launchd_start(), launchd_restart(), and cmd_update. 2. cmd_update relied on KeepAlive respawn after SIGTERM rather than explicit launchctl stop/start. This caused races: launchd would respawn the old process before the PID file was cleaned up. Now does explicit stop+start (matching how systemd gets an explicit systemctl restart), with plist refresh first so the new --replace flag is picked up. 
--------- Co-authored-by: Ninja <ninja@local> Co-authored-by: alireza78a <alireza78a@users.noreply.github.com> Co-authored-by: Oktay Aydin <113846926+aydnOktay@users.noreply.github.com> Co-authored-by: JP Lew <polydegen@protonmail.com> Co-authored-by: an420eth <an420eth@users.noreply.github.com>
2026-03-16 12:36:29 -07:00
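# The turn-routing tests exercise _resolve_turn_agent_config, which decides
# per turn whether to use the primary model or a configured cheap model.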
def test_cli_turn_routing_uses_primary_when_disabled(monkeypatch):
    """With smart routing disabled, every turn uses the primary model."""
    cli = _import_cli()
    shell = cli.HermesCLI(model="gpt-5", compact=True, max_turns=1)
    shell.provider = "openrouter"
    shell.api_mode = "chat_completions"
    shell.base_url = "https://openrouter.ai/api/v1"
    shell.api_key = "sk-primary"
    shell._smart_model_routing = {"enabled": False}
    result = shell._resolve_turn_agent_config("what time is it in tokyo?")
    assert result["model"] == "gpt-5"
    assert result["runtime"]["provider"] == "openrouter"
    assert result["label"] is None


def test_cli_turn_routing_uses_cheap_model_when_simple(monkeypatch):
    """A short, simple prompt is routed to the configured cheap model."""
    cli = _import_cli()

    def _runtime_resolve(**kwargs):
        assert kwargs["requested"] == "zai"
        return {
            "provider": "zai",
            "api_mode": "chat_completions",
            "base_url": "https://open.z.ai/api/v1",
            "api_key": "cheap-key",
            "source": "env/config",
        }

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    shell = cli.HermesCLI(model="anthropic/claude-sonnet-4", compact=True, max_turns=1)
    shell.provider = "openrouter"
    shell.api_mode = "chat_completions"
    shell.base_url = "https://openrouter.ai/api/v1"
    shell.api_key = "primary-key"
    shell._smart_model_routing = {
        "enabled": True,
        "cheap_model": {"provider": "zai", "model": "glm-5-air"},
        "max_simple_chars": 160,
        "max_simple_words": 28,
    }
    result = shell._resolve_turn_agent_config("what time is it in tokyo?")
    assert result["model"] == "glm-5-air"
    assert result["runtime"]["provider"] == "zai"
    assert result["runtime"]["api_key"] == "cheap-key"
    assert result["label"] is not None


def test_cli_prefers_config_provider_over_stale_env_override(monkeypatch):
    """An explicit provider in config.yaml wins over a stale env override."""
    cli = _import_cli()
    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openrouter")
    config_copy = dict(cli.CLI_CONFIG)
    model_copy = dict(config_copy.get("model", {}))
    model_copy["provider"] = "custom"
    model_copy["base_url"] = "https://api.fireworks.ai/inference/v1"
    config_copy["model"] = model_copy
    monkeypatch.setattr(cli, "CLI_CONFIG", config_copy)
    shell = cli.HermesCLI(model="fireworks/minimax-m2p5", compact=True, max_turns=1)
    assert shell.requested_provider == "custom"


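# The Codex tests below pin down model-selection precedence when the provider
# resolves to openai-codex: an explicit user choice is preserved, config.yaml
# beats API-advertised defaults, and only bare Codex model ids are sent.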
def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
    """When provider resolves to openai-codex and no model was explicitly
    chosen, the global config default (e.g. anthropic/claude-opus-4.6) must
    be replaced with a Codex-compatible model. Fixes #651."""
    cli = _import_cli()
    monkeypatch.delenv("LLM_MODEL", raising=False)
    monkeypatch.delenv("OPENAI_MODEL", raising=False)
    # Ensure local user config does not leak a model into the test
    monkeypatch.setitem(cli.CLI_CONFIG, "model", {
        "default": "",
        "base_url": "https://openrouter.ai/api/v1",
    })

    def _runtime_resolve(**kwargs):
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "test-key",
            "source": "env/config",
        }

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    monkeypatch.setattr(
        "hermes_cli.codex_models.get_codex_model_ids",
        lambda access_token=None: ["gpt-5.2-codex", "gpt-5.1-codex-mini"],
    )
    shell = cli.HermesCLI(compact=True, max_turns=1)
    assert shell._model_is_default is True
    assert shell._ensure_runtime_credentials() is True
    assert shell.provider == "openai-codex"
    assert "anthropic" not in shell.model
    assert "claude" not in shell.model
    assert shell.model == "gpt-5.2-codex"


def test_codex_provider_uses_config_model(monkeypatch):
    """Model comes from config.yaml, not LLM_MODEL env var.
    Config.yaml is the single source of truth to avoid multi-agent conflicts."""
    cli = _import_cli()
    # LLM_MODEL env var should be IGNORED (even if set)
    monkeypatch.setenv("LLM_MODEL", "should-be-ignored")
    monkeypatch.delenv("OPENAI_MODEL", raising=False)
    # Set model via config
    monkeypatch.setitem(cli.CLI_CONFIG, "model", {
        "default": "gpt-5.2-codex",
        "provider": "openai-codex",
        "base_url": "https://chatgpt.com/backend-api/codex",
    })

    def _runtime_resolve(**kwargs):
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "fake-codex-token",
            "source": "env/config",
        }

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    # Prevent live API call from overriding the config model
    monkeypatch.setattr(
        "hermes_cli.codex_models.get_codex_model_ids",
        lambda access_token=None: ["gpt-5.2-codex"],
    )
    shell = cli.HermesCLI(compact=True, max_turns=1)
    assert shell._ensure_runtime_credentials() is True
    assert shell.provider == "openai-codex"
    # Model from config (may be normalized by codex provider logic)
    assert "codex" in shell.model.lower()
    # LLM_MODEL env var is NOT used
    assert shell.model != "should-be-ignored"


def test_codex_config_model_not_replaced_by_normalization(monkeypatch):
    """When the user sets model.default in config.yaml to a specific codex
    model, _normalize_model_for_provider must NOT replace it with the latest
    available model from the API. Regression test for #1887."""
    cli = _import_cli()
    monkeypatch.delenv("LLM_MODEL", raising=False)
    monkeypatch.delenv("OPENAI_MODEL", raising=False)
    # User explicitly configured gpt-5.3-codex in config.yaml
    monkeypatch.setitem(cli.CLI_CONFIG, "model", {
        "default": "gpt-5.3-codex",
        "provider": "openai-codex",
        "base_url": "https://chatgpt.com/backend-api/codex",
    })

    def _runtime_resolve(**kwargs):
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "fake-key",
            "source": "env/config",
        }

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    # API returns a DIFFERENT model than what the user configured
    monkeypatch.setattr(
        "hermes_cli.codex_models.get_codex_model_ids",
        lambda access_token=None: ["gpt-5.4", "gpt-5.3-codex"],
    )
    shell = cli.HermesCLI(compact=True, max_turns=1)
    # Config model is NOT the global default — user made a deliberate choice
    assert shell._model_is_default is False
    assert shell._ensure_runtime_credentials() is True
    assert shell.provider == "openai-codex"
    # Model must stay as user configured, not replaced by gpt-5.4
    assert shell.model == "gpt-5.3-codex"


def test_codex_provider_preserves_explicit_codex_model(monkeypatch):
    """If the user explicitly passes a Codex-compatible model, it must be
    preserved even when the provider resolves to openai-codex."""
    cli = _import_cli()
    monkeypatch.delenv("LLM_MODEL", raising=False)
    monkeypatch.delenv("OPENAI_MODEL", raising=False)

    def _runtime_resolve(**kwargs):
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "test-key",
            "source": "env/config",
        }

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    shell = cli.HermesCLI(model="gpt-5.1-codex-mini", compact=True, max_turns=1)
    assert shell._model_is_default is False
    assert shell._ensure_runtime_credentials() is True
    assert shell.model == "gpt-5.1-codex-mini"


def test_codex_provider_strips_provider_prefix_from_model(monkeypatch):
    """openai/gpt-5.3-codex should become gpt-5.3-codex — the Codex
    Responses API does not accept provider-prefixed model slugs."""
    cli = _import_cli()
    monkeypatch.delenv("LLM_MODEL", raising=False)
    monkeypatch.delenv("OPENAI_MODEL", raising=False)

    def _runtime_resolve(**kwargs):
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "test-key",
            "source": "env/config",
        }

    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
    shell = cli.HermesCLI(model="openai/gpt-5.3-codex", compact=True, max_turns=1)
    assert shell._ensure_runtime_credentials() is True
    assert shell.model == "gpt-5.3-codex"


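# The remaining tests drive the interactive `hermes model` configuration
# flows in hermes_cli.main directly, rather than going through HermesCLI.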
def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys):
    """An unknown provider in config.yaml warns and falls back to auto detection."""
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"model": {"default": "gpt-5", "provider": "invalid-provider"}},
    )
    monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)
    monkeypatch.setattr("hermes_cli.config.get_env_value", lambda key: "")
    monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None)

    def _resolve_provider(requested, **kwargs):
        if requested == "invalid-provider":
            raise AuthError("Unknown provider 'invalid-provider'.", code="invalid_provider")
        return "openrouter"

    monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider)
    monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: len(choices) - 1)
    hermes_main.cmd_model(SimpleNamespace())
    output = capsys.readouterr().out
    assert "Warning:" in output
    assert "falling back to auto provider detection" in output.lower()
    assert "No change." in output


def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys):
    """The custom-provider flow saves the verified /v1 base URL rather than
    the raw URL the user typed."""
    monkeypatch.setattr(
        "hermes_cli.config.get_env_value",
        lambda key: "" if key in {"OPENAI_BASE_URL", "OPENAI_API_KEY"} else "",
    )
    saved_env = {}
    monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: saved_env.__setitem__(key, value))
    monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: saved_env.__setitem__("MODEL", model))
    monkeypatch.setattr("hermes_cli.auth.deactivate_provider", lambda: None)
    monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *args, **kwargs: None)
    monkeypatch.setattr(
        "hermes_cli.models.probe_api_models",
        lambda api_key, base_url: {
            "models": ["llm"],
            "probed_url": "http://localhost:8000/v1/models",
            "resolved_base_url": "http://localhost:8000/v1",
            "suggested_base_url": "http://localhost:8000/v1",
            "used_fallback": True,
        },
    )
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: {"model": {"default": "", "provider": "custom", "base_url": ""}},
    )
    monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)
    answers = iter(["http://localhost:8000", "local-key", "llm"])
    monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))
    hermes_main._model_flow_custom({})
    output = capsys.readouterr().out
    assert "Saving the working base URL instead" in output
    assert saved_env["OPENAI_BASE_URL"] == "http://localhost:8000/v1"
    assert saved_env["OPENAI_API_KEY"] == "local-key"
    assert saved_env["MODEL"] == "llm"