From f996d7950b7aa582bd0b071a9a25d2d3500416fb Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Sun, 8 Mar 2026 18:16:58 -0700
Subject: [PATCH] fix: trust user-selected models with OpenAI Codex provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Codex model normalization was rejecting any model without 'codex'
in its name, forcing a fallback to gpt-5.3-codex. This blocked models
like gpt-5.4 that the Codex API actually supports.

The fix simplifies _normalize_model_for_provider() to two operations:
1. Strip provider prefixes (API needs bare slugs)
2. Replace the *untouched default* model with a Codex-compatible one

If the user explicitly chose a model — any model — we trust them and
let the API be the judge. No allowlists, no slug checks.

Also removes the 'codex not in slug' filter from _read_cache_models()
so the local cache preserves all API-available models.

Inspired by OpenClaw's approach which explicitly lists non-codex models
(gpt-5.4, gpt-5.2) as valid Codex models.
---
 cli.py                                |  81 +++++++-------
 hermes_cli/codex_models.py            |   2 -
 tests/test_cli_provider_resolution.py |  16 +--
 tests/test_codex_models.py            | 151 +++++++++++++++++++++++++-
 4 files changed, 194 insertions(+), 56 deletions(-)

diff --git a/cli.py b/cli.py
index cd13b820c..234b2f273 100755
--- a/cli.py
+++ b/cli.py
@@ -1135,14 +1135,18 @@ class HermesCLI:
             self._app.invalidate()
 
     def _normalize_model_for_provider(self, resolved_provider: str) -> bool:
-        """Normalize obviously incompatible model/provider pairings.
+        """Strip provider prefixes and swap the default model for Codex.
 
-        When the resolved provider is ``openai-codex``, the Codex Responses API
-        only accepts Codex-compatible model slugs (e.g. ``gpt-5.3-codex``).
-        If the active model is incompatible (e.g. the OpenRouter default
-        ``anthropic/claude-opus-4.6``), swap it for the best available Codex
-        model.  Also strips provider prefixes the API does not accept
-        (``openai/gpt-5.3-codex`` → ``gpt-5.3-codex``).
+        When the resolved provider is ``openai-codex``:
+
+        1. Strip any ``provider/`` prefix (the Codex Responses API only
+           accepts bare model slugs like ``gpt-5.4``, not ``openai/gpt-5.4``).
+        2. If the active model is still the *untouched default* (user never
+           explicitly chose a model), replace it with a Codex-compatible
+           default so the first session doesn't immediately error.
+
+        If the user explicitly chose a model — *any* model — we trust them
+        and let the API be the judge.  No allowlists, no slug checks.
 
         Returns True when the active model was changed.
         """
@@ -1150,46 +1154,39 @@ class HermesCLI:
             return False
 
         current_model = (self.model or "").strip()
-        current_slug = current_model.split("/")[-1] if current_model else ""
+        changed = False
 
-        # Keep explicit Codex models, but strip any provider prefix that the
-        # Codex Responses API does not accept.
-        if current_slug and "codex" in current_slug.lower():
-            if current_slug != current_model:
-                self.model = current_slug
-                if not self._model_is_default:
-                    self.console.print(
-                        f"[yellow]⚠️  Stripped provider prefix from '{current_model}'; "
-                        f"using '{current_slug}' for OpenAI Codex.[/]"
-                    )
-                return True
-            return False
-
-        # Model is not Codex-compatible — replace with the best available
-        fallback_model = "gpt-5.3-codex"
-        try:
-            from hermes_cli.codex_models import get_codex_model_ids
-
-            codex_models = get_codex_model_ids(
-                access_token=self.api_key if self.api_key else None,
-            )
-            fallback_model = next(
-                (mid for mid in codex_models if "codex" in mid.lower()),
-                fallback_model,
-            )
-        except Exception:
-            pass
-
-        if current_model != fallback_model:
+        # 1. Strip provider prefix ("openai/gpt-5.4" → "gpt-5.4")
+        if "/" in current_model:
+            slug = current_model.split("/", 1)[1]
             if not self._model_is_default:
                 self.console.print(
-                    f"[yellow]⚠️  Model '{current_model}' is not supported with "
-                    f"OpenAI Codex; switching to '{fallback_model}'.[/]"
+                    f"[yellow]⚠️  Stripped provider prefix from '{current_model}'; "
+                    f"using '{slug}' for OpenAI Codex.[/]"
                 )
-            self.model = fallback_model
-            return True
+            self.model = slug
+            current_model = slug
+            changed = True
 
-        return False
+        # 2. Replace untouched default with a Codex model
+        if self._model_is_default:
+            fallback_model = "gpt-5.3-codex"
+            try:
+                from hermes_cli.codex_models import get_codex_model_ids
+
+                available = get_codex_model_ids(
+                    access_token=self.api_key if self.api_key else None,
+                )
+                if available:
+                    fallback_model = available[0]
+            except Exception:
+                pass
+
+            if current_model != fallback_model:
+                self.model = fallback_model
+                changed = True
+
+        return changed
 
     def _ensure_runtime_credentials(self) -> bool:
         """
diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py
index 416c76add..8259b7064 100644
--- a/hermes_cli/codex_models.py
+++ b/hermes_cli/codex_models.py
@@ -94,8 +94,6 @@ def _read_cache_models(codex_home: Path) -> List[str]:
             if not isinstance(slug, str) or not slug.strip():
                 continue
             slug = slug.strip()
-            if "codex" not in slug.lower():
-                continue
             if item.get("supported_in_api") is False:
                 continue
             visibility = item.get("visibility")
diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py
index cdae01d0c..f4a446ac8 100644
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/test_cli_provider_resolution.py
@@ -197,10 +197,10 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
     assert shell.model == "gpt-5.2-codex"
 
 
-def test_codex_provider_replaces_incompatible_envvar_model(monkeypatch):
-    """Exact scenario from #651: LLM_MODEL is set to a non-Codex model and
-    provider resolves to openai-codex.  The model must be replaced and a
-    warning printed since the user explicitly chose it."""
+def test_codex_provider_trusts_explicit_envvar_model(monkeypatch):
+    """When the user explicitly sets LLM_MODEL, we trust their choice and
+    let the API be the judge — even if it's a non-OpenAI model.  Only
+    provider prefixes are stripped; the bare model passes through."""
     cli = _import_cli()
 
     monkeypatch.setenv("LLM_MODEL", "claude-opus-4-6")
@@ -217,18 +217,14 @@ def test_codex_provider_replaces_incompatible_envvar_model(monkeypatch):
 
     monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve)
     monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
-    monkeypatch.setattr(
-        "hermes_cli.codex_models.get_codex_model_ids",
-        lambda access_token=None: ["gpt-5.2-codex", "gpt-5.1-codex-mini"],
-    )
 
     shell = cli.HermesCLI(compact=True, max_turns=1)
 
     assert shell._model_is_default is False
     assert shell._ensure_runtime_credentials() is True
     assert shell.provider == "openai-codex"
-    assert "claude" not in shell.model
-    assert shell.model == "gpt-5.2-codex"
+    # User explicitly chose this model — it passes through untouched
+    assert shell.model == "claude-opus-4-6"
 
 
 def test_codex_provider_preserves_explicit_codex_model(monkeypatch):
diff --git a/tests/test_codex_models.py b/tests/test_codex_models.py
index e6cc2fdec..f7e463587 100644
--- a/tests/test_codex_models.py
+++ b/tests/test_codex_models.py
@@ -1,4 +1,9 @@
 import json
+import os
+import sys
+from unittest.mock import patch
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 
 from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids
 
@@ -13,7 +18,7 @@ def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch
                 "models": [
                     {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True},
                     {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True},
-                    {"slug": "gpt-4o", "priority": 1, "supported_in_api": True},
+                    {"slug": "gpt-5.4", "priority": 1, "supported_in_api": True},
                     {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"},
                 ]
             }
@@ -26,7 +31,8 @@ def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch
     assert models[0] == "gpt-5.2-codex"
     assert "gpt-5.1-codex" in models
     assert "gpt-5.3-codex" in models
-    assert "gpt-4o" not in models
+    # Non-codex-suffixed models are included when the cache says they're available
+    assert "gpt-5.4" in models
     assert "gpt-5-hidden-codex" not in models
 
 
@@ -38,3 +44,144 @@ def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatc
     models = get_codex_model_ids()
 
     assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS
+
+
+# ── Tests for _normalize_model_for_provider ──────────────────────────
+
+
+def _make_cli(model="anthropic/claude-opus-4.6", **kwargs):
+    """Create a HermesCLI with minimal mocking."""
+    import cli as _cli_mod
+    from cli import HermesCLI
+
+    _clean_config = {
+        "model": {
+            "default": "anthropic/claude-opus-4.6",
+            "base_url": "https://openrouter.ai/api/v1",
+            "provider": "auto",
+        },
+        "display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
+        "agent": {},
+        "terminal": {"env_type": "local"},
+    }
+    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
+    with (
+        patch("cli.get_tool_definitions", return_value=[]),
+        patch.dict("os.environ", clean_env, clear=False),
+        patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}),
+    ):
+        cli = HermesCLI(model=model, **kwargs)
+    return cli
+
+
+class TestNormalizeModelForProvider:
+    """_normalize_model_for_provider() trusts user-selected models.
+
+    Only two things happen:
+    1. Provider prefixes are stripped (API needs bare slugs)
+    2. The *untouched default* model is swapped for a Codex model
+    Everything else passes through — the API is the judge.
+    """
+
+    def test_non_codex_provider_is_noop(self):
+        cli = _make_cli(model="gpt-5.4")
+        changed = cli._normalize_model_for_provider("openrouter")
+        assert changed is False
+        assert cli.model == "gpt-5.4"
+
+    def test_bare_codex_model_passes_through(self):
+        cli = _make_cli(model="gpt-5.3-codex")
+        changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is False
+        assert cli.model == "gpt-5.3-codex"
+
+    def test_bare_non_codex_model_passes_through(self):
+        """gpt-5.4 (no 'codex' suffix) passes through — user chose it."""
+        cli = _make_cli(model="gpt-5.4")
+        changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is False
+        assert cli.model == "gpt-5.4"
+
+    def test_any_bare_model_trusted(self):
+        """Even a non-OpenAI bare model passes through — user explicitly set it."""
+        cli = _make_cli(model="claude-opus-4-6")
+        changed = cli._normalize_model_for_provider("openai-codex")
+        # User explicitly chose this model — we trust them, API will error if wrong
+        assert changed is False
+        assert cli.model == "claude-opus-4-6"
+
+    def test_provider_prefix_stripped(self):
+        """openai/gpt-5.4 → gpt-5.4 (strip prefix, keep model)."""
+        cli = _make_cli(model="openai/gpt-5.4")
+        changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is True
+        assert cli.model == "gpt-5.4"
+
+    def test_any_provider_prefix_stripped(self):
+        """anthropic/claude-opus-4.6 → claude-opus-4.6 (strip prefix only).
+        User explicitly chose this — let the API decide if it works."""
+        cli = _make_cli(model="anthropic/claude-opus-4.6")
+        changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is True
+        assert cli.model == "claude-opus-4.6"
+
+    def test_default_model_replaced(self):
+        """The untouched default (anthropic/claude-opus-4.6) gets swapped."""
+        import cli as _cli_mod
+        _clean_config = {
+            "model": {
+                "default": "anthropic/claude-opus-4.6",
+                "base_url": "https://openrouter.ai/api/v1",
+                "provider": "auto",
+            },
+            "display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
+            "agent": {},
+            "terminal": {"env_type": "local"},
+        }
+        # Don't pass model= so _model_is_default is True
+        with (
+            patch("cli.get_tool_definitions", return_value=[]),
+            patch.dict("os.environ", {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}, clear=False),
+            patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}),
+        ):
+            from cli import HermesCLI
+            cli = HermesCLI()
+
+        assert cli._model_is_default is True
+        with patch(
+            "hermes_cli.codex_models.get_codex_model_ids",
+            return_value=["gpt-5.3-codex", "gpt-5.4"],
+        ):
+            changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is True
+        # Uses first from available list
+        assert cli.model == "gpt-5.3-codex"
+
+    def test_default_fallback_when_api_fails(self):
+        """Default model falls back to gpt-5.3-codex when API unreachable."""
+        import cli as _cli_mod
+        _clean_config = {
+            "model": {
+                "default": "anthropic/claude-opus-4.6",
+                "base_url": "https://openrouter.ai/api/v1",
+                "provider": "auto",
+            },
+            "display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
+            "agent": {},
+            "terminal": {"env_type": "local"},
+        }
+        with (
+            patch("cli.get_tool_definitions", return_value=[]),
+            patch.dict("os.environ", {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}, clear=False),
+            patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}),
+        ):
+            from cli import HermesCLI
+            cli = HermesCLI()
+
+        with patch(
+            "hermes_cli.codex_models.get_codex_model_ids",
+            side_effect=Exception("offline"),
+        ):
+            changed = cli._normalize_model_for_provider("openai-codex")
+        assert changed is True
+        assert cli.model == "gpt-5.3-codex"