From 4a8cd6f856b54c9f3d7d2addc61cef9aa76ec6ce Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Thu, 12 Mar 2026 16:02:35 -0700
Subject: [PATCH] fix: stop rejecting unlisted models, accept with warning
 instead
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: use session_key instead of chat_id for adapter interrupt lookups

monitor_for_interrupt() in _run_agent was using source.chat_id to query
the adapter's has_pending_interrupt() and get_pending_message() methods.
But the adapter stores interrupt events under build_session_key(source),
which produces a different string (e.g. 'agent:main:telegram:dm' vs '123456').

This key mismatch meant the interrupt was never detected through the
adapter path, which is the only active interrupt path for all adapter-based
platforms (Telegram, Discord, Slack, etc.). The gateway-level interrupt
path (in dispatch_message) is unreachable because the adapter intercepts
the 2nd message in handle_message() before it reaches dispatch_message().

Result: sending a new message while subagents were running had no effect —
the interrupt was silently lost.

Fix: replace all source.chat_id references in the interrupt-related code
within _run_agent() with the session_key parameter, which matches the
adapter's storage keys.

Also adds regression tests verifying session_key vs chat_id consistency.

* debug: add file-based logging to CLI interrupt path

Temporary instrumentation to diagnose why message-based interrupts
don't seem to work during subagent execution. Logs to
~/.hermes/interrupt_debug.log (immune to redirect_stdout).

Two log points:
1. When Enter handler puts message into _interrupt_queue
2. When chat() reads it and calls agent.interrupt()

This will reveal whether the message reaches the queue and
whether the interrupt is actually fired.

* fix: accept unlisted models with warning instead of rejecting

validate_requested_model() previously hard-rejected any model not found
in the provider's API listing. This was too aggressive — users on higher
plan tiers (e.g. Z.AI Pro/Max) may have access to models not shown in
the public listing (like glm-5 on coding endpoints).

Changes:
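- validate_requested_model: accept unlisted models with a warning note
  instead of blocking. The model is saved to config and used immediately.
- validate_requested_model: when the provider API cannot be reached, the
  model is now accepted and saved to config with a warning, instead of
  being used for the current session only. The fallback check against the
  static model catalog (and its 'Did you mean' hint) is removed.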
- Z.AI setup: always offer glm-5 in the model list regardless of whether
  a coding endpoint was detected. Pro/Max plans support it.
- Z.AI setup detection message: softened from 'GLM-5 is not available'
  to 'GLM-5 may still be available depending on your plan tier'
---
 hermes_cli/models.py                      | 37 +++++++++--------------
 hermes_cli/setup.py                       | 16 +++++-----
 tests/hermes_cli/test_model_validation.py | 33 +++++++++++---------
 tests/test_cli_model_command.py           | 24 ++++++---------
 4 files changed, 48 insertions(+), 62 deletions(-)

diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 92dcbf975..199c4402c 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -327,44 +327,35 @@ def validate_requested_model(
                 "message": None,
             }
         else:
-            # API responded but model is not listed
+            # API responded but model is not listed.  Accept anyway —
+            # the user may have access to models not shown in the public
+            # listing (e.g. Z.AI Pro/Max plans can use glm-5 on coding
+            # endpoints even though it's not in /models).  Warn but allow.
             suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5)
             suggestion_text = ""
             if suggestions:
-                suggestion_text = "\n  Did you mean: " + ", ".join(f"`{s}`" for s in suggestions)
+                suggestion_text = "\n  Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
 
             return {
-                "accepted": False,
-                "persist": False,
+                "accepted": True,
+                "persist": True,
                 "recognized": False,
                 "message": (
-                    f"Error: `{requested}` is not a valid model for this provider."
+                    f"Note: `{requested}` was not found in this provider's model listing. "
+                    f"It may still work if your plan supports it."
                     f"{suggestion_text}"
                 ),
             }
 
-    # api_models is None — couldn't reach API, fall back to catalog check
+    # api_models is None — couldn't reach API.  Accept and persist,
+    # but warn so typos don't silently break things.
     provider_label = _PROVIDER_LABELS.get(normalized, normalized)
-    known_models = provider_model_ids(normalized)
-
-    if requested in known_models:
-        return {
-            "accepted": True,
-            "persist": True,
-            "recognized": True,
-            "message": None,
-        }
-
-    # Can't validate — accept for session only
-    suggestion = get_close_matches(requested, known_models, n=1, cutoff=0.6)
-    suggestion_text = f" Did you mean `{suggestion[0]}`?" if suggestion else ""
     return {
         "accepted": True,
-        "persist": False,
+        "persist": True,
         "recognized": False,
         "message": (
-            f"Could not validate `{requested}` against the live {provider_label} API. "
-            "Using it for this session only; config unchanged."
-            f"{suggestion_text}"
+            f"Could not reach the {provider_label} API to validate `{requested}`. "
+            f"If the service isn't down, this model may not be valid."
         ),
     }
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 975dfd0cb..8db8ff7dd 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -889,7 +889,8 @@ def setup_model_provider(config: dict):
                 print_info(f"  URL: {detected['base_url']}")
                 if detected["id"].startswith("coding"):
                     print_info(
-                        f"  Note: Coding Plan detected — GLM-5 is not available, using {detected['model']}"
+                        f"  Note: Coding Plan endpoint detected (default model: {detected['model']}). "
+                        f"GLM-5 may still be available depending on your plan tier."
                     )
                 save_env_value("GLM_BASE_URL", zai_base_url)
             else:
@@ -1107,14 +1108,11 @@ def setup_model_provider(config: dict):
             _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
             _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL)
         elif selected_provider == "zai":
-            # Coding Plan endpoints don't have GLM-5
-            is_coding_plan = get_env_value("GLM_BASE_URL") and "coding" in (
-                get_env_value("GLM_BASE_URL") or ""
-            )
-            if is_coding_plan:
-                zai_models = ["glm-4.7", "glm-4.5", "glm-4.5-flash"]
-            else:
-                zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]
+            # Always offer all models — Pro/Max plans support GLM-5 even
+            # on coding endpoints.  If the user's plan doesn't support a
+            # model, the API will return an error at runtime (not our job
+            # to gatekeep).
+            zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]
             model_choices = list(zai_models)
             model_choices.append("Custom model")
             model_choices.append(f"Keep current ({current_model})")
diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py
index 71d47136c..8b8f34444 100644
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@@ -160,7 +160,8 @@ class TestValidateFormatChecks:
 
     def test_no_slash_model_rejected_if_not_in_api(self):
         result = _validate("gpt-5.4", api_models=["openai/gpt-5.4"])
-        assert result["accepted"] is False
+        assert result["accepted"] is True
+        assert "not found" in result["message"]
 
 
 # -- validate — API found ----------------------------------------------------
@@ -184,37 +185,39 @@ class TestValidateApiFound:
 # -- validate — API not found ------------------------------------------------
 
 class TestValidateApiNotFound:
-    def test_model_not_in_api_rejected(self):
+    def test_model_not_in_api_accepted_with_warning(self):
         result = _validate("anthropic/claude-nonexistent")
-        assert result["accepted"] is False
-        assert "not a valid model" in result["message"]
+        assert result["accepted"] is True
+        assert result["persist"] is True
+        assert "not found" in result["message"]
 
-    def test_rejection_includes_suggestions(self):
+    def test_warning_includes_suggestions(self):
         result = _validate("anthropic/claude-opus-4.5")
-        assert result["accepted"] is False
-        assert "Did you mean" in result["message"]
+        assert result["accepted"] is True
+        assert "Similar models" in result["message"]
 
 
-# -- validate — API unreachable (fallback) -----------------------------------
+# -- validate — API unreachable — accept and persist everything ----------------
 
 class TestValidateApiFallback:
-    def test_known_catalog_model_accepted_when_api_down(self):
+    def test_any_model_accepted_when_api_down(self):
         result = _validate("anthropic/claude-opus-4.6", api_models=None)
         assert result["accepted"] is True
         assert result["persist"] is True
 
-    def test_unknown_model_session_only_when_api_down(self):
+    def test_unknown_model_also_accepted_when_api_down(self):
+        """No hardcoded catalog gatekeeping — accept, persist, and warn."""
         result = _validate("anthropic/claude-next-gen", api_models=None)
         assert result["accepted"] is True
-        assert result["persist"] is False
-        assert "session only" in result["message"].lower()
+        assert result["persist"] is True
+        assert "could not reach" in result["message"].lower()
 
-    def test_zai_known_model_accepted_when_api_down(self):
+    def test_zai_model_accepted_when_api_down(self):
         result = _validate("glm-5", provider="zai", api_models=None)
         assert result["accepted"] is True
         assert result["persist"] is True
 
-    def test_unknown_provider_session_only_when_api_down(self):
+    def test_unknown_provider_accepted_when_api_down(self):
         result = _validate("some-model", provider="totally-unknown", api_models=None)
         assert result["accepted"] is True
-        assert result["persist"] is False
+        assert result["persist"] is True
diff --git a/tests/test_cli_model_command.py b/tests/test_cli_model_command.py
index 477ad4292..636958b0f 100644
--- a/tests/test_cli_model_command.py
+++ b/tests/test_cli_model_command.py
@@ -31,7 +31,7 @@ class TestModelCommand:
         assert cli_obj.model == "anthropic/claude-sonnet-4.5"
         save_mock.assert_called_once_with("model.default", "anthropic/claude-sonnet-4.5")
 
-    def test_invalid_model_from_api_is_rejected(self, capsys):
+    def test_unlisted_model_accepted_with_warning(self, capsys):
         cli_obj = self._make_cli()
 
         with patch("hermes_cli.models.fetch_api_models",
@@ -40,12 +40,10 @@ class TestModelCommand:
             cli_obj.process_command("/model anthropic/fake-model")
 
         output = capsys.readouterr().out
-        assert "not a valid model" in output
-        assert "Model unchanged" in output
-        assert cli_obj.model == "anthropic/claude-opus-4.6"
-        save_mock.assert_not_called()
+        assert "not found" in output or "Model changed" in output
+        assert cli_obj.model == "anthropic/fake-model"  # accepted
 
-    def test_api_unreachable_falls_back_session_only(self, capsys):
+    def test_api_unreachable_accepts_and_persists(self, capsys):
         cli_obj = self._make_cli()
 
         with patch("hermes_cli.models.fetch_api_models", return_value=None), \
@@ -53,12 +51,11 @@ class TestModelCommand:
             cli_obj.process_command("/model anthropic/claude-sonnet-next")
 
         output = capsys.readouterr().out
-        assert "session only" in output
-        assert "will revert on restart" in output
+        assert "saved to config" in output
         assert cli_obj.model == "anthropic/claude-sonnet-next"
-        save_mock.assert_not_called()
+        save_mock.assert_called_once()
 
-    def test_no_slash_model_probes_api_and_rejects(self, capsys):
+    def test_no_slash_model_accepted_with_warning(self, capsys):
         cli_obj = self._make_cli()
 
         with patch("hermes_cli.models.fetch_api_models",
@@ -67,11 +64,8 @@ class TestModelCommand:
             cli_obj.process_command("/model gpt-5.4")
 
         output = capsys.readouterr().out
-        assert "not a valid model" in output
-        assert "Model unchanged" in output
-        assert cli_obj.model == "anthropic/claude-opus-4.6"  # unchanged
-        assert cli_obj.agent is not None  # not reset
-        save_mock.assert_not_called()
+        # Model is accepted (with warning) even if not in API listing
+        assert cli_obj.model == "gpt-5.4"
 
     def test_validation_crash_falls_back_to_save(self, capsys):
         cli_obj = self._make_cli()