From 4a8cd6f856b54c9f3d7d2addc61cef9aa76ec6ce Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:02:35 -0700 Subject: [PATCH] fix: stop rejecting unlisted models, accept with warning instead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: use session_key instead of chat_id for adapter interrupt lookups monitor_for_interrupt() in _run_agent was using source.chat_id to query the adapter's has_pending_interrupt() and get_pending_message() methods. But the adapter stores interrupt events under build_session_key(source), which produces a different string (e.g. 'agent:main:telegram:dm' vs '123456'). This key mismatch meant the interrupt was never detected through the adapter path, which is the only active interrupt path for all adapter-based platforms (Telegram, Discord, Slack, etc.). The gateway-level interrupt path (in dispatch_message) is unreachable because the adapter intercepts the 2nd message in handle_message() before it reaches dispatch_message(). Result: sending a new message while subagents were running had no effect — the interrupt was silently lost. Fix: replace all source.chat_id references in the interrupt-related code within _run_agent() with the session_key parameter, which matches the adapter's storage keys. Also adds regression tests verifying session_key vs chat_id consistency. * debug: add file-based logging to CLI interrupt path Temporary instrumentation to diagnose why message-based interrupts don't seem to work during subagent execution. Logs to ~/.hermes/interrupt_debug.log (immune to redirect_stdout). Two log points: 1. When Enter handler puts message into _interrupt_queue 2. When chat() reads it and calls agent.interrupt() This will reveal whether the message reaches the queue and whether the interrupt is actually fired. * fix: accept unlisted models with warning instead of rejecting validate_requested_model() previously hard-rejected any model not found in the provider's API listing. This was too aggressive — users on higher plan tiers (e.g. Z.AI Pro/Max) may have access to models not shown in the public listing (like glm-5 on coding endpoints). Changes: - validate_requested_model: accept unlisted models with a warning note instead of blocking. The model is saved to config and used immediately. - Z.AI setup: always offer glm-5 in the model list regardless of whether a coding endpoint was detected. Pro/Max plans support it. - Z.AI setup detection message: softened from 'GLM-5 is not available' to 'GLM-5 may still be available depending on your plan tier' --- hermes_cli/models.py | 37 +++++++++-------------- hermes_cli/setup.py | 16 +++++----- tests/hermes_cli/test_model_validation.py | 33 +++++++++++--------- tests/test_cli_model_command.py | 24 ++++++--------- 4 files changed, 48 insertions(+), 62 deletions(-) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 92dcbf975..199c4402c 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -327,44 +327,35 @@ def validate_requested_model( "message": None, } else: - # API responded but model is not listed + # API responded but model is not listed. Accept anyway — + # the user may have access to models not shown in the public + # listing (e.g. Z.AI Pro/Max plans can use glm-5 on coding + # endpoints even though it's not in /models). Warn but allow. suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5) suggestion_text = "" if suggestions: - suggestion_text = "\n Did you mean: " + ", ".join(f"`{s}`" for s in suggestions) + suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) return { - "accepted": False, - "persist": False, + "accepted": True, + "persist": True, "recognized": False, "message": ( - f"Error: `{requested}` is not a valid model for this provider." + f"Note: `{requested}` was not found in this provider's model listing. " + f"It may still work if your plan supports it." f"{suggestion_text}" ), } - # api_models is None — couldn't reach API, fall back to catalog check + # api_models is None — couldn't reach API. Accept and persist, + # but warn so typos don't silently break things. provider_label = _PROVIDER_LABELS.get(normalized, normalized) - known_models = provider_model_ids(normalized) - - if requested in known_models: - return { - "accepted": True, - "persist": True, - "recognized": True, - "message": None, - } - - # Can't validate — accept for session only - suggestion = get_close_matches(requested, known_models, n=1, cutoff=0.6) - suggestion_text = f" Did you mean `{suggestion[0]}`?" if suggestion else "" return { "accepted": True, - "persist": False, + "persist": True, "recognized": False, "message": ( - f"Could not validate `{requested}` against the live {provider_label} API. " - "Using it for this session only; config unchanged." - f"{suggestion_text}" + f"Could not reach the {provider_label} API to validate `{requested}`. " + f"If the service isn't down, this model may not be valid." ), } diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 975dfd0cb..8db8ff7dd 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -889,7 +889,8 @@ def setup_model_provider(config: dict): print_info(f" URL: {detected['base_url']}") if detected["id"].startswith("coding"): print_info( - f" Note: Coding Plan detected — GLM-5 is not available, using {detected['model']}" + f" Note: Coding Plan endpoint detected (default model: {detected['model']}). " + f"GLM-5 may still be available depending on your plan tier." ) save_env_value("GLM_BASE_URL", zai_base_url) else: @@ -1107,14 +1108,11 @@ def setup_model_provider(config: dict): _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL) elif selected_provider == "zai": - # Coding Plan endpoints don't have GLM-5 - is_coding_plan = get_env_value("GLM_BASE_URL") and "coding" in ( - get_env_value("GLM_BASE_URL") or "" - ) - if is_coding_plan: - zai_models = ["glm-4.7", "glm-4.5", "glm-4.5-flash"] - else: - zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"] + # Always offer all models — Pro/Max plans support GLM-5 even + # on coding endpoints. If the user's plan doesn't support a + # model, the API will return an error at runtime (not our job + # to gatekeep). + zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"] model_choices = list(zai_models) model_choices.append("Custom model") model_choices.append(f"Keep current ({current_model})") diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py index 71d47136c..8b8f34444 100644 --- a/tests/hermes_cli/test_model_validation.py +++ b/tests/hermes_cli/test_model_validation.py @@ -160,7 +160,8 @@ class TestValidateFormatChecks: def test_no_slash_model_rejected_if_not_in_api(self): result = _validate("gpt-5.4", api_models=["openai/gpt-5.4"]) - assert result["accepted"] is False + assert result["accepted"] is True + assert "not found" in result["message"] # -- validate — API found ---------------------------------------------------- @@ -184,37 +185,39 @@ class TestValidateApiFound: # -- validate — API not found ------------------------------------------------ class TestValidateApiNotFound: - def test_model_not_in_api_rejected(self): + def test_model_not_in_api_accepted_with_warning(self): result = _validate("anthropic/claude-nonexistent") - assert result["accepted"] is False - assert "not a valid model" in result["message"] + assert result["accepted"] is True + assert result["persist"] is True + assert "not found" in result["message"] - def test_rejection_includes_suggestions(self): + def test_warning_includes_suggestions(self): result = _validate("anthropic/claude-opus-4.5") - assert result["accepted"] is False - assert "Did you mean" in result["message"] + assert result["accepted"] is True + assert "Similar models" in result["message"] -# -- validate — API unreachable (fallback) ----------------------------------- +# -- validate — API unreachable — accept and persist everything ---------------- class TestValidateApiFallback: - def test_known_catalog_model_accepted_when_api_down(self): + def test_any_model_accepted_when_api_down(self): result = _validate("anthropic/claude-opus-4.6", api_models=None) assert result["accepted"] is True assert result["persist"] is True - def test_unknown_model_session_only_when_api_down(self): + def test_unknown_model_also_accepted_when_api_down(self): + """No hardcoded catalog gatekeeping — accept, persist, and warn.""" result = _validate("anthropic/claude-next-gen", api_models=None) assert result["accepted"] is True - assert result["persist"] is False - assert "session only" in result["message"].lower() + assert result["persist"] is True + assert "could not reach" in result["message"].lower() - def test_zai_known_model_accepted_when_api_down(self): + def test_zai_model_accepted_when_api_down(self): result = _validate("glm-5", provider="zai", api_models=None) assert result["accepted"] is True assert result["persist"] is True - def test_unknown_provider_session_only_when_api_down(self): + def test_unknown_provider_accepted_when_api_down(self): result = _validate("some-model", provider="totally-unknown", api_models=None) assert result["accepted"] is True - assert result["persist"] is False + assert result["persist"] is True diff --git a/tests/test_cli_model_command.py b/tests/test_cli_model_command.py index 477ad4292..636958b0f 100644 --- a/tests/test_cli_model_command.py +++ b/tests/test_cli_model_command.py @@ -31,7 +31,7 @@ class TestModelCommand: assert cli_obj.model == "anthropic/claude-sonnet-4.5" save_mock.assert_called_once_with("model.default", "anthropic/claude-sonnet-4.5") - def test_invalid_model_from_api_is_rejected(self, capsys): + def test_unlisted_model_accepted_with_warning(self, capsys): cli_obj = self._make_cli() with patch("hermes_cli.models.fetch_api_models", @@ -40,12 +40,10 @@ class TestModelCommand: cli_obj.process_command("/model anthropic/fake-model") output = capsys.readouterr().out - assert "not a valid model" in output - assert "Model unchanged" in output - assert cli_obj.model == "anthropic/claude-opus-4.6" - save_mock.assert_not_called() + assert "not found" in output or "Model changed" in output + assert cli_obj.model == "anthropic/fake-model" # accepted - def test_api_unreachable_falls_back_session_only(self, capsys): + def test_api_unreachable_accepts_and_persists(self, capsys): cli_obj = self._make_cli() with patch("hermes_cli.models.fetch_api_models", return_value=None), \ @@ -53,12 +51,11 @@ class TestModelCommand: cli_obj.process_command("/model anthropic/claude-sonnet-next") output = capsys.readouterr().out - assert "session only" in output - assert "will revert on restart" in output + assert "saved to config" in output assert cli_obj.model == "anthropic/claude-sonnet-next" - save_mock.assert_not_called() + save_mock.assert_called_once() - def test_no_slash_model_probes_api_and_rejects(self, capsys): + def test_no_slash_model_accepted_with_warning(self, capsys): cli_obj = self._make_cli() with patch("hermes_cli.models.fetch_api_models", @@ -67,11 +64,8 @@ class TestModelCommand: cli_obj.process_command("/model gpt-5.4") output = capsys.readouterr().out - assert "not a valid model" in output - assert "Model unchanged" in output - assert cli_obj.model == "anthropic/claude-opus-4.6" # unchanged - assert cli_obj.agent is not None # not reset - save_mock.assert_not_called() + # Model is accepted (with warning) even if not in API listing + assert cli_obj.model == "gpt-5.4" def test_validation_crash_falls_back_to_save(self, capsys): cli_obj = self._make_cli()