From 344239c2dbfe6c03c9020a4faa9552c8769be20a Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 31 Mar 2026 03:29:00 -0700
Subject: [PATCH] feat: auto-detect models from server probe in custom endpoint
 setup (#4218)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Custom endpoint setup (_model_flow_custom) now probes the server first
and presents detected models instead of asking users to type blind:

- Single model: auto-confirms with Y/n prompt
- Multiple models: numbered list picker, or type a name
- No models / probe failed: falls back to manual input

Context length prompt also moved after model selection so the user sees
the verified endpoint before being asked for details.

All recent fixes preserved: config dict sync (#4172), api_key
persistence (#4182), no save_env_value for URLs (#4165).

Inspired by PR #4194 by sudoingX — re-implemented against current main.

Co-authored-by: Xpress AI (Dip KD) <200180104+sudoingX@users.noreply.github.com>
---
 hermes_cli/main.py                    | 50 ++++++++++++++++++++-------
 tests/test_cli_provider_resolution.py |  5 ++-
 2 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 3c7142b5e..9b4b3ccac 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1242,22 +1242,10 @@ def _model_flow_custom(config):
     try:
         base_url = input(f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: ").strip()
         api_key = input(f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: ").strip()
-        model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
-        context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip()
     except (KeyboardInterrupt, EOFError):
         print("\nCancelled.")
         return
 
-    context_length = None
-    if context_length_str:
-        try:
-            context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000"))
-            if context_length <= 0:
-                context_length = None
-        except ValueError:
-            print(f"Invalid context length: {context_length_str} — will auto-detect.")
-            context_length = None
-
     if not base_url and not current_url:
         print("No URL provided. Cancelled.")
         return
@@ -1294,6 +1282,44 @@ def _model_flow_custom(config):
         if probe.get("suggested_base_url"):
             print(f"  If this server expects /v1, try base URL: {probe['suggested_base_url']}")
 
+    # Select model — use probe results when available, fall back to manual input
+    model_name = ""
+    detected_models = probe.get("models") or []
+    try:
+        if len(detected_models) == 1:
+            print(f"  Detected model: {detected_models[0]}")
+            confirm = input("  Use this model? [Y/n]: ").strip().lower()
+            if confirm in ("", "y", "yes"):
+                model_name = detected_models[0]
+            else:
+                model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
+        elif len(detected_models) > 1:
+            print("  Available models:")
+            for i, m in enumerate(detected_models, 1):
+                print(f"    {i}. {m}")
+            pick = input(f"  Select model [1-{len(detected_models)}] or type name: ").strip()
+            if pick.isdigit() and 1 <= int(pick) <= len(detected_models):
+                model_name = detected_models[int(pick) - 1]
+            elif pick:
+                model_name = pick
+        else:
+            model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
+
+        context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip()
+    except (KeyboardInterrupt, EOFError):
+        print("\nCancelled.")
+        return
+
+    context_length = None
+    if context_length_str:
+        try:
+            context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000"))
+            if context_length <= 0:
+                context_length = None
+        except ValueError:
+            print(f"Invalid context length: {context_length_str} — will auto-detect.")
+            context_length = None
+
     if model_name:
         _save_model_choice(model_name)
 
diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py
index 943a45a55..3c9b31f5f 100644
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/test_cli_provider_resolution.py
@@ -460,13 +460,16 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys):
     )
     monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)
 
-    answers = iter(["http://localhost:8000", "local-key", "llm", ""])
+    # After the probe detects a single model ("llm"), the flow asks
+    # "Use this model? [Y/n]:" — confirm with Enter, then context length.
+    answers = iter(["http://localhost:8000", "local-key", "", ""])
     monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))
 
     hermes_main._model_flow_custom({})
 
     output = capsys.readouterr().out
     assert "Saving the working base URL instead" in output
+    assert "Detected model: llm" in output
     # OPENAI_BASE_URL is no longer saved to .env — config.yaml is authoritative
     assert "OPENAI_BASE_URL" not in saved_env
     assert saved_env["MODEL"] == "llm"
\ No newline at end of file