feat: auto-detect models from server probe in custom endpoint setup (#4218)
Custom endpoint setup (_model_flow_custom) now probes the server first and presents the detected models instead of asking users to type a model name blindly: - Single model: auto-confirms with Y/n prompt - Multiple models: numbered list picker, or type a name - No models / probe failed: falls back to manual input The context-length prompt was also moved after model selection so the user sees the verified endpoint before being asked for details. All recent fixes preserved: config dict sync (#4172), api_key persistence (#4182), no save_env_value for URLs (#4165). Inspired by PR #4194 by sudoingX — re-implemented against current main. Co-authored-by: Xpress AI (Dip KD) <200180104+sudoingX@users.noreply.github.com>
This commit is contained in:
@@ -1242,22 +1242,10 @@ def _model_flow_custom(config):
|
|||||||
try:
|
try:
|
||||||
base_url = input(f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: ").strip()
|
base_url = input(f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: ").strip()
|
||||||
api_key = input(f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: ").strip()
|
api_key = input(f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: ").strip()
|
||||||
model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
|
|
||||||
context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip()
|
|
||||||
except (KeyboardInterrupt, EOFError):
|
except (KeyboardInterrupt, EOFError):
|
||||||
print("\nCancelled.")
|
print("\nCancelled.")
|
||||||
return
|
return
|
||||||
|
|
||||||
context_length = None
|
|
||||||
if context_length_str:
|
|
||||||
try:
|
|
||||||
context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000"))
|
|
||||||
if context_length <= 0:
|
|
||||||
context_length = None
|
|
||||||
except ValueError:
|
|
||||||
print(f"Invalid context length: {context_length_str} — will auto-detect.")
|
|
||||||
context_length = None
|
|
||||||
|
|
||||||
if not base_url and not current_url:
|
if not base_url and not current_url:
|
||||||
print("No URL provided. Cancelled.")
|
print("No URL provided. Cancelled.")
|
||||||
return
|
return
|
||||||
@@ -1294,6 +1282,44 @@ def _model_flow_custom(config):
|
|||||||
if probe.get("suggested_base_url"):
|
if probe.get("suggested_base_url"):
|
||||||
print(f" If this server expects /v1, try base URL: {probe['suggested_base_url']}")
|
print(f" If this server expects /v1, try base URL: {probe['suggested_base_url']}")
|
||||||
|
|
||||||
|
# Select model — use probe results when available, fall back to manual input
|
||||||
|
model_name = ""
|
||||||
|
detected_models = probe.get("models") or []
|
||||||
|
try:
|
||||||
|
if len(detected_models) == 1:
|
||||||
|
print(f" Detected model: {detected_models[0]}")
|
||||||
|
confirm = input(" Use this model? [Y/n]: ").strip().lower()
|
||||||
|
if confirm in ("", "y", "yes"):
|
||||||
|
model_name = detected_models[0]
|
||||||
|
else:
|
||||||
|
model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
|
||||||
|
elif len(detected_models) > 1:
|
||||||
|
print(" Available models:")
|
||||||
|
for i, m in enumerate(detected_models, 1):
|
||||||
|
print(f" {i}. {m}")
|
||||||
|
pick = input(f" Select model [1-{len(detected_models)}] or type name: ").strip()
|
||||||
|
if pick.isdigit() and 1 <= int(pick) <= len(detected_models):
|
||||||
|
model_name = detected_models[int(pick) - 1]
|
||||||
|
elif pick:
|
||||||
|
model_name = pick
|
||||||
|
else:
|
||||||
|
model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
|
||||||
|
|
||||||
|
context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip()
|
||||||
|
except (KeyboardInterrupt, EOFError):
|
||||||
|
print("\nCancelled.")
|
||||||
|
return
|
||||||
|
|
||||||
|
context_length = None
|
||||||
|
if context_length_str:
|
||||||
|
try:
|
||||||
|
context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000"))
|
||||||
|
if context_length <= 0:
|
||||||
|
context_length = None
|
||||||
|
except ValueError:
|
||||||
|
print(f"Invalid context length: {context_length_str} — will auto-detect.")
|
||||||
|
context_length = None
|
||||||
|
|
||||||
if model_name:
|
if model_name:
|
||||||
_save_model_choice(model_name)
|
_save_model_choice(model_name)
|
||||||
|
|
||||||
|
|||||||
@@ -460,13 +460,16 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys):
|
|||||||
)
|
)
|
||||||
monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)
|
monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)
|
||||||
|
|
||||||
answers = iter(["http://localhost:8000", "local-key", "llm", ""])
|
# After the probe detects a single model ("llm"), the flow asks
|
||||||
|
# "Use this model? [Y/n]:" — confirm with Enter, then context length.
|
||||||
|
answers = iter(["http://localhost:8000", "local-key", "", ""])
|
||||||
monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))
|
monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))
|
||||||
|
|
||||||
hermes_main._model_flow_custom({})
|
hermes_main._model_flow_custom({})
|
||||||
output = capsys.readouterr().out
|
output = capsys.readouterr().out
|
||||||
|
|
||||||
assert "Saving the working base URL instead" in output
|
assert "Saving the working base URL instead" in output
|
||||||
|
assert "Detected model: llm" in output
|
||||||
# OPENAI_BASE_URL is no longer saved to .env — config.yaml is authoritative
|
# OPENAI_BASE_URL is no longer saved to .env — config.yaml is authoritative
|
||||||
assert "OPENAI_BASE_URL" not in saved_env
|
assert "OPENAI_BASE_URL" not in saved_env
|
||||||
assert saved_env["MODEL"] == "llm"
|
assert saved_env["MODEL"] == "llm"
|
||||||
Reference in New Issue
Block a user