Merge pull request #2403 from NousResearch/hermes/hermes-31d7db3b
fix(model_metadata): use /v1/props endpoint for llama.cpp context detection
This commit is contained in:
@@ -260,9 +260,11 @@ def detect_local_server_type(base_url: str) -> Optional[str]:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
# llama.cpp exposes /props
|
||||
# llama.cpp exposes /v1/props (older builds used /props without the /v1 prefix)
|
||||
try:
|
||||
r = client.get(f"{server_url}/props")
|
||||
r = client.get(f"{server_url}/v1/props")
|
||||
if r.status_code != 200:
|
||||
r = client.get(f"{server_url}/props") # fallback for older builds
|
||||
if r.status_code == 200 and "default_generation_settings" in r.text:
|
||||
return "llamacpp"
|
||||
except Exception:
|
||||
@@ -455,8 +457,11 @@ def fetch_endpoint_model_metadata(
|
||||
)
|
||||
if is_llamacpp:
|
||||
try:
|
||||
props_url = candidate.rstrip("/").replace("/v1", "") + "/props"
|
||||
props_resp = requests.get(props_url, headers=headers, timeout=5)
|
||||
# Try /v1/props first (current llama.cpp); fall back to /props for older builds
|
||||
base = candidate.rstrip("/").replace("/v1", "")
|
||||
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5)
|
||||
if not props_resp.ok:
|
||||
props_resp = requests.get(base + "/props", headers=headers, timeout=5)
|
||||
if props_resp.ok:
|
||||
props = props_resp.json()
|
||||
gen_settings = props.get("default_generation_settings", {})
|
||||
|
||||
Reference in New Issue
Block a user