Updated the logic for determining the probed_url in the probe_api_models function to use the first tried URL instead of the last. This change ensures that the most relevant URL is returned when probing for models. Additionally, improved the output message in the _model_flow_custom function to provide clearer guidance based on the suggested_base_url.
1722 lines
59 KiB
Python
1722 lines
59 KiB
Python
"""
|
|
Canonical model catalogs and lightweight validation helpers.
|
|
|
|
Add, remove, or reorder entries here — both `hermes setup` and
|
|
`hermes` provider-selection will pick up the change automatically.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import urllib.request
|
|
import urllib.error
|
|
from difflib import get_close_matches
|
|
from typing import Any, Optional
|
|
|
|
# Root of the GitHub Copilot API; the model-catalog URL is derived from it.
COPILOT_BASE_URL = "https://api.githubcopilot.com"
COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
# Editor version string — presumably sent with Copilot requests by code
# outside this view; TODO confirm where it is consumed.
COPILOT_EDITOR_VERSION = "vscode/1.104.1"
# Reasoning-effort level names, one list per model family named in the constant.
COPILOT_REASONING_EFFORTS_GPT5 = ["minimal", "low", "medium", "high"]
COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]

# Backward-compatible aliases for the earlier GitHub Models-backed Copilot work.
GITHUB_MODELS_BASE_URL = COPILOT_BASE_URL
GITHUB_MODELS_CATALOG_URL = COPILOT_MODELS_URL
|
|
|
|
# (model_id, display description shown in menus)
|
|
# Ordered OpenRouter catalog. The order is the menu order; the second tuple
# element is the optional tag rendered in parentheses by ``menu_labels``.
OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6", "recommended"),
    ("anthropic/claude-sonnet-4.6", ""),
    ("qwen/qwen3.6-plus:free", "free"),
    ("anthropic/claude-sonnet-4.5", ""),
    ("anthropic/claude-haiku-4.5", ""),
    ("openai/gpt-5.4", ""),
    ("openai/gpt-5.4-mini", ""),
    ("xiaomi/mimo-v2-pro", ""),
    ("openai/gpt-5.3-codex", ""),
    ("google/gemini-3-pro-preview", ""),
    ("google/gemini-3-flash-preview", ""),
    ("google/gemini-3.1-pro-preview", ""),
    ("google/gemini-3.1-flash-lite-preview", ""),
    ("qwen/qwen3.5-plus-02-15", ""),
    ("qwen/qwen3.5-35b-a3b", ""),
    ("stepfun/step-3.5-flash", ""),
    ("minimax/minimax-m2.7", ""),
    ("minimax/minimax-m2.5", ""),
    ("z-ai/glm-5.1", ""),
    ("z-ai/glm-5-turbo", ""),
    ("moonshotai/kimi-k2.5", ""),
    ("x-ai/grok-4.20-beta", ""),
    ("nvidia/nemotron-3-super-120b-a12b", ""),
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
    ("arcee-ai/trinity-large-preview:free", "free"),
    ("arcee-ai/trinity-large-thinking", ""),
    ("openai/gpt-5.4-pro", ""),
    ("openai/gpt-5.4-nano", ""),
]
|
|
|
|
# Static per-provider model catalogs, keyed by canonical provider id.
# ``curated_models_for_provider`` falls back to these lists when the live
# lookup returns nothing, and ``detect_provider_for_model`` uses the first
# entry of a list as that provider's default model.
_PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.4",
        "openai/gpt-5.4-mini",
        "xiaomi/mimo-v2-pro",
        "openai/gpt-5.3-codex",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
        "google/gemini-3.1-pro-preview",
        "google/gemini-3.1-flash-lite-preview",
        "qwen/qwen3.5-plus-02-15",
        "qwen/qwen3.5-35b-a3b",
        "stepfun/step-3.5-flash",
        "minimax/minimax-m2.7",
        "minimax/minimax-m2.5",
        "z-ai/glm-5.1",
        "z-ai/glm-5-turbo",
        "moonshotai/kimi-k2.5",
        "x-ai/grok-4.20-beta",
        "nvidia/nemotron-3-super-120b-a12b",
        "nvidia/nemotron-3-super-120b-a12b:free",
        "arcee-ai/trinity-large-preview:free",
        "arcee-ai/trinity-large-thinking",
        "openai/gpt-5.4-pro",
        "openai/gpt-5.4-nano",
    ],
    "openai-codex": [
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-5.1-codex-mini",
        "gpt-5.1-codex-max",
    ],
    "copilot-acp": [
        "copilot-acp",
    ],
    "copilot": [
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-5-mini",
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-4.1",
        "gpt-4o",
        "gpt-4o-mini",
        "claude-opus-4.6",
        "claude-sonnet-4.6",
        "claude-sonnet-4.5",
        "claude-haiku-4.5",
        "gemini-2.5-pro",
        "grok-code-fast-1",
    ],
    "gemini": [
        "gemini-3.1-pro-preview",
        "gemini-3-flash-preview",
        "gemini-3.1-flash-lite-preview",
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite",
        # Gemma open models (also served via AI Studio)
        "gemma-4-31b-it",
        "gemma-4-26b-it",
    ],
    "zai": [
        "glm-5",
        "glm-5-turbo",
        "glm-4.7",
        "glm-4.5",
        "glm-4.5-flash",
    ],
    "kimi-coding": [
        "kimi-for-coding",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-thinking-turbo",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    # NOTE(review): ``_PROVIDER_ALIASES`` maps "moonshot" to "kimi-coding", so
    # lookups that go through ``normalize_provider`` never reach this key.
    "moonshot": [
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "minimax": [
        "MiniMax-M1",
        "MiniMax-M1-40k",
        "MiniMax-M1-80k",
        "MiniMax-M1-128k",
        "MiniMax-M1-256k",
        "MiniMax-M2.5",
        "MiniMax-M2.7",
    ],
    "minimax-cn": [
        "MiniMax-M1",
        "MiniMax-M1-40k",
        "MiniMax-M1-80k",
        "MiniMax-M1-128k",
        "MiniMax-M1-256k",
        "MiniMax-M2.5",
        "MiniMax-M2.7",
    ],
    "anthropic": [
        "claude-opus-4-6",
        "claude-sonnet-4-6",
        "claude-opus-4-5-20251101",
        "claude-sonnet-4-5-20250929",
        "claude-opus-4-20250514",
        "claude-sonnet-4-20250514",
        "claude-haiku-4-5-20251001",
    ],
    "deepseek": [
        "deepseek-chat",
        "deepseek-reasoner",
    ],
    "opencode-zen": [
        "gpt-5.4-pro",
        "gpt-5.4",
        "gpt-5.3-codex",
        "gpt-5.3-codex-spark",
        "gpt-5.2",
        "gpt-5.2-codex",
        "gpt-5.1",
        "gpt-5.1-codex",
        "gpt-5.1-codex-max",
        "gpt-5.1-codex-mini",
        "gpt-5",
        "gpt-5-codex",
        "gpt-5-nano",
        "claude-opus-4-6",
        "claude-opus-4-5",
        "claude-opus-4-1",
        "claude-sonnet-4-6",
        "claude-sonnet-4-5",
        "claude-sonnet-4",
        "claude-haiku-4-5",
        "claude-3-5-haiku",
        "gemini-3.1-pro",
        "gemini-3-pro",
        "gemini-3-flash",
        "minimax-m2.7",
        "minimax-m2.5",
        "minimax-m2.5-free",
        "minimax-m2.1",
        "glm-5",
        "glm-4.7",
        "glm-4.6",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2",
        "qwen3-coder",
        "big-pickle",
    ],
    "opencode-go": [
        "glm-5",
        "kimi-k2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "minimax-m2.7",
        "minimax-m2.5",
    ],
    "ai-gateway": [
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5",
        "openai/gpt-4.1",
        "openai/gpt-4.1-mini",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash",
        "google/gemini-2.5-pro",
        "google/gemini-2.5-flash",
        "deepseek/deepseek-v3.2",
    ],
    "kilocode": [
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash-preview",
    ],
    # Alibaba DashScope Coding platform (coding-intl) — default endpoint.
    # Supports Qwen models + third-party providers (GLM, Kimi, MiniMax).
    # Users with classic DashScope keys should override DASHSCOPE_BASE_URL
    # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
    # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
    "alibaba": [
        "qwen3.5-plus",
        "qwen3-coder-plus",
        "qwen3-coder-next",
        # Third-party models available on coding-intl
        "glm-5",
        "glm-4.7",
        "kimi-k2.5",
        "MiniMax-M2.5",
    ],
    # Curated HF model list — only agentic models that map to OpenRouter defaults.
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B",
        "Qwen/Qwen3.5-35B-A3B",
        "deepseek-ai/DeepSeek-V3.2",
        "moonshotai/Kimi-K2.5",
        "MiniMaxAI/MiniMax-M2.5",
        "zai-org/GLM-5",
        "XiaomiMiMo/MiMo-V2-Flash",
        "moonshotai/Kimi-K2-Thinking",
    ],
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Nous Portal free-model filtering
|
|
# ---------------------------------------------------------------------------
|
|
# Models that are ALLOWED to appear when priced as free on Nous Portal.
|
|
# Any other free model is hidden — prevents promotional/temporary free models
|
|
# from cluttering the selection when users are paying subscribers.
|
|
# Models in this list are ALSO filtered out if they are NOT free (i.e. they
|
|
# should only appear in the menu when they are genuinely free).
|
|
# Allowlist consulted by ``filter_nous_free_models``: these ids are kept only
# while priced at zero, and every other zero-priced id is dropped.
_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
    "xiaomi/mimo-v2-pro",
    "xiaomi/mimo-v2-omni",
})
|
|
|
|
|
|
def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
|
|
"""Return True if *model_id* has zero-cost prompt AND completion pricing."""
|
|
p = pricing.get(model_id)
|
|
if not p:
|
|
return False
|
|
try:
|
|
return float(p.get("prompt", "1")) == 0 and float(p.get("completion", "1")) == 0
|
|
except (TypeError, ValueError):
|
|
return False
|
|
|
|
|
|
def filter_nous_free_models(
    model_ids: list[str],
    pricing: dict[str, dict[str, str]],
) -> list[str]:
    """Apply the Nous Portal free-model policy to *model_ids*.

    A model is kept exactly when its allowlist membership matches its
    free/paid status:
      • allowlisted and free      → keep
      • allowlisted but paid      → drop
      • not allowlisted and paid  → keep (normal case)
      • not allowlisted but free  → drop

    With no pricing data the input list is returned unchanged, since nothing
    can be classified.
    """
    if not pricing:
        return model_ids  # no pricing data — can't filter, show everything

    # "allowlisted == free" covers both keep rows of the table above.
    return [
        mid
        for mid in model_ids
        if (mid in _NOUS_ALLOWED_FREE_MODELS) == _is_model_free(mid, pricing)
    ]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Nous Portal account tier detection
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def fetch_nous_account_tier(access_token: str, portal_base_url: str = "") -> dict[str, Any]:
    """Fetch the user's Nous Portal account/subscription info.

    Calls ``<portal>/api/oauth/account`` with the OAuth access token as a
    bearer credential. *portal_base_url* defaults to
    ``https://portal.nousresearch.com`` when empty.

    Returns the parsed JSON object on success, e.g.::

        {
            "subscription": {
                "plan": "Plus",
                "tier": 2,
                "monthly_charge": 20,
                "credits_remaining": 1686.60,
                ...
            },
            ...
        }

    Returns an empty dict on any failure (network, auth, parse), and also
    when the response is valid JSON but not an object, so callers can always
    use ``.get()`` on the result.
    """
    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
    url = f"{base}/api/oauth/account"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/json",
    }
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=8) as resp:
            payload = json.loads(resp.read().decode())
    except Exception:
        # Deliberate best-effort: tier detection must never raise.
        return {}
    # json.loads may yield a list/str/number/None; honour the dict contract.
    return payload if isinstance(payload, dict) else {}
|
|
|
|
|
|
def is_nous_free_tier(account_info: dict[str, Any]) -> bool:
    """Report whether *account_info* describes a free (unpaid) account.

    The decision is ``subscription.monthly_charge == 0``. If the
    subscription block or the charge is missing, or the charge is not
    numeric, the account is assumed to be paid so that users are not blocked.
    """
    subscription = account_info.get("subscription")
    charge = subscription.get("monthly_charge") if isinstance(subscription, dict) else None
    if charge is None:
        return False
    try:
        return float(charge) == 0
    except (TypeError, ValueError):
        return False
|
|
|
|
|
|
def partition_nous_models_by_tier(
    model_ids: list[str],
    pricing: dict[str, dict[str, str]],
    free_tier: bool,
) -> tuple[list[str], list[str]]:
    """Split Nous models into ``(selectable, unavailable)`` for the user's tier.

    Paid-tier users can select everything, so the second list is empty
    (free-model filtering is a separate concern handled by
    ``filter_nous_free_models``). The same applies when there is no pricing
    data, because nothing can be classified.

    Free-tier users can select only zero-priced models; every other model is
    returned in the ``unavailable`` list. Input order is preserved in both.
    """
    if not free_tier or not pricing:
        return (model_ids, [])

    selectable: list[str] = []
    unavailable: list[str] = []
    for mid in model_ids:
        bucket = selectable if _is_model_free(mid, pricing) else unavailable
        bucket.append(mid)
    return (selectable, unavailable)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# TTL cache for free-tier detection — avoids repeated API calls within a
|
|
# session while still picking up upgrades quickly.
|
|
# ---------------------------------------------------------------------------
|
|
# How long a cached free-tier answer stays valid.
_FREE_TIER_CACHE_TTL: int = 180  # seconds (3 minutes)
# Last answer from ``check_nous_free_tier`` with the ``time.monotonic()``
# reading taken when it was computed; ``None`` means nothing is cached.
_free_tier_cache: tuple[bool, float] | None = None  # (result, timestamp)
|
|
|
|
|
|
def clear_nous_free_tier_cache() -> None:
    """Drop the cached free-tier answer so the next check recomputes it.

    Intended for moments when the account may have changed, e.g. after
    login/logout.
    """
    global _free_tier_cache
    _free_tier_cache = None
|
|
|
|
|
|
def check_nous_free_tier() -> bool:
    """Report whether the current Nous Portal user is on a free (unpaid) tier.

    The answer is cached for ``_FREE_TIER_CACHE_TTL`` seconds so the Portal
    API is not hit on every call; the short lifetime means an account upgrade
    shows up within a few minutes.

    Every failure path — no stored auth state, no access token, or any
    exception — yields ``False`` (assume paid), and that answer is cached
    like any other.
    """
    global _free_tier_cache
    import time

    now = time.monotonic()
    cached = _free_tier_cache
    if cached is not None:
        cached_result, cached_at = cached
        if now - cached_at < _FREE_TIER_CACHE_TTL:
            return cached_result

    is_free = False  # default to paid on error — don't block users
    try:
        from hermes_cli.auth import get_provider_auth_state, resolve_nous_runtime_credentials

        # Ensure we have a fresh token (triggers refresh if needed)
        resolve_nous_runtime_credentials(min_key_ttl_seconds=60)

        state = get_provider_auth_state("nous")
        if state:
            access_token = state.get("access_token", "")
            portal_url = state.get("portal_base_url", "")
            if access_token:
                account_info = fetch_nous_account_tier(access_token, portal_url)
                is_free = is_nous_free_tier(account_info)
    except Exception:
        is_free = False

    _free_tier_cache = (is_free, now)
    return is_free
|
|
|
|
|
|
# Canonical provider id → human-friendly label (see ``provider_label``).
# The keys also feed ``_KNOWN_PROVIDER_NAMES``.
_PROVIDER_LABELS = {
    "openrouter": "OpenRouter",
    "openai-codex": "OpenAI Codex",
    "copilot-acp": "GitHub Copilot ACP",
    "nous": "Nous Portal",
    "copilot": "GitHub Copilot",
    "gemini": "Google AI Studio",
    "zai": "Z.AI / GLM",
    "kimi-coding": "Kimi / Moonshot",
    "minimax": "MiniMax",
    "minimax-cn": "MiniMax (China)",
    "anthropic": "Anthropic",
    "deepseek": "DeepSeek",
    "opencode-zen": "OpenCode Zen",
    "opencode-go": "OpenCode Go",
    "ai-gateway": "AI Gateway",
    "kilocode": "Kilo Code",
    "alibaba": "Alibaba Cloud (DashScope)",
    "qwen-oauth": "Qwen OAuth (Portal)",
    "huggingface": "Hugging Face",
    "custom": "Custom endpoint",
}
|
|
|
|
# Alternate spelling → canonical provider id. ``normalize_provider`` looks
# names up here after stripping and lower-casing them, so keys are lowercase.
_PROVIDER_ALIASES = {
    "glm": "zai",
    "z-ai": "zai",
    "z.ai": "zai",
    "zhipu": "zai",
    "github": "copilot",
    "github-copilot": "copilot",
    "github-models": "copilot",
    "github-model": "copilot",
    "github-copilot-acp": "copilot-acp",
    "copilot-acp-agent": "copilot-acp",
    "google": "gemini",
    "google-gemini": "gemini",
    "google-ai-studio": "gemini",
    "kimi": "kimi-coding",
    "moonshot": "kimi-coding",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
    "claude-code": "anthropic",
    "deep-seek": "deepseek",
    "opencode": "opencode-zen",
    "zen": "opencode-zen",
    "go": "opencode-go",
    "opencode-go-sub": "opencode-go",
    "aigateway": "ai-gateway",
    "vercel": "ai-gateway",
    "vercel-ai-gateway": "ai-gateway",
    "kilo": "kilocode",
    "kilo-code": "kilocode",
    "kilo-gateway": "kilocode",
    "dashscope": "alibaba",
    "aliyun": "alibaba",
    "qwen": "alibaba",
    "alibaba-cloud": "alibaba",
    "qwen-portal": "qwen-oauth",
    "hf": "huggingface",
    "hugging-face": "huggingface",
    "huggingface-hub": "huggingface",
}
|
|
|
|
|
|
def model_ids() -> list[str]:
    """List the OpenRouter model ids, in catalog order, without their tags."""
    return [entry[0] for entry in OPENROUTER_MODELS]
|
|
|
|
|
|
def menu_labels() -> list[str]:
    """Build menu labels for the OpenRouter catalog.

    A tagged entry renders as ``'anthropic/claude-opus-4.6 (recommended)'``;
    an entry with an empty tag renders as the bare model id.
    """
    return [
        f"{mid} ({desc})" if desc else mid
        for mid, desc in OPENROUTER_MODELS
    ]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Pricing helpers — fetch live pricing from OpenRouter-compatible /v1/models
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Cache filled by ``fetch_models_with_pricing``. The outer key is the endpoint
# base URL (trailing slashes stripped); each value maps
# model_id → {"prompt": str, "completion": str, ...} for that endpoint.
_pricing_cache: dict[str, dict[str, dict[str, str]]] = {}
|
|
|
|
|
|
def _format_price_per_mtok(per_token_str: str) -> str:
|
|
"""Convert a per-token price string to a human-friendly $/Mtok string.
|
|
|
|
Always uses 2 decimal places so that prices align vertically when
|
|
right-justified in a column (the decimal point stays in the same position).
|
|
|
|
Examples:
|
|
"0.000003" → "$3.00" (per million tokens)
|
|
"0.00003" → "$30.00"
|
|
"0.00000015" → "$0.15"
|
|
"0.0000001" → "$0.10"
|
|
"0.00018" → "$180.00"
|
|
"0" → "free"
|
|
"""
|
|
try:
|
|
val = float(per_token_str)
|
|
except (TypeError, ValueError):
|
|
return "?"
|
|
if val == 0:
|
|
return "free"
|
|
per_m = val * 1_000_000
|
|
return f"${per_m:.2f}"
|
|
|
|
|
|
def format_pricing_label(pricing: dict[str, str] | None) -> str:
    """Render one model's pricing as a compact single-line label.

    Possible shapes:
      • ``""`` — no pricing, or both prompt and completion prices are empty
      • ``"free"`` — prompt and completion both cost nothing
      • ``"$3.00/Mtok"`` — input and output cost the same, no cache price
      • ``"in $3.00 · out $15.00 · cache $0.30/Mtok"`` — general form; the
        cache part is left out when it is absent, unparseable, or equal to
        the input price
    """
    if not pricing:
        return ""

    raw_in = pricing.get("prompt", "")
    raw_out = pricing.get("completion", "")
    if not (raw_in or raw_out):
        return ""

    inp = _format_price_per_mtok(raw_in)
    out = _format_price_per_mtok(raw_out)
    if (inp, out) == ("free", "free"):
        return "free"

    raw_cache = pricing.get("input_cache_read", "")
    cache_str = _format_price_per_mtok(raw_cache) if raw_cache else ""
    if not cache_str and inp == out:
        return f"{inp}/Mtok"

    segments = [f"in {inp}", f"out {out}"]
    if cache_str not in ("", "?", inp):
        segments.append(f"cache {cache_str}")
    return " · ".join(segments) + "/Mtok"
|
|
|
|
|
|
def format_model_pricing_table(
    models: list[tuple[str, str]],
    pricing_map: dict[str, dict[str, str]],
    current_model: str = "",
    indent: str = " ",
) -> list[str]:
    """Lay out models and their prices as an aligned text table.

    *models* is ``[(model_id, description), ...]``; the description is not
    shown. *pricing_map* maps model ids to pricing dicts, and models without
    an entry get blank price cells. The row whose id equals *current_model*
    is suffixed with a "current" marker. A Cache column is added only when
    at least one model has an ``input_cache_read`` price.

    Returns the header line, a dashed rule, and one line per model, each
    prefixed with *indent*. An empty *models* list yields ``[]``.
    """
    if not models:
        return []

    # One record per model: (model_id, in_price, out_price, cache_price, is_current)
    records: list[tuple[str, str, str, str, bool]] = []
    show_cache = False
    for mid, _desc in models:
        entry = pricing_map.get(mid)
        if entry:
            price_in = _format_price_per_mtok(entry.get("prompt", ""))
            price_out = _format_price_per_mtok(entry.get("completion", ""))
            raw_cache = entry.get("input_cache_read", "")
            price_cache = _format_price_per_mtok(raw_cache) if raw_cache else ""
            if price_cache:
                show_cache = True
        else:
            price_in = price_out = price_cache = ""
        records.append((mid, price_in, price_out, price_cache, mid == current_model))

    # Column widths come from the data so right-justified decimals line up.
    name_w = max(len(rec[0]) for rec in records) + 2
    widest_in = max((len(rec[1]) for rec in records if rec[1]), default=4)
    widest_out = max((len(rec[2]) for rec in records if rec[2]), default=4)
    price_w = max(widest_in, widest_out, 3)  # 3 = minimum for "In" / "Out" header
    if show_cache:
        widest_cache = max((len(rec[3]) for rec in records if rec[3]), default=4)
        cache_w = max(widest_cache, 5)  # 5 = minimum for "Cache" header
    else:
        cache_w = 0

    def _render(name_cell: str, in_cell: str, out_cell: str, cache_cell: str, tail: str) -> str:
        # Cells are pre-padded; the cache cell is emitted only in cache mode.
        cells = [name_cell, in_cell, out_cell]
        if show_cache:
            cells.append(cache_cell)
        return indent + " ".join(cells) + tail

    lines: list[str] = [
        _render(
            "Model".ljust(name_w),
            "In".rjust(price_w),
            "Out".rjust(price_w),
            "Cache".rjust(cache_w),
            " /Mtok",
        ),
        _render("-" * name_w, "-" * price_w, "-" * price_w, "-" * cache_w, ""),
    ]
    for mid, price_in, price_out, price_cache, is_current in records:
        lines.append(
            _render(
                mid.ljust(name_w),
                price_in.rjust(price_w),
                price_out.rjust(price_w),
                price_cache.rjust(cache_w),
                " ← current" if is_current else "",
            )
        )
    return lines
|
|
|
|
|
|
def fetch_models_with_pricing(
    api_key: str | None = None,
    base_url: str = "https://openrouter.ai/api",
    timeout: float = 8.0,
    *,
    force_refresh: bool = False,
) -> dict[str, dict[str, str]]:
    """Fetch ``/v1/models`` and return ``{model_id: {prompt, completion}}`` pricing.

    Works with any OpenRouter-compatible endpoint (OpenRouter, Nous Portal).
    Each pricing dict always has ``prompt`` and ``completion`` (as strings,
    possibly empty) and, when the endpoint reports them, ``input_cache_read``
    and ``input_cache_write``.

    Results are cached per *base_url* (trailing slashes ignored) so repeated
    calls are free; pass ``force_refresh=True`` to bypass the cache. A failed
    fetch is cached as ``{}`` too, so it is not retried until a refresh is
    forced.

    Never raises: network, HTTP and JSON errors return ``{}``, and malformed
    parts of an otherwise valid payload (non-object payload, non-list
    ``data``, non-object items, missing id or pricing) are skipped.
    """
    cache_key = (base_url or "").rstrip("/")
    if not force_refresh and cache_key in _pricing_cache:
        return _pricing_cache[cache_key]

    url = cache_key + "/v1/models"
    headers: dict[str, str] = {"Accept": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            payload = json.loads(resp.read().decode())
    except Exception:
        # Deliberate best-effort: pricing is optional decoration.
        _pricing_cache[cache_key] = {}
        return {}

    def _as_price(value: Any) -> str:
        # JSON null must not turn into the literal string "None".
        return "" if value is None else str(value)

    items = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(items, list):
        items = []

    result: dict[str, dict[str, str]] = {}
    for item in items:
        if not isinstance(item, dict):
            continue
        mid = item.get("id")
        pricing = item.get("pricing")
        if not (isinstance(mid, str) and mid and isinstance(pricing, dict)):
            continue
        entry: dict[str, str] = {
            "prompt": _as_price(pricing.get("prompt", "")),
            "completion": _as_price(pricing.get("completion", "")),
        }
        # Cache prices are optional; copy them only when present and non-empty.
        for key in ("input_cache_read", "input_cache_write"):
            if pricing.get(key):
                entry[key] = str(pricing[key])
        result[mid] = entry

    _pricing_cache[cache_key] = result
    return result
|
|
|
|
|
|
def _resolve_openrouter_api_key() -> str:
|
|
"""Best-effort OpenRouter API key for pricing fetch."""
|
|
return os.getenv("OPENROUTER_API_KEY", "").strip()
|
|
|
|
|
|
def _resolve_nous_pricing_credentials() -> tuple[str, str]:
|
|
"""Return ``(api_key, base_url)`` for Nous Portal pricing, or empty strings."""
|
|
try:
|
|
from hermes_cli.auth import resolve_nous_runtime_credentials
|
|
creds = resolve_nous_runtime_credentials()
|
|
if creds:
|
|
return (creds.get("api_key", ""), creds.get("base_url", ""))
|
|
except Exception:
|
|
pass
|
|
return ("", "")
|
|
|
|
|
|
def get_pricing_for_provider(provider: str) -> dict[str, dict[str, str]]:
    """Fetch live pricing for a provider that exposes it.

    Only ``openrouter`` and ``nous`` (after alias normalization) are
    supported. Every other provider, and ``nous`` without a resolvable base
    URL, yields ``{}``.
    """
    canonical = normalize_provider(provider)

    if canonical == "openrouter":
        return fetch_models_with_pricing(
            api_key=_resolve_openrouter_api_key(),
            base_url="https://openrouter.ai/api",
        )

    if canonical != "nous":
        return {}

    api_key, base_url = _resolve_nous_pricing_credentials()
    if not base_url:
        return {}

    # Nous base_url typically looks like https://inference-api.nousresearch.com/v1,
    # while the fetch helper appends /v1/models itself — drop the suffix.
    root = base_url.rstrip("/")
    if root.endswith("/v1"):
        root = root[: -len("/v1")]
    return fetch_models_with_pricing(api_key=api_key, base_url=root)
|
|
|
|
|
|
# All provider IDs and aliases that are valid for the provider:model syntax.
# ``parse_model_input`` treats the text before the first colon as a provider
# only when it appears in this set.
_KNOWN_PROVIDER_NAMES: set[str] = (
    set(_PROVIDER_LABELS.keys())
    | set(_PROVIDER_ALIASES.keys())
    | {"openrouter", "custom"}
)
|
|
|
|
|
|
def list_available_providers() -> list[dict[str, str]]:
    """Describe every provider usable with the ``provider:model`` syntax.

    Returns one dict per provider, in display order, with these keys:

    * ``id`` — canonical provider id
    * ``label`` — human-friendly name (falls back to the id)
    * ``aliases`` — list of alternate names that normalize to this id
    * ``authenticated`` — whether credentials appear to be available

    The credential check is best-effort: any error while checking counts as
    "not authenticated" for that provider.
    """
    # Canonical providers in display order
    display_order = (
        "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
        "gemini", "huggingface",
        "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
        "qwen-oauth",
        "opencode-zen", "opencode-go",
        "ai-gateway", "deepseek", "custom",
    )

    # Invert alias → canonical into canonical → [aliases]
    reverse_aliases: dict[str, list[str]] = {}
    for alias, canonical in _PROVIDER_ALIASES.items():
        reverse_aliases.setdefault(canonical, []).append(alias)

    def _has_credentials(pid: str):
        try:
            from hermes_cli.auth import get_auth_status, has_usable_secret

            if pid == "custom":
                # A custom endpoint counts as configured once it has a base URL.
                return bool((_get_custom_base_url() or "").strip())
            if pid == "openrouter":
                return has_usable_secret(os.getenv("OPENROUTER_API_KEY", ""))
            status = get_auth_status(pid)
            return bool(status.get("logged_in") or status.get("configured"))
        except Exception:
            return False

    return [
        {
            "id": pid,
            "label": _PROVIDER_LABELS.get(pid, pid),
            "aliases": reverse_aliases.get(pid, []),
            "authenticated": _has_credentials(pid),
        }
        for pid in display_order
    ]
|
|
|
|
|
|
def parse_model_input(raw: str, current_provider: str) -> tuple[str, str]:
    """Split ``/model`` input into ``(provider, model)``.

    A ``provider:model`` prefix switches providers at runtime::

        openrouter:anthropic/claude-sonnet-4.5 → ("openrouter", "anthropic/claude-sonnet-4.5")
        nous:hermes-3                          → ("nous", "hermes-3")
        anthropic/claude-sonnet-4.5            → (current_provider, "anthropic/claude-sonnet-4.5")
        gpt-5.4                                → (current_provider, "gpt-5.4")

    The text before the first colon counts as a provider only when it is a
    recognized provider name or alias, so model names that merely contain a
    colon (e.g. ``anthropic/claude-3.5-sonnet:beta``) are left intact.

    Named custom providers use a triple form: ``custom:local:qwen`` →
    ``("custom:local", "qwen")``, while ``custom:qwen`` → ``("custom", "qwen")``.

    When no provider prefix is recognized, the result is *current_provider*
    paired with the whole stripped input.
    """
    text = raw.strip()
    unchanged = (current_provider, text)

    head, colon, tail = text.partition(":")
    if not (colon and head):
        # No colon at all, or the input starts with one.
        return unchanged

    provider_key = head.strip().lower()
    model_part = tail.strip()
    if not (provider_key and model_part and provider_key in _KNOWN_PROVIDER_NAMES):
        return unchanged

    if provider_key == "custom":
        custom_name, inner_colon, custom_model = model_part.partition(":")
        custom_name = custom_name.strip()
        custom_model = custom_model.strip()
        # Both halves must be non-empty, otherwise fall back to plain custom.
        if inner_colon and custom_name and custom_model:
            return (f"custom:{custom_name}", custom_model)

    return (normalize_provider(provider_key), model_part)
|
|
|
|
|
|
def _get_custom_base_url() -> str:
|
|
"""Get the custom endpoint base_url from config.yaml."""
|
|
try:
|
|
from hermes_cli.config import load_config
|
|
config = load_config()
|
|
model_cfg = config.get("model", {})
|
|
if isinstance(model_cfg, dict):
|
|
return str(model_cfg.get("base_url", "")).strip()
|
|
except Exception:
|
|
pass
|
|
return ""
|
|
|
|
|
|
def curated_models_for_provider(provider: Optional[str]) -> list[tuple[str, str]]:
    """Return ``(model_id, description)`` pairs to offer for *provider*.

    OpenRouter always uses the curated ``OPENROUTER_MODELS`` list. For every
    other provider the ids come from ``provider_model_ids`` when it returns
    any, and otherwise from the static ``_PROVIDER_MODELS`` catalog;
    descriptions are empty in both cases. An unknown provider yields ``[]``.
    """
    canonical = normalize_provider(provider)
    if canonical == "openrouter":
        return list(OPENROUTER_MODELS)

    # Prefer the provider lookup; fall back to the static catalog if it is empty.
    ids = provider_model_ids(canonical) or _PROVIDER_MODELS.get(canonical, [])
    return [(model, "") for model in ids]
|
|
|
|
|
|
def detect_provider_for_model(
    model_name: str,
    current_provider: str,
) -> Optional[tuple[str, str]]:
    """Auto-detect the best provider for a model name.

    Returns ``(provider_id, model_name)`` — the model name may be remapped
    (e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
    Returns ``None`` when no confident match is found, including when the
    model already belongs to the current provider's catalog.

    *current_provider* may be an alias; it is normalized once and the
    canonical id is used for every comparison.

    Priority:
        0. Bare provider name → switch to that provider's default model
        1. Direct provider with credentials (highest)
        2. Direct provider without credentials → remap to OpenRouter slug
        3. OpenRouter catalog match
    """
    name = (model_name or "").strip()
    if not name:
        return None

    name_lower = name.lower()
    # Catalogs are keyed by canonical ids, so resolve aliases up front.
    current = normalize_provider(current_provider)

    # --- Step 0: bare provider name typed as model ---
    # If someone types `/model nous` or `/model anthropic`, treat it as a
    # provider switch and pick the first model from that provider's catalog.
    # Skip "custom" and "openrouter" — custom has no model catalog, and
    # openrouter requires an explicit model name to be useful.
    resolved_provider = _PROVIDER_ALIASES.get(name_lower, name_lower)
    if resolved_provider not in {"custom", "openrouter"}:
        default_models = _PROVIDER_MODELS.get(resolved_provider, [])
        if (
            resolved_provider in _PROVIDER_LABELS
            and default_models
            and resolved_provider != current
        ):
            return (resolved_provider, default_models[0])

    # Aggregators list other providers' models — never auto-switch TO them
    aggregators = {"nous", "openrouter"}

    def _lists_model(models: list[str]) -> bool:
        return any(name_lower == m.lower() for m in models)

    # If the model belongs to the current provider's catalog, don't suggest switching
    if _lists_model(_PROVIDER_MODELS.get(current, [])):
        return None

    # --- Step 1: check static provider catalogs for a direct match ---
    direct_match: Optional[str] = None
    for pid, models in _PROVIDER_MODELS.items():
        if pid == current or pid in aggregators:
            continue
        if _lists_model(models):
            direct_match = pid
            break

    if direct_match:
        # Check if we have credentials for this provider (env vars only)
        has_creds = False
        try:
            from hermes_cli.auth import PROVIDER_REGISTRY

            pconfig = PROVIDER_REGISTRY.get(direct_match)
            if pconfig:
                has_creds = any(
                    os.getenv(env_var, "").strip()
                    for env_var in pconfig.api_key_env_vars
                )
        except Exception:
            # Best-effort: an unavailable registry means "no credentials".
            pass

        if has_creds:
            return (direct_match, name)

        # No direct creds — try to find this model on OpenRouter instead
        or_slug = _find_openrouter_slug(name)
        if or_slug:
            return ("openrouter", or_slug)
        # Still return the direct provider — credential resolution will
        # give a clear error rather than silently using the wrong provider
        return (direct_match, name)

    # --- Step 2: check OpenRouter catalog ---
    or_slug = _find_openrouter_slug(name)
    if or_slug and (current != "openrouter" or or_slug != name):
        # Either switching to OpenRouter, or already there but the name
        # resolved to a different (fully-qualified) slug.
        return ("openrouter", or_slug)

    return None
|
|
|
|
|
|
def _find_openrouter_slug(model_name: str) -> Optional[str]:
    """Resolve a bare or fully-qualified model name to an OpenRouter slug.

    The lookup is case-insensitive and runs against ``OPENROUTER_MODELS``
    in two passes:

    1. the whole slug (``anthropic/claude-opus-4.6`` -> itself);
    2. only the part after the first ``/``
       (``claude-opus-4.6`` -> ``anthropic/claude-opus-4.6``).

    Returns the catalog's own spelling of the slug, or ``None`` when the
    name is blank or nothing matches.
    """
    wanted = model_name.strip().lower()
    if not wanted:
        return None

    slugs = [slug for slug, _description in OPENROUTER_MODELS]

    # Pass 1: the caller already supplied the "vendor/model" form.
    exact = next((slug for slug in slugs if slug.lower() == wanted), None)
    if exact is not None:
        return exact

    # Pass 2: compare against the model portion only.
    return next(
        (
            slug
            for slug in slugs
            if "/" in slug and slug.partition("/")[2].lower() == wanted
        ),
        None,
    )
|
|
|
|
|
|
def normalize_provider(provider: Optional[str]) -> str:
    """Map a provider id or alias onto Hermes' canonical provider id.

    A missing/empty provider is treated as ``"openrouter"``. The value is
    stripped and lower-cased before the ``_PROVIDER_ALIASES`` lookup; names
    without an alias entry are returned in that cleaned-up form.

    Note: ``"auto"`` is not resolved here, it passes through unchanged. Use
    ``hermes_cli.auth.resolve_provider()`` to turn it into a concrete
    provider based on credentials and environment.
    """
    key = (provider or "openrouter").strip().lower()
    if key in _PROVIDER_ALIASES:
        return _PROVIDER_ALIASES[key]
    return key
|
|
|
|
|
|
def provider_label(provider: Optional[str]) -> str:
    """Return the display label for a provider id or alias.

    ``"auto"`` (any casing) is labelled ``"Auto"``. Otherwise the provider
    is normalized and looked up in ``_PROVIDER_LABELS``; unknown providers
    fall back to the caller's own spelling (or ``"OpenRouter"`` when that
    is blank).
    """
    raw = (provider or "openrouter").strip()
    lowered = raw.lower()
    if lowered == "auto":
        return "Auto"

    canonical = normalize_provider(lowered)
    fallback = raw or "OpenRouter"
    return _PROVIDER_LABELS.get(canonical, fallback)
|
|
|
|
|
|
def _resolve_copilot_catalog_api_key() -> str:
|
|
"""Best-effort GitHub token for fetching the Copilot model catalog."""
|
|
try:
|
|
from hermes_cli.auth import resolve_api_key_provider_credentials
|
|
|
|
creds = resolve_api_key_provider_credentials("copilot")
|
|
return str(creds.get("api_key") or "").strip()
|
|
except Exception:
|
|
return ""
|
|
|
|
|
|
def provider_model_ids(provider: Optional[str]) -> list[str]:
    """Return the best available list of model IDs for ``provider``.

    The provider is normalized first, then:

    - ``openrouter`` and ``openai-codex`` delegate entirely to
      ``model_ids()`` / ``get_codex_model_ids()``;
    - ``copilot`` / ``copilot-acp``, ``nous``, ``anthropic``,
      ``ai-gateway`` and ``custom`` try a live lookup first;
    - when a live lookup fails or comes back empty, a copy of the static
      ``_PROVIDER_MODELS`` entry is returned (``copilot-acp`` reuses the
      ``copilot`` entry; unknown providers yield ``[]``).
    """
    pid = normalize_provider(provider)

    if pid == "openrouter":
        return model_ids()

    if pid == "openai-codex":
        from hermes_cli.codex_models import get_codex_model_ids

        return get_codex_model_ids()

    if pid in ("copilot", "copilot-acp"):
        try:
            fetched = _fetch_github_models(_resolve_copilot_catalog_api_key())
            if fetched:
                return fetched
        except Exception:
            pass
        if pid == "copilot-acp":
            # ACP has no static catalog of its own; share Copilot's.
            return list(_PROVIDER_MODELS.get("copilot", []))
    elif pid == "nous":
        # Live Nous Portal /models endpoint, if runtime credentials resolve.
        try:
            from hermes_cli.auth import fetch_nous_models, resolve_nous_runtime_credentials

            nous_creds = resolve_nous_runtime_credentials()
            if nous_creds:
                fetched = fetch_nous_models(
                    api_key=nous_creds.get("api_key", ""),
                    inference_base_url=nous_creds.get("base_url", ""),
                )
                if fetched:
                    return fetched
        except Exception:
            pass
    elif pid == "anthropic":
        fetched = _fetch_anthropic_models()
        if fetched:
            return fetched
    elif pid == "ai-gateway":
        fetched = _fetch_ai_gateway_models()
        if fetched:
            return fetched
    elif pid == "custom":
        endpoint = _get_custom_base_url()
        if endpoint:
            # First non-empty of the usual API-key env vars, in priority order.
            key_env_vars = ("CUSTOM_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY")
            key = next(
                (value for value in (os.getenv(name, "") for name in key_env_vars) if value),
                "",
            )
            fetched = fetch_api_models(key, endpoint)
            if fetched:
                return fetched

    return list(_PROVIDER_MODELS.get(pid, []))
|
|
|
|
|
|
def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
|
|
"""Fetch available models from the Anthropic /v1/models endpoint.
|
|
|
|
Uses resolve_anthropic_token() to find credentials (env vars or
|
|
Claude Code auto-discovery). Returns sorted model IDs or None.
|
|
"""
|
|
try:
|
|
from agent.anthropic_adapter import resolve_anthropic_token, _is_oauth_token
|
|
except ImportError:
|
|
return None
|
|
|
|
token = resolve_anthropic_token()
|
|
if not token:
|
|
return None
|
|
|
|
headers: dict[str, str] = {"anthropic-version": "2023-06-01"}
|
|
if _is_oauth_token(token):
|
|
headers["Authorization"] = f"Bearer {token}"
|
|
from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS
|
|
headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
|
|
else:
|
|
headers["x-api-key"] = token
|
|
|
|
req = urllib.request.Request(
|
|
"https://api.anthropic.com/v1/models",
|
|
headers=headers,
|
|
)
|
|
try:
|
|
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
|
data = json.loads(resp.read().decode())
|
|
models = [m["id"] for m in data.get("data", []) if m.get("id")]
|
|
# Sort: latest/largest first (opus > sonnet > haiku, higher version first)
|
|
return sorted(models, key=lambda m: (
|
|
"opus" not in m, # opus first
|
|
"sonnet" not in m, # then sonnet
|
|
"haiku" not in m, # then haiku
|
|
m, # alphabetical within tier
|
|
))
|
|
except Exception as e:
|
|
import logging
|
|
logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e)
|
|
return None
|
|
|
|
|
|
def _payload_items(payload: Any) -> list[dict[str, Any]]:
|
|
if isinstance(payload, list):
|
|
return [item for item in payload if isinstance(item, dict)]
|
|
if isinstance(payload, dict):
|
|
data = payload.get("data", [])
|
|
if isinstance(data, list):
|
|
return [item for item in data if isinstance(item, dict)]
|
|
return []
|
|
|
|
|
|
def copilot_default_headers() -> dict[str, str]:
    """Build the default header set for Copilot API requests.

    Prefers ``hermes_cli.copilot_auth.copilot_request_headers`` (called as
    an agent turn). When that import is unavailable, a static set is used
    instead, carrying the editor version, a user agent, and the
    ``Openai-Intent`` / ``x-initiator`` headers that opencode and the
    Copilot CLI send on every request.
    """
    static_headers = {
        "Editor-Version": COPILOT_EDITOR_VERSION,
        "User-Agent": "HermesAgent/1.0",
        "Openai-Intent": "conversation-edits",
        "x-initiator": "agent",
    }
    try:
        from hermes_cli.copilot_auth import copilot_request_headers

        return copilot_request_headers(is_agent_turn=True)
    except ImportError:
        return static_headers
|
|
|
|
|
|
def _copilot_catalog_item_is_text_model(item: dict[str, Any]) -> bool:
|
|
model_id = str(item.get("id") or "").strip()
|
|
if not model_id:
|
|
return False
|
|
|
|
if item.get("model_picker_enabled") is False:
|
|
return False
|
|
|
|
capabilities = item.get("capabilities")
|
|
if isinstance(capabilities, dict):
|
|
model_type = str(capabilities.get("type") or "").strip().lower()
|
|
if model_type and model_type != "chat":
|
|
return False
|
|
|
|
supported_endpoints = item.get("supported_endpoints")
|
|
if isinstance(supported_endpoints, list):
|
|
normalized_endpoints = {
|
|
str(endpoint).strip()
|
|
for endpoint in supported_endpoints
|
|
if str(endpoint).strip()
|
|
}
|
|
if normalized_endpoints and not normalized_endpoints.intersection(
|
|
{"/chat/completions", "/responses", "/v1/messages"}
|
|
):
|
|
return False
|
|
|
|
return True
|
|
|
|
|
|
def fetch_github_model_catalog(
    api_key: Optional[str] = None, timeout: float = 5.0
) -> Optional[list[dict[str, Any]]]:
    """Download the Copilot model catalog and keep its text/chat entries.

    The request to ``COPILOT_MODELS_URL`` is attempted with a bearer token
    first (only when ``api_key`` is given) and then once more with just the
    default Copilot headers. The first attempt that yields at least one
    usable entry wins.

    Args:
        api_key: Optional token sent as ``Authorization: Bearer ...``.
        timeout: Socket timeout in seconds for each attempt.

    Returns:
        Catalog entries that pass ``_copilot_catalog_item_is_text_model``,
        de-duplicated by id in first-seen order, or ``None`` when every
        attempt failed or produced no usable entries.
    """
    header_sets: list[dict[str, str]] = []
    if api_key:
        header_sets.append(
            {**copilot_default_headers(), "Authorization": f"Bearer {api_key}"}
        )
    header_sets.append(copilot_default_headers())

    for headers in header_sets:
        request = urllib.request.Request(COPILOT_MODELS_URL, headers=headers)
        try:
            with urllib.request.urlopen(request, timeout=timeout) as resp:
                payload = json.loads(resp.read().decode())

            # dicts keep insertion order, so this preserves first-seen order.
            unique: dict[str, dict[str, Any]] = {}
            for entry in _payload_items(payload):
                if not _copilot_catalog_item_is_text_model(entry):
                    continue
                entry_id = str(entry.get("id") or "").strip()
                if entry_id and entry_id not in unique:
                    unique[entry_id] = entry

            if unique:
                return list(unique.values())
        except Exception:
            # Best effort: fall through to the next header set.
            continue

    return None
|
|
|
|
|
|
def _is_github_models_base_url(base_url: Optional[str]) -> bool:
    """Tell whether ``base_url`` points at Copilot / GitHub Models.

    The URL is stripped, relieved of trailing slashes and lower-cased, then
    checked for a ``COPILOT_BASE_URL`` or
    ``https://models.github.ai/inference`` prefix. ``None`` and blank
    values are not a match.
    """
    candidate = (base_url or "").strip().rstrip("/").lower()
    known_prefixes = (COPILOT_BASE_URL, "https://models.github.ai/inference")
    return candidate.startswith(known_prefixes)
|
|
|
|
|
|
def _fetch_github_models(api_key: Optional[str] = None, timeout: float = 5.0) -> Optional[list[str]]:
    """Return just the model IDs from the live Copilot catalog.

    Thin wrapper over ``fetch_github_model_catalog``: entries with a falsy
    ``id`` are skipped, and ``None`` is returned when the catalog itself is
    unavailable or empty.
    """
    entries = fetch_github_model_catalog(api_key=api_key, timeout=timeout)
    if not entries:
        return None
    return list(filter(None, (entry.get("id") for entry in entries)))
|
|
|
|
|
|
# Exact-match rewrite table consulted by ``normalize_copilot_model_id``:
# keys are vendor-prefixed ids (``vendor/model``), values are the bare model
# id that is returned in their place. Several keys intentionally collapse
# onto the same target (e.g. every ``openai/gpt-5*`` variant listed here
# maps to ``gpt-5-mini``).
_COPILOT_MODEL_ALIASES = {
    "openai/gpt-5": "gpt-5-mini",
    "openai/gpt-5-chat": "gpt-5-mini",
    "openai/gpt-5-mini": "gpt-5-mini",
    "openai/gpt-5-nano": "gpt-5-mini",
    "openai/gpt-4.1": "gpt-4.1",
    "openai/gpt-4.1-mini": "gpt-4.1",
    "openai/gpt-4.1-nano": "gpt-4.1",
    "openai/gpt-4o": "gpt-4o",
    "openai/gpt-4o-mini": "gpt-4o-mini",
    "openai/o1": "gpt-5.2",
    "openai/o1-mini": "gpt-5-mini",
    "openai/o1-preview": "gpt-5.2",
    "openai/o3": "gpt-5.3-codex",
    "openai/o3-mini": "gpt-5-mini",
    "openai/o4-mini": "gpt-5-mini",
    "anthropic/claude-opus-4.6": "claude-opus-4.6",
    "anthropic/claude-sonnet-4.6": "claude-sonnet-4.6",
    "anthropic/claude-sonnet-4.5": "claude-sonnet-4.5",
    "anthropic/claude-haiku-4.5": "claude-haiku-4.5",
}
|
|
|
|
|
|
def _copilot_catalog_ids(
|
|
catalog: Optional[list[dict[str, Any]]] = None,
|
|
api_key: Optional[str] = None,
|
|
) -> set[str]:
|
|
if catalog is None and api_key:
|
|
catalog = fetch_github_model_catalog(api_key=api_key)
|
|
if not catalog:
|
|
return set()
|
|
return {
|
|
str(item.get("id") or "").strip()
|
|
for item in catalog
|
|
if str(item.get("id") or "").strip()
|
|
}
|
|
|
|
|
|
def normalize_copilot_model_id(
    model_id: Optional[str],
    *,
    catalog: Optional[list[dict[str, Any]]] = None,
    api_key: Optional[str] = None,
) -> str:
    """Translate a user-supplied model id into the id Copilot expects.

    Resolution order:

    1. An exact ``_COPILOT_MODEL_ALIASES`` hit on the stripped id wins
       immediately, before any catalog is consulted, so aliased ids never
       trigger a network request.
    2. Otherwise a list of candidates is tried in order: the id itself, the
       part after the first ``/``, and the id with a trailing ``-mini`` /
       ``-nano`` / ``-chat`` removed. For each candidate an alias hit is
       preferred over a catalog hit.
    3. If nothing matches, the part after the first ``/`` is returned (or
       the id itself when it has no ``/``).

    Args:
        model_id: Requested model id; ``None``/blank yields ``""``.
        catalog: Optional pre-fetched Copilot catalog entries.
        api_key: Optional token used to fetch the catalog when ``catalog``
            is ``None``.

    Returns:
        The normalized model id, or ``""`` for blank input.
    """
    raw = str(model_id or "").strip()
    if not raw:
        return ""

    alias = _COPILOT_MODEL_ALIASES.get(raw)
    if alias:
        return alias

    has_vendor_prefix = "/" in raw
    bare = raw.split("/", 1)[1].strip() if has_vendor_prefix else raw

    candidates = [raw]
    if has_vendor_prefix:
        candidates.append(bare)
    for suffix in ("-mini", "-nano", "-chat"):
        if raw.endswith(suffix):
            candidates.append(raw[: -len(suffix)])

    # Resolved only now: with just an api_key this is a live HTTP fetch, which
    # the alias short-circuit above must not have to pay for.
    catalog_ids = _copilot_catalog_ids(catalog=catalog, api_key=api_key)

    seen: set[str] = set()
    for candidate in candidates:
        if not candidate or candidate in seen:
            continue
        seen.add(candidate)
        if candidate in _COPILOT_MODEL_ALIASES:
            return _COPILOT_MODEL_ALIASES[candidate]
        if candidate in catalog_ids:
            return candidate

    return bare
|
|
|
|
|
|
def _github_reasoning_efforts_for_model_id(model_id: str) -> list[str]:
    """Guess reasoning-effort levels purely from the model id.

    Ids starting with an o1/o3/o4 prefix (with or without ``openai/``) get
    a copy of ``COPILOT_REASONING_EFFORTS_O_SERIES``; ids that normalize to
    something starting with ``gpt-5`` get a copy of
    ``COPILOT_REASONING_EFFORTS_GPT5``; everything else gets ``[]``.
    """
    lowered = (model_id or "").strip().lower()
    o_series_prefixes = ("openai/o1", "openai/o3", "openai/o4", "o1", "o3", "o4")
    if lowered.startswith(o_series_prefixes):
        return list(COPILOT_REASONING_EFFORTS_O_SERIES)

    if normalize_copilot_model_id(model_id).lower().startswith("gpt-5"):
        return list(COPILOT_REASONING_EFFORTS_GPT5)

    return []
|
|
|
|
|
|
def _should_use_copilot_responses_api(model_id: str) -> bool:
|
|
"""Decide whether a Copilot model should use the Responses API.
|
|
|
|
Replicates opencode's ``shouldUseCopilotResponsesApi`` logic:
|
|
GPT-5+ models use Responses API, except ``gpt-5-mini`` which uses
|
|
Chat Completions. All non-GPT models (Claude, Gemini, etc.) use
|
|
Chat Completions.
|
|
"""
|
|
import re
|
|
|
|
match = re.match(r"^gpt-(\d+)", model_id)
|
|
if not match:
|
|
return False
|
|
major = int(match.group(1))
|
|
return major >= 5 and not model_id.startswith("gpt-5-mini")
|
|
|
|
|
|
def copilot_model_api_mode(
    model_id: Optional[str],
    *,
    catalog: Optional[list[dict[str, Any]]] = None,
    api_key: Optional[str] = None,
) -> str:
    """Determine which API surface a Copilot model should be called through.

    The model id pattern (matching opencode's approach) is the primary
    signal; the catalog's ``supported_endpoints`` is consulted only for
    models the pattern check does not claim.

    Args:
        model_id: Requested model id; blank input yields
            ``"chat_completions"``.
        catalog: Optional pre-fetched Copilot catalog entries.
        api_key: Optional token used to fetch the catalog when ``catalog``
            is ``None``. The catalog is fetched at most once per call.

    Returns:
        ``"codex_responses"``, ``"anthropic_messages"`` or
        ``"chat_completions"``.
    """
    raw = str(model_id or "").strip()
    if not raw:
        return "chat_completions"

    # Fetch once and share the result with the normalizer; otherwise both the
    # normalizer and the endpoint lookup below would hit the network. A failed
    # fetch becomes [] so the normalizer does not retry it either.
    if catalog is None and api_key:
        catalog = fetch_github_model_catalog(api_key=api_key) or []

    normalized = normalize_copilot_model_id(raw, catalog=catalog)
    if not normalized:
        return "chat_completions"

    # Primary: model ID pattern (matches opencode's shouldUseCopilotResponsesApi)
    if _should_use_copilot_responses_api(normalized):
        return "codex_responses"

    # Secondary: catalog lookup for everything else (Claude via /v1/messages, etc.)
    catalog_entry = next(
        (item for item in catalog or [] if item.get("id") == normalized),
        None,
    )
    if isinstance(catalog_entry, dict):
        supported_endpoints = {
            str(endpoint).strip()
            for endpoint in (catalog_entry.get("supported_endpoints") or [])
            if str(endpoint).strip()
        }
        # Only switch away from chat completions when the model cannot use it.
        if (
            "/v1/messages" in supported_endpoints
            and "/chat/completions" not in supported_endpoints
        ):
            return "anthropic_messages"

    return "chat_completions"
|
|
|
|
|
|
def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
    """Strip the ``<provider>/`` prefix from an OpenCode Zen / Go model id.

    Only applies when the normalized provider is ``opencode-zen`` or
    ``opencode-go`` and the id starts (case-insensitively) with that
    provider name followed by ``/``. In every other case the stripped id is
    returned as-is.
    """
    canonical = normalize_provider(provider_id)
    slug = str(model_id or "").strip()

    if slug and canonical in ("opencode-zen", "opencode-go"):
        marker = canonical + "/"
        if slug.lower().startswith(marker):
            return slug[len(marker):]

    return slug
|
|
|
|
|
|
def opencode_model_api_mode(provider_id: Optional[str], model_id: Optional[str]) -> str:
    """Pick the API mode for an OpenCode Zen / Go model.

    Routing is by prefix of the normalized, lower-cased model id:

    - ``opencode-go``: ``minimax-*`` -> ``anthropic_messages``
    - ``opencode-zen``: ``claude-*`` -> ``anthropic_messages``,
      ``gpt-*`` -> ``codex_responses``

    Everything else (other models, other providers, blank ids) uses
    ``chat_completions``. This follows the published OpenCode docs for the
    Zen and Go endpoints.
    """
    provider = normalize_provider(provider_id)
    slug = normalize_opencode_model_id(provider_id, model_id).lower()
    if not slug:
        return "chat_completions"

    # provider -> ordered (model-id prefix, api mode) rules
    prefix_rules = {
        "opencode-go": (("minimax-", "anthropic_messages"),),
        "opencode-zen": (
            ("claude-", "anthropic_messages"),
            ("gpt-", "codex_responses"),
        ),
    }
    for prefix, mode in prefix_rules.get(provider, ()):
        if slug.startswith(prefix):
            return mode

    return "chat_completions"
|
|
|
|
|
|
def github_model_reasoning_efforts(
    model_id: Optional[str],
    *,
    catalog: Optional[list[dict[str, Any]]] = None,
    api_key: Optional[str] = None,
) -> list[str]:
    """Return supported reasoning-effort levels for a Copilot-visible model.

    When the model is found in the catalog, the catalog decides:

    - dict-style ``capabilities``: the lower-cased, de-duplicated entries of
      ``capabilities["supports"]["reasoning_effort"]`` when that is a list,
      otherwise ``[]``;
    - list-style (legacy) ``capabilities``: ``[]`` unless ``"reasoning"`` is
      listed, in which case the id-based heuristic below applies.

    When the model is not in the catalog (or no catalog is available), the
    levels are guessed from the model id alone.

    Args:
        model_id: Requested model id; blank input yields ``[]``.
        catalog: Optional pre-fetched Copilot catalog entries.
        api_key: Optional token used to fetch the catalog when ``catalog``
            is ``None``. The catalog is fetched at most once per call.
    """
    raw = str(model_id or "").strip()
    if not raw:
        return []

    # Fetch once and share with the normalizer instead of letting both the
    # normalizer and the entry lookup download the catalog separately.
    if catalog is None and api_key:
        catalog = fetch_github_model_catalog(api_key=api_key) or []

    normalized = normalize_copilot_model_id(raw, catalog=catalog)
    if not normalized:
        return []

    catalog_entry = next(
        (item for item in catalog or [] if item.get("id") == normalized),
        None,
    )

    if catalog_entry is not None:
        capabilities = catalog_entry.get("capabilities")
        if isinstance(capabilities, dict):
            supports = capabilities.get("supports")
            efforts = supports.get("reasoning_effort") if isinstance(supports, dict) else None
            if isinstance(efforts, list):
                normalized_efforts = [
                    str(effort).strip().lower()
                    for effort in efforts
                    if str(effort).strip()
                ]
                # dict.fromkeys de-duplicates while keeping the listed order.
                return list(dict.fromkeys(normalized_efforts))
            return []

        # Legacy shape: a flat collection of capability names. A null or
        # otherwise non-collection value is treated as "no capabilities"
        # instead of being iterated (None would raise TypeError).
        legacy_source = (
            capabilities
            if isinstance(capabilities, (list, tuple, set, frozenset))
            else ()
        )
        legacy_capabilities = {
            str(capability).strip().lower()
            for capability in legacy_source
            if str(capability).strip()
        }
        if "reasoning" not in legacy_capabilities:
            return []

    return _github_reasoning_efforts_for_model_id(raw)
|
|
|
|
|
|
def probe_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
    timeout: float = 5.0,
) -> dict[str, Any]:
    """Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics.

    The configured base URL is tried first. If that fails, the same URL with
    a trailing ``/v1`` toggled (added when absent, removed when present) is
    tried as a fallback. Copilot / GitHub Models URLs are answered from the
    Copilot catalog instead.

    Args:
        api_key: Optional token sent as ``Authorization: Bearer ...``.
        base_url: Base URL of the endpoint; blank/``None`` short-circuits.
        timeout: Socket timeout in seconds for each request.

    Returns:
        A dict with the keys:

        - ``models``: list of model ids, or ``None`` if nothing could be
          reached / parsed;
        - ``probed_url``: the ``/models`` URL that answered, or on failure
          the first one tried (``None`` for a blank base URL);
        - ``resolved_base_url``: the base URL that answered (the configured
          one on failure);
        - ``suggested_base_url``: the ``/v1``-toggled counterpart of the base
          URL that answered (of the configured URL on failure); ``None``
          for blank input and Copilot URLs;
        - ``used_fallback``: ``True`` when the ``/v1``-toggled URL answered.
    """
    normalized = (base_url or "").strip().rstrip("/")
    if not normalized:
        return {
            "models": None,
            "probed_url": None,
            "resolved_base_url": "",
            "suggested_base_url": None,
            "used_fallback": False,
        }

    if _is_github_models_base_url(normalized):
        models = _fetch_github_models(api_key=api_key, timeout=timeout)
        return {
            "models": models,
            "probed_url": COPILOT_MODELS_URL,
            "resolved_base_url": COPILOT_BASE_URL,
            "suggested_base_url": None,
            "used_fallback": False,
        }

    if normalized.endswith("/v1"):
        alternate_base = normalized[:-3].rstrip("/")
    else:
        alternate_base = normalized + "/v1"

    candidates: list[tuple[str, bool]] = [(normalized, False)]
    if alternate_base:
        candidates.append((alternate_base, True))

    # Copilot URLs returned above, so only the bearer token is ever needed here.
    headers: dict[str, str] = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    for candidate_base, is_fallback in candidates:
        url = candidate_base.rstrip("/") + "/models"
        req = urllib.request.Request(url, headers=headers)
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                data = json.loads(resp.read().decode())
        except Exception:
            # Unreachable / not JSON: best effort, move on to the next candidate.
            continue

        if not isinstance(data, (list, dict)):
            # Valid JSON but not a model listing (e.g. a bare string).
            continue

        return {
            # _payload_items accepts both {"data": [...]} and bare lists and
            # drops non-dict entries; entries without an id are skipped so
            # callers never see "" as a model name.
            "models": [item["id"] for item in _payload_items(data) if item.get("id")],
            "probed_url": url,
            "resolved_base_url": candidate_base.rstrip("/"),
            "suggested_base_url": alternate_base if alternate_base != candidate_base else normalized,
            "used_fallback": is_fallback,
        }

    return {
        "models": None,
        # Report the URL the user actually configured, not the fallback.
        "probed_url": normalized + "/models",
        "resolved_base_url": normalized,
        "suggested_base_url": alternate_base if alternate_base != normalized else None,
        "used_fallback": False,
    }
|
|
|
|
|
|
def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]:
|
|
"""Fetch available language models with tool-use from AI Gateway."""
|
|
api_key = os.getenv("AI_GATEWAY_API_KEY", "").strip()
|
|
if not api_key:
|
|
return None
|
|
base_url = os.getenv("AI_GATEWAY_BASE_URL", "").strip()
|
|
if not base_url:
|
|
from hermes_constants import AI_GATEWAY_BASE_URL
|
|
base_url = AI_GATEWAY_BASE_URL
|
|
|
|
url = base_url.rstrip("/") + "/models"
|
|
headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"}
|
|
req = urllib.request.Request(url, headers=headers)
|
|
try:
|
|
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
|
data = json.loads(resp.read().decode())
|
|
return [
|
|
m["id"]
|
|
for m in data.get("data", [])
|
|
if m.get("id")
|
|
and m.get("type") == "language"
|
|
and "tool-use" in (m.get("tags") or [])
|
|
]
|
|
except Exception:
|
|
return None
|
|
|
|
|
|
def fetch_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
    timeout: float = 5.0,
) -> Optional[list[str]]:
    """Return the model IDs advertised by a provider's ``/models`` endpoint.

    Convenience wrapper around ``probe_api_models`` that keeps only the
    ``"models"`` field of the probe result: a list of model ID strings, or
    ``None`` when the probe did not obtain a listing (network error,
    timeout, auth failure, etc.).
    """
    probe = probe_api_models(api_key, base_url, timeout=timeout)
    return probe.get("models")
|
|
|
|
|
|
def _model_validation_result(
    accepted: bool, recognized: bool, message: Optional[str]
) -> dict[str, Any]:
    """Build the result dict of ``validate_requested_model`` (persist mirrors accepted)."""
    return {
        "accepted": accepted,
        "persist": accepted,
        "recognized": recognized,
        "message": message,
    }


def _similar_models_suffix(requested: str, api_models: list[str]) -> str:
    """Return a "Similar models" message suffix, or "" when nothing is close."""
    suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5)
    if not suggestions:
        return ""
    return "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)


def validate_requested_model(
    model_name: str,
    provider: Optional[str],
    *,
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
) -> dict[str, Any]:
    """
    Validate a ``/model`` value for the active provider.

    Performs the cheap format checks first (before any provider lookup or
    network access), then probes the live API to see whether the model is
    listed. A model that is well-formed is always accepted; the probe only
    influences ``recognized`` and the warning text.

    An ``openrouter`` provider combined with a ``base_url`` that does not
    contain ``openrouter.ai`` is treated as a ``custom`` endpoint. For the
    ``copilot`` provider the name is normalized before being compared with
    the listing.

    Args:
        model_name: The model name the user asked for.
        provider: Provider id or alias of the active provider.
        api_key: Optional token for the live ``/models`` probe.
        base_url: Optional base URL of the provider endpoint.

    Returns a dict with:
      - accepted: whether the CLI should switch to the requested model now
      - persist: whether it is safe to save to config
      - recognized: whether the live listing confirmed the model
      - message: optional warning / guidance for the user
    """
    requested = (model_name or "").strip()

    # Format checks come first so malformed input never costs a network call.
    if not requested:
        return _model_validation_result(False, False, "Model name cannot be empty.")
    if any(ch.isspace() for ch in requested):
        return _model_validation_result(False, False, "Model names cannot contain spaces.")

    normalized = normalize_provider(provider)
    if normalized == "openrouter" and base_url and "openrouter.ai" not in base_url:
        normalized = "custom"

    requested_for_lookup = requested
    if normalized == "copilot":
        requested_for_lookup = normalize_copilot_model_id(
            requested,
            api_key=api_key,
        ) or requested

    if normalized == "custom":
        probe = probe_api_models(api_key, base_url)
        api_models = probe.get("models")

        if api_models is None:
            message = (
                f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. "
                f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification."
            )
            suggested = probe.get("suggested_base_url")
            if suggested:
                # The suggestion is the configured URL with `/v1` toggled, so
                # the hint has to say which direction the toggle went.
                if suggested.endswith("/v1"):
                    message += f"\n If this server expects `/v1`, try base URL: `{suggested}`"
                else:
                    message += f"\n If this server does not expect `/v1`, try base URL: `{suggested}`"
            return _model_validation_result(True, False, message)

        if requested_for_lookup in api_models:
            return _model_validation_result(True, True, None)

        message = (
            f"Note: `{requested}` was not found in this custom endpoint's model listing "
            f"({probe.get('probed_url')}). It may still work if the server supports hidden or aliased models."
            f"{_similar_models_suffix(requested, api_models)}"
        )
        if probe.get("used_fallback"):
            message += (
                f"\n Endpoint verification succeeded after trying `{probe.get('resolved_base_url')}`. "
                f"Consider saving that as your base URL."
            )
        return _model_validation_result(True, False, message)

    # Probe the live API to check if the model actually exists
    api_models = fetch_api_models(api_key, base_url)

    if api_models is None:
        # Couldn't reach the API. Accept and persist, but warn so typos
        # don't silently break things.
        label = _PROVIDER_LABELS.get(normalized, normalized)
        return _model_validation_result(
            True,
            False,
            (
                f"Could not reach the {label} API to validate `{requested}`. "
                f"If the service isn't down, this model may not be valid."
            ),
        )

    if requested_for_lookup in api_models:
        # API confirmed the model exists
        return _model_validation_result(True, True, None)

    # API responded but the model is not listed. Accept anyway: the user may
    # have access to models not shown in the public listing (e.g. Z.AI
    # Pro/Max plans can use glm-5 on coding endpoints even though it's not
    # in /models). Warn but allow.
    return _model_validation_result(
        True,
        False,
        (
            f"Note: `{requested}` was not found in this provider's model listing. "
            f"It may still work if your plan supports it."
            f"{_similar_models_suffix(requested, api_models)}"
        ),
    )
|