2026-02-22 02:16:11 -08:00
|
|
|
"""
|
2026-03-07 19:56:48 -08:00
|
|
|
Canonical model catalogs and lightweight validation helpers.
|
2026-02-22 02:16:11 -08:00
|
|
|
|
|
|
|
|
Add, remove, or reorder entries here — both `hermes setup` and
|
|
|
|
|
`hermes` provider-selection will pick up the change automatically.
|
|
|
|
|
"""
|
|
|
|
|
|
2026-03-07 19:56:48 -08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
2026-03-08 05:22:15 -07:00
|
|
|
import json
|
2026-03-17 00:12:16 -07:00
|
|
|
import os
|
2026-03-08 05:22:15 -07:00
|
|
|
import urllib.request
|
|
|
|
|
import urllib.error
|
2026-03-07 19:56:48 -08:00
|
|
|
from difflib import get_close_matches
|
|
|
|
|
from typing import Any, Optional
|
|
|
|
|
|
2026-02-22 02:16:11 -08:00
|
|
|
# Curated OpenRouter catalog. Entries are iterated in list order by the
# helpers in this module, so list order is menu order.
# Each entry is (model_id, display description shown in menus); an empty
# description means the menu label is just the model id.
OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6", "recommended"),
    ("anthropic/claude-sonnet-4.5", ""),
    ("openai/gpt-5.4-pro", ""),
    ("openai/gpt-5.4", ""),
    ("openai/gpt-5.3-codex", ""),
    ("google/gemini-3-pro-preview", ""),
    ("google/gemini-3-flash-preview", ""),
    ("qwen/qwen3.5-plus-02-15", ""),
    ("qwen/qwen3.5-35b-a3b", ""),
    ("stepfun/step-3.5-flash", ""),
    ("z-ai/glm-5", ""),
    ("moonshotai/kimi-k2.5", ""),
    ("minimax/minimax-m2.5", ""),
]
|
|
|
|
|
|
2026-03-07 19:56:48 -08:00
|
|
|
# Static model catalogs keyed by canonical provider id.
# Used as the fallback list when no live model listing is available and for
# matching a bare model name to the provider that serves it.
_PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "claude-opus-4-6",
        "claude-sonnet-4-6",
        "gpt-5.4",
        "gemini-3-flash",
        "gemini-3.0-pro-preview",
        "deepseek-v3.2",
    ],
    "openai-codex": [
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-5.1-codex-mini",
        "gpt-5.1-codex-max",
    ],
    "zai": [
        "glm-5",
        "glm-4.7",
        "glm-4.5",
        "glm-4.5-flash",
    ],
    "kimi-coding": [
        "kimi-for-coding",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-thinking-turbo",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "minimax": [
        "MiniMax-M2.5",
        "MiniMax-M2.5-highspeed",
        "MiniMax-M2.1",
    ],
    "minimax-cn": [
        "MiniMax-M2.5",
        "MiniMax-M2.5-highspeed",
        "MiniMax-M2.1",
    ],
    "anthropic": [
        "claude-opus-4-6",
        "claude-sonnet-4-6",
        "claude-opus-4-5-20251101",
        "claude-sonnet-4-5-20250929",
        "claude-opus-4-20250514",
        "claude-sonnet-4-20250514",
        "claude-haiku-4-5-20251001",
    ],
    "deepseek": [
        "deepseek-chat",
        "deepseek-reasoner",
    ],
    "ai-gateway": [
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5",
        "openai/gpt-4.1",
        "openai/gpt-4.1-mini",
        "google/gemini-3-pro-preview",
        "google/gemini-3-flash",
        "google/gemini-2.5-pro",
        "google/gemini-2.5-flash",
        "deepseek/deepseek-v3.2",
    ],
}
|
|
|
|
|
|
|
|
|
|
# Human-friendly display label for each canonical provider id.
_PROVIDER_LABELS: dict[str, str] = {
    "openrouter": "OpenRouter",
    "openai-codex": "OpenAI Codex",
    "nous": "Nous Portal",
    "zai": "Z.AI / GLM",
    "kimi-coding": "Kimi / Moonshot",
    "minimax": "MiniMax",
    "minimax-cn": "MiniMax (China)",
    "anthropic": "Anthropic",
    "deepseek": "DeepSeek",
    "ai-gateway": "AI Gateway",
    "custom": "Custom endpoint",
}
|
|
|
|
|
|
|
|
|
|
# Alternate provider spellings mapped to their canonical provider id.
# Keys are lowercase; callers lowercase user input before looking it up.
_PROVIDER_ALIASES: dict[str, str] = {
    "glm": "zai",
    "z-ai": "zai",
    "z.ai": "zai",
    "zhipu": "zai",
    "kimi": "kimi-coding",
    "moonshot": "kimi-coding",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
    "claude-code": "anthropic",
    "deep-seek": "deepseek",
    "aigateway": "ai-gateway",
    "vercel": "ai-gateway",
    "vercel-ai-gateway": "ai-gateway",
}
|
|
|
|
|
|
2026-02-22 02:16:11 -08:00
|
|
|
|
|
|
|
|
def model_ids() -> list[str]:
    """List the model ids from ``OPENROUTER_MODELS``, in catalog order."""
    ids: list[str] = []
    for model_id, _description in OPENROUTER_MODELS:
        ids.append(model_id)
    return ids
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def menu_labels() -> list[str]:
    """Build one menu label per OpenRouter catalog entry, in catalog order.

    An entry with a description renders as ``"<model_id> (<description>)"``,
    e.g. ``anthropic/claude-opus-4.6 (recommended)``; an entry whose
    description is empty renders as the bare model id.
    """
    return [
        f"{model_id} ({note})" if note else model_id
        for model_id, note in OPENROUTER_MODELS
    ]
|
2026-03-07 19:56:48 -08:00
|
|
|
|
|
|
|
|
|
2026-03-08 06:09:36 -07:00
|
|
|
# Every provider id and alias accepted on the left-hand side of the
# ``provider:model`` syntax. "openrouter" and "custom" are always included,
# independent of the label and alias tables.
_KNOWN_PROVIDER_NAMES: set[str] = {
    *_PROVIDER_LABELS,
    *_PROVIDER_ALIASES,
    "openrouter",
    "custom",
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_available_providers() -> list[dict[str, Any]]:
    """Describe every provider the user could use with ``provider:model``.

    Providers are returned in a fixed display order. Each entry is a dict with:

    - ``id``: canonical provider id (``str``).
    - ``label``: display label; falls back to the id when no label is
      registered (``str``).
    - ``aliases``: aliases that normalize to this provider (``list[str]``).
    - ``authenticated``: ``True`` when resolving the provider at runtime
      yields a non-empty ``api_key`` (``bool``).

    Credential detection is best-effort: if the runtime resolver cannot be
    imported, or it raises for a given provider, that provider is simply
    reported with ``authenticated=False``.
    """
    # Canonical providers in display order.
    provider_order = (
        "openrouter", "nous", "openai-codex",
        "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic",
        "ai-gateway", "deepseek",
    )

    # Reverse alias map: canonical id -> every alias that points at it.
    aliases_for: dict[str, list[str]] = {}
    for alias, canonical in _PROVIDER_ALIASES.items():
        aliases_for.setdefault(canonical, []).append(alias)

    # Import lazily and only once instead of on every loop iteration.
    # NOTE(review): presumably kept function-local to avoid an import cycle
    # with hermes_cli.runtime_provider — confirm before hoisting to the top.
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider
    except Exception:
        resolve_runtime_provider = None

    result: list[dict[str, Any]] = []
    for pid in provider_order:
        has_creds = False
        if resolve_runtime_provider is not None:
            try:
                runtime = resolve_runtime_provider(requested=pid)
                has_creds = bool(runtime.get("api_key"))
            except Exception:
                # Deliberate best-effort: a provider that cannot be resolved
                # is listed as unauthenticated rather than aborting the list.
                has_creds = False
        result.append({
            "id": pid,
            "label": _PROVIDER_LABELS.get(pid, pid),
            "aliases": aliases_for.get(pid, []),
            "authenticated": has_creds,
        })
    return result
|
|
|
|
|
|
|
|
|
|
|
2026-03-08 05:45:55 -07:00
|
|
|
def parse_model_input(raw: str, current_provider: str) -> tuple[str, str]:
    """Split ``/model`` input into a ``(provider, model)`` pair.

    A ``provider:model`` prefix switches providers at runtime::

        openrouter:anthropic/claude-sonnet-4.5 → ("openrouter", "anthropic/claude-sonnet-4.5")
        nous:hermes-3                          → ("nous", "hermes-3")
        anthropic/claude-sonnet-4.5            → (current_provider, "anthropic/claude-sonnet-4.5")
        gpt-5.4                                → (current_provider, "gpt-5.4")

    Only the text before the *first* colon is considered, and it counts as a
    provider only when it is a known provider id or alias (compared
    case-insensitively) and something non-blank follows the colon. Model
    names that merely contain a colon, such as
    ``anthropic/claude-3.5-sonnet:beta``, are therefore returned whole.

    Returns the explicit provider in canonical form together with the model
    text, or ``(current_provider, <stripped input>)`` when no provider prefix
    is recognized.
    """
    text = raw.strip()
    # Without a colon the tail is empty, so the check below fails naturally.
    head, _colon, tail = text.partition(":")
    prefix = head.strip().lower()
    model = tail.strip()

    if prefix and model and prefix in _KNOWN_PROVIDER_NAMES:
        return (normalize_provider(prefix), model)
    return (current_provider, text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def curated_models_for_provider(provider: Optional[str]) -> list[tuple[str, str]]:
    """Produce the ``(model_id, description)`` list to offer for a provider.

    The provider is normalized first. OpenRouter returns a copy of the
    curated ``OPENROUTER_MODELS`` table, descriptions included. Any other
    provider is looked up through ``provider_model_ids`` (the live listing);
    when that yields nothing, the static ``_PROVIDER_MODELS`` catalog is used
    instead. Non-OpenRouter entries always carry an empty description, and an
    unknown provider produces an empty list.
    """
    canonical = normalize_provider(provider)
    if canonical == "openrouter":
        return list(OPENROUTER_MODELS)

    # Live listing wins; an empty/None result falls through to the static table.
    model_names = provider_model_ids(canonical) or _PROVIDER_MODELS.get(canonical, [])
    return [(model_name, "") for model_name in model_names]
|
|
|
|
|
|
|
|
|
|
|
2026-03-16 04:34:45 -07:00
|
|
|
def _provider_has_env_credentials(provider_id: str) -> bool:
    """Return True when any API-key env var registered for *provider_id* is set.

    Best-effort probe: if the provider registry cannot be imported, the
    provider is not registered, or the lookup raises, the answer is False.
    """
    try:
        from hermes_cli.auth import PROVIDER_REGISTRY

        pconfig = PROVIDER_REGISTRY.get(provider_id)
        if not pconfig:
            return False
        return any(
            os.getenv(env_var, "").strip()
            for env_var in pconfig.api_key_env_vars
        )
    except Exception:
        # Deliberate best-effort: treat any failure as "no credentials".
        return False


def detect_provider_for_model(
    model_name: str,
    current_provider: str,
) -> Optional[tuple[str, str]]:
    """Auto-detect the best provider for a model name.

    Args:
        model_name: Model id as typed by the user; surrounding whitespace is
            ignored and catalog comparisons are case-insensitive.
        current_provider: Provider currently in use. Aliases and mixed case
            are accepted and resolved to the canonical id before comparing.

    Returns:
        ``(provider_id, model_name)`` when a switch (or a slug remap) is
        suggested — the model name may be remapped to a full OpenRouter slug
        (e.g. bare ``glm-5`` → ``z-ai/glm-5``). ``None`` when the name is
        blank, the model already belongs to the current provider's static
        catalog, or no confident match is found.

    Priority:
        1. A direct provider whose catalog lists the model *and* that has
           credentials set in the environment. Every matching provider is
           checked, so a model listed by several providers resolves to the
           one the user can actually authenticate with.
        2. No matching provider has credentials → the OpenRouter slug for the
           model, if the OpenRouter catalog has one.
        3. Otherwise the first direct match, so that credential resolution
           reports a clear error instead of silently using another provider.
        4. No direct match at all → OpenRouter catalog match.
    """
    name = (model_name or "").strip()
    if not name:
        return None
    name_lower = name.lower()

    # Resolve aliases/case ("Kimi" -> "kimi-coding") so the provider already in
    # use is recognized and never suggested as a switch target.
    current = (current_provider or "").strip().lower()
    current = _PROVIDER_ALIASES.get(current, current)

    # Aggregators list other providers' models — never auto-switch TO them.
    aggregators = {"nous", "openrouter"}

    def in_catalog(models: list[str]) -> bool:
        return any(name_lower == m.lower() for m in models)

    # Model already belongs to the current provider: nothing to suggest.
    if in_catalog(_PROVIDER_MODELS.get(current, [])):
        return None

    # --- Step 1: direct providers whose static catalog lists this model ---
    direct_matches = [
        pid
        for pid, models in _PROVIDER_MODELS.items()
        if pid != current and pid not in aggregators and in_catalog(models)
    ]
    if direct_matches:
        for pid in direct_matches:
            if _provider_has_env_credentials(pid):
                return (pid, name)

        # No direct creds — try to find this model on OpenRouter instead.
        or_slug = _find_openrouter_slug(name)
        if or_slug:
            return ("openrouter", or_slug)
        # Still return the direct provider — credential resolution will give
        # a clear error rather than silently using the wrong provider.
        return (direct_matches[0], name)

    # --- Step 2: OpenRouter catalog (exact slug or bare model name) ---
    or_slug = _find_openrouter_slug(name)
    if or_slug and (current != "openrouter" or or_slug != name):
        # Either a real provider switch, or already on OpenRouter but the
        # name needs remapping to the catalog's full slug.
        return ("openrouter", or_slug)
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _find_openrouter_slug(model_name: str) -> Optional[str]:
    """Resolve a bare or fully-qualified model name to its OpenRouter slug.

    The lookup is case-insensitive and runs in two passes over
    ``OPENROUTER_MODELS``:

    1. The whole slug matches (``anthropic/claude-opus-4.6`` stays as-is).
    2. Only the part after the first ``/`` matches
       (``claude-opus-4.6`` becomes ``anthropic/claude-opus-4.6``).

    Returns the catalog's own spelling of the slug, or ``None`` when the
    name is blank or nothing in the catalog matches.
    """
    wanted = model_name.strip().lower()
    if not wanted:
        return None

    # Pass 1: the caller already supplied the full ``vendor/model`` slug.
    full_hit = next(
        (slug for slug, _desc in OPENROUTER_MODELS if slug.lower() == wanted),
        None,
    )
    if full_hit is not None:
        return full_hit

    # Pass 2: compare against the model part only; entries without a
    # vendor prefix are skipped because pass 1 already covered them.
    for slug, _desc in OPENROUTER_MODELS:
        _vendor, slash, bare_model = slug.partition("/")
        if slash and bare_model.lower() == wanted:
            return slug

    return None
|
|
|
|
|
|
|
|
|
|
|
2026-03-07 19:56:48 -08:00
|
|
|
def normalize_provider(provider: Optional[str]) -> str:
    """Normalize provider aliases to Hermes' canonical provider ids.

    The input is stripped and lower-cased, then looked up in
    ``_PROVIDER_ALIASES``; names without an alias entry are returned in
    their stripped, lower-cased form.

    ``None``, an empty string, and a whitespace-only string all fall back
    to ``"openrouter"``.

    Note: ``"auto"`` passes through unchanged — use
    ``hermes_cli.auth.resolve_provider()`` to resolve it to a concrete
    provider based on credentials and environment.
    """
    # Strip *before* applying the default: a whitespace-only value is truthy,
    # so defaulting first would let it through and yield "" after stripping.
    normalized = (provider or "").strip().lower() or "openrouter"
    return _PROVIDER_ALIASES.get(normalized, normalized)
|
|
|
|
|
|
|
|
|
|
|
2026-03-11 19:37:42 +03:00
|
|
|
def provider_label(provider: Optional[str]) -> str:
    """Return the display label for a provider id or alias.

    ``"auto"`` (any casing, surrounding whitespace ignored) is labelled
    ``"Auto"``. Otherwise the name is canonicalised via
    ``normalize_provider`` and looked up in ``_PROVIDER_LABELS``; an
    unknown provider is shown as the caller typed it (stripped), or as
    ``"OpenRouter"`` if nothing is left after stripping.
    """
    as_typed = (provider or "openrouter").strip()
    lowered = as_typed.lower()

    # "auto" is not a concrete provider, so it has no catalog label.
    if lowered == "auto":
        return "Auto"

    canonical = normalize_provider(lowered)
    fallback = as_typed if as_typed else "OpenRouter"
    return _PROVIDER_LABELS.get(canonical, fallback)
|
|
|
|
|
|
|
|
|
|
|
2026-03-07 19:56:48 -08:00
|
|
|
def provider_model_ids(provider: Optional[str]) -> list[str]:
    """Return the best available list of model ids for ``provider``.

    OpenRouter and Codex delegate entirely to ``model_ids()`` and
    ``get_codex_model_ids()`` respectively. Nous, Anthropic and AI Gateway
    attempt a live lookup first; when that lookup yields nothing (or, for
    Nous, raises) the static ``_PROVIDER_MODELS`` entry is used. Every other
    provider goes straight to the static entry, which defaults to an empty
    list.
    """
    canonical = normalize_provider(provider)

    if canonical == "openrouter":
        return model_ids()

    if canonical == "openai-codex":
        from hermes_cli.codex_models import get_codex_model_ids

        return get_codex_model_ids()

    live_ids = None
    if canonical == "nous":
        # Best effort: any failure while resolving credentials or calling the
        # Nous Portal /models endpoint falls through to the static list.
        try:
            from hermes_cli.auth import fetch_nous_models, resolve_nous_runtime_credentials

            nous_creds = resolve_nous_runtime_credentials()
            if nous_creds:
                live_ids = fetch_nous_models(
                    nous_creds.get("api_key", ""),
                    nous_creds.get("base_url", ""),
                )
        except Exception:
            pass
    elif canonical == "anthropic":
        live_ids = _fetch_anthropic_models()
    elif canonical == "ai-gateway":
        live_ids = _fetch_ai_gateway_models()

    if live_ids:
        return live_ids

    # Copy so callers cannot mutate the shared catalog.
    return list(_PROVIDER_MODELS.get(canonical, []))
|
|
|
|
|
|
|
|
|
|
|
2026-03-12 17:04:31 -07:00
|
|
|
def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
    """Query ``https://api.anthropic.com/v1/models`` for the available model ids.

    Credentials come from ``agent.anthropic_adapter.resolve_anthropic_token()``.
    OAuth tokens are sent as a ``Bearer`` authorization header together with
    the adapter's ``_COMMON_BETAS + _OAUTH_ONLY_BETAS`` beta list; any other
    token is sent as ``x-api-key``.

    Returns the ids ordered by tier (opus, then sonnet, then haiku, then
    everything else) and alphabetically ascending within a tier. Returns
    ``None`` when the adapter cannot be imported, no token is available, or
    the request/parse fails (the failure is logged at debug level).
    """
    try:
        from agent.anthropic_adapter import resolve_anthropic_token, _is_oauth_token
    except ImportError:
        return None

    token = resolve_anthropic_token()
    if not token:
        return None

    if _is_oauth_token(token):
        from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS

        auth_headers = {
            "Authorization": f"Bearer {token}",
            "anthropic-beta": ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS),
        }
    else:
        auth_headers = {"x-api-key": token}
    headers: dict[str, str] = {"anthropic-version": "2023-06-01", **auth_headers}

    def _tier_key(model_id: str) -> tuple[bool, bool, bool, str]:
        # False sorts before True, so ids containing "opus" come first,
        # then "sonnet", then "haiku"; the id itself breaks ties.
        return (
            "opus" not in model_id,
            "sonnet" not in model_id,
            "haiku" not in model_id,
            model_id,
        )

    request = urllib.request.Request(
        "https://api.anthropic.com/v1/models",
        headers=headers,
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            payload = json.loads(resp.read().decode())
        found = [entry["id"] for entry in payload.get("data", []) if entry.get("id")]
        return sorted(found, key=_tier_key)
    except Exception as e:
        import logging

        logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e)
    return None
|
|
|
|
|
|
|
|
|
|
|
2026-03-15 20:09:50 -07:00
|
|
|
def probe_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
    timeout: float = 5.0,
) -> dict[str, Any]:
    """Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics.

    The configured base URL is tried first. If that fails, one alternate is
    tried: the same URL with a trailing ``/v1`` removed, or with ``/v1``
    appended when it was absent. When ``api_key`` is set it is sent as a
    ``Bearer`` authorization header.

    Returns a dict with:
      - models: list of model id strings, or ``None`` if no candidate
        responded with a parseable listing
      - probed_url: the ``/models`` URL that answered (or the last one tried
        on failure; ``None`` when ``base_url`` is blank)
      - resolved_base_url: the base URL that answered (the stripped input on
        failure)
      - suggested_base_url: the other candidate base URL, if any
      - used_fallback: ``True`` when the alternate URL is the one that worked

    Only entries that are objects with a non-empty string ``id`` are reported.
    Listings sometimes contain ``"id": null`` or entries without an id, and
    letting ``None``/``""`` through breaks downstream fuzzy matching
    (``difflib.get_close_matches`` raises ``TypeError`` on ``None``).
    """
    normalized = (base_url or "").strip().rstrip("/")
    if not normalized:
        return {
            "models": None,
            "probed_url": None,
            "resolved_base_url": "",
            "suggested_base_url": None,
            "used_fallback": False,
        }

    # Toggle the "/v1" suffix to get the one alternate worth trying.
    if normalized.endswith("/v1"):
        alternate_base = normalized[:-3].rstrip("/")
    else:
        alternate_base = normalized + "/v1"

    candidates: list[tuple[str, bool]] = [(normalized, False)]
    if alternate_base and alternate_base != normalized:
        candidates.append((alternate_base, True))

    tried: list[str] = []
    headers: dict[str, str] = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    for candidate_base, is_fallback in candidates:
        url = candidate_base.rstrip("/") + "/models"
        tried.append(url)
        req = urllib.request.Request(url, headers=headers)
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                data = json.loads(resp.read().decode())
            models = [
                entry["id"]
                for entry in data.get("data", [])
                if isinstance(entry, dict)
                and isinstance(entry.get("id"), str)
                and entry["id"]
            ]
        except Exception:
            # Deliberate best effort: network, HTTP, decoding or shape errors
            # all mean "this candidate did not work" — move on to the next.
            continue
        return {
            "models": models,
            "probed_url": url,
            "resolved_base_url": candidate_base.rstrip("/"),
            "suggested_base_url": alternate_base if alternate_base != candidate_base else normalized,
            "used_fallback": is_fallback,
        }

    return {
        "models": None,
        "probed_url": tried[-1] if tried else normalized.rstrip("/") + "/models",
        "resolved_base_url": normalized,
        "suggested_base_url": alternate_base if alternate_base != normalized else None,
        "used_fallback": False,
    }
|
|
|
|
|
|
|
|
|
|
|
2026-03-17 00:12:16 -07:00
|
|
|
def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]:
    """List AI Gateway language models that are tagged for tool use.

    Reads the key from ``AI_GATEWAY_API_KEY`` and the base URL from
    ``AI_GATEWAY_BASE_URL`` (falling back to
    ``hermes_constants.AI_GATEWAY_BASE_URL``). Only entries with an ``id``,
    ``type == "language"`` and a ``"tool-use"`` tag are kept.

    Returns ``None`` when no key is configured or the request/parse fails.
    """
    key = os.getenv("AI_GATEWAY_API_KEY", "").strip()
    if not key:
        return None

    gateway = os.getenv("AI_GATEWAY_BASE_URL", "").strip()
    if not gateway:
        from hermes_constants import AI_GATEWAY_BASE_URL

        gateway = AI_GATEWAY_BASE_URL

    request = urllib.request.Request(
        gateway.rstrip("/") + "/models",
        headers={"Authorization": f"Bearer {key}"},
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as resp:
            payload = json.loads(resp.read().decode())

        usable: list[str] = []
        for entry in payload.get("data", []):
            if not entry.get("id"):
                continue
            if entry.get("type") != "language":
                continue
            # ``tags`` may be missing or null; treat both as "no tags".
            if "tool-use" not in (entry.get("tags") or []):
                continue
            usable.append(entry["id"])
        return usable
    except Exception:
        return None
|
|
|
|
|
|
|
|
|
|
|
2026-03-08 05:22:15 -07:00
|
|
|
def fetch_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
    timeout: float = 5.0,
) -> Optional[list[str]]:
    """Return the model ids advertised by the provider's ``/models`` endpoint.

    Convenience wrapper around ``probe_api_models`` that discards the probe
    metadata and returns only its ``"models"`` entry: a list of ids, or
    ``None`` when the endpoint could not be reached (network error, timeout,
    auth failure, etc.).
    """
    probe_result = probe_api_models(api_key, base_url, timeout=timeout)
    return probe_result.get("models")
|
2026-03-08 05:22:15 -07:00
|
|
|
|
|
|
|
|
|
2026-03-07 19:56:48 -08:00
|
|
|
def validate_requested_model(
    model_name: str,
    provider: Optional[str],
    *,
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
) -> dict[str, Any]:
    """
    Validate a ``/model`` value for the active provider.

    Format checks run first (non-empty, no whitespace). The provider's live
    ``/models`` listing is then consulted. A model missing from the listing,
    or an unreachable listing, is still accepted and persisted — the result
    only carries a warning.

    An ``openrouter`` provider whose ``base_url`` does not contain
    ``openrouter.ai`` is treated as a ``custom`` endpoint.

    Returns a dict with:
      - accepted: whether the CLI should switch to the requested model now
      - persist: whether it is safe to save to config
      - recognized: whether the live listing confirmed the model
      - message: optional warning / guidance for the user
    """

    def _verdict(ok: bool, known: bool, note: Optional[str]) -> dict[str, Any]:
        # Every outcome persists exactly when it is accepted.
        return {
            "accepted": ok,
            "persist": ok,
            "recognized": known,
            "message": note,
        }

    def _similar_hint(listing: list[str]) -> str:
        # Up to three fuzzy matches, rendered as a trailing hint line.
        close = get_close_matches(requested, listing, n=3, cutoff=0.5)
        if not close:
            return ""
        return "\n Similar models: " + ", ".join(f"`{s}`" for s in close)

    requested = (model_name or "").strip()
    normalized = normalize_provider(provider)
    if normalized == "openrouter" and base_url and "openrouter.ai" not in base_url:
        normalized = "custom"

    if not requested:
        return _verdict(False, False, "Model name cannot be empty.")

    if any(ch.isspace() for ch in requested):
        return _verdict(False, False, "Model names cannot contain spaces.")

    if normalized == "custom":
        probe = probe_api_models(api_key, base_url)
        listing = probe.get("models")

        if listing is None:
            # Endpoint unreachable: accept anyway, but point at the URL we
            # tried and at the alternate base URL if there is one.
            note = (
                f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. "
                f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification."
            )
            if probe.get("suggested_base_url"):
                note += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`"
            return _verdict(True, False, note)

        if requested in set(listing):
            return _verdict(True, True, None)

        hint = _similar_hint(listing)
        note = (
            f"Note: `{requested}` was not found in this custom endpoint's model listing "
            f"({probe.get('probed_url')}). It may still work if the server supports hidden or aliased models."
            f"{hint}"
        )
        if probe.get("used_fallback"):
            note += (
                f"\n Endpoint verification succeeded after trying `{probe.get('resolved_base_url')}`. "
                "Consider saving that as your base URL."
            )
        return _verdict(True, False, note)

    # Known provider: probe the live API to check whether the model exists.
    listing = fetch_api_models(api_key, base_url)

    if listing is None:
        # Couldn't reach the API. Accept and persist, but warn so typos
        # don't silently break things.
        label = _PROVIDER_LABELS.get(normalized, normalized)
        return _verdict(
            True,
            False,
            (
                f"Could not reach the {label} API to validate `{requested}`. "
                "If the service isn't down, this model may not be valid."
            ),
        )

    if requested in set(listing):
        # API confirmed the model exists.
        return _verdict(True, True, None)

    # API responded but the model is not listed. Accept anyway — the user
    # may have access to models not shown in the public listing (e.g. Z.AI
    # Pro/Max plans can use glm-5 on coding endpoints even though it's not
    # in /models). Warn but allow.
    hint = _similar_hint(listing)
    return _verdict(
        True,
        False,
        (
            f"Note: `{requested}` was not found in this provider's model listing. "
            "It may still work if your plan supports it."
            f"{hint}"
        ),
    )
|