Compare commits
8 Commits
fix/kimi-f
...
feat/ci-no
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5a942d71a1 | ||
|
|
044f0f8951 | ||
| 61c59ce332 | |||
| 01ce8ae889 | |||
|
|
b179250ab8 | ||
| 01a3f47a5b | |||
|
|
4538e11f97 | ||
| 7936483ffc |
@@ -47,6 +47,11 @@ jobs:
|
||||
source .venv/bin/activate
|
||||
python scripts/syntax_guard.py
|
||||
|
||||
- name: No duplicate models
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python scripts/check_no_duplicate_models.py
|
||||
|
||||
- name: Green-path E2E
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
|
||||
@@ -922,6 +922,7 @@ def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[st
|
||||
_AUTO_PROVIDER_LABELS = {
|
||||
"_try_openrouter": "openrouter",
|
||||
"_try_nous": "nous",
|
||||
"_try_ollama": "ollama",
|
||||
"_try_custom_endpoint": "local/custom",
|
||||
"_try_codex": "openai-codex",
|
||||
"_resolve_api_key_provider": "api-key",
|
||||
@@ -930,6 +931,18 @@ _AUTO_PROVIDER_LABELS = {
|
||||
_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
|
||||
|
||||
|
||||
def _try_ollama() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Detect and return an Ollama client if the server is reachable."""
|
||||
base_url = (os.getenv("OLLAMA_BASE_URL", "") or "http://localhost:11434").strip().rstrip("/")
|
||||
base_url = base_url + "/v1" if not base_url.endswith("/v1") else base_url
|
||||
from agent.model_metadata import detect_local_server_type
|
||||
if detect_local_server_type(base_url) != "ollama":
|
||||
return None, None
|
||||
api_key = (os.getenv("OLLAMA_API_KEY", "") or "ollama").strip()
|
||||
model = _read_main_model() or "gemma4:12b"
|
||||
return OpenAI(api_key=api_key, base_url=base_url), model
|
||||
|
||||
|
||||
def _get_provider_chain() -> List[tuple]:
|
||||
"""Return the ordered provider detection chain.
|
||||
|
||||
@@ -939,6 +952,7 @@ def _get_provider_chain() -> List[tuple]:
|
||||
return [
|
||||
("openrouter", _try_openrouter),
|
||||
("nous", _try_nous),
|
||||
("ollama", _try_ollama),
|
||||
("local/custom", _try_custom_endpoint),
|
||||
("openai-codex", _try_codex),
|
||||
("api-key", _resolve_api_key_provider),
|
||||
@@ -988,6 +1002,7 @@ def _try_payment_fallback(
|
||||
# Map common resolved_provider values back to chain labels.
|
||||
_alias_to_label = {"openrouter": "openrouter", "nous": "nous",
|
||||
"openai-codex": "openai-codex", "codex": "openai-codex",
|
||||
"ollama": "ollama",
|
||||
"custom": "local/custom", "local/custom": "local/custom"}
|
||||
skip_chain_labels = {_alias_to_label.get(s, s) for s in skip_labels}
|
||||
|
||||
@@ -1195,6 +1210,15 @@ def resolve_provider_client(
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── Ollama (first-class local provider) ──────────────────────────
|
||||
if provider == "ollama":
|
||||
base_url = (explicit_base_url or os.getenv("OLLAMA_BASE_URL", "") or "http://localhost:11434").strip().rstrip("/")
|
||||
base_url = base_url + "/v1" if not base_url.endswith("/v1") else base_url
|
||||
api_key = (explicit_api_key or os.getenv("OLLAMA_API_KEY", "") or "ollama").strip()
|
||||
final_model = model or _read_main_model() or "gemma4:12b"
|
||||
client = OpenAI(api_key=api_key, base_url=base_url)
|
||||
return (_to_async_client(client, final_model) if async_mode else (client, final_model))
|
||||
|
||||
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
||||
if provider == "custom":
|
||||
if explicit_base_url:
|
||||
@@ -1335,6 +1359,7 @@ def get_async_text_auxiliary_client(task: str = ""):
|
||||
_VISION_AUTO_PROVIDER_ORDER = (
|
||||
"openrouter",
|
||||
"nous",
|
||||
"ollama",
|
||||
"openai-codex",
|
||||
"anthropic",
|
||||
"custom",
|
||||
|
||||
@@ -26,7 +26,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
|
||||
"gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
|
||||
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
|
||||
"custom", "local",
|
||||
"ollama", "custom", "local",
|
||||
# Common aliases
|
||||
"google", "google-gemini", "google-ai-studio",
|
||||
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
|
||||
@@ -102,9 +102,12 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"gpt-4": 128000,
|
||||
# Google
|
||||
"gemini": 1048576,
|
||||
# Gemma (open models served via AI Studio)
|
||||
# Gemma (open models — Ollama / AI Studio)
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-4-26b": 256000,
|
||||
"gemma-4-12b": 256000,
|
||||
"gemma-4-4b": 256000,
|
||||
"gemma-4-1b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek
|
||||
@@ -187,6 +190,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"api.githubcopilot.com": "copilot",
|
||||
"models.github.ai": "copilot",
|
||||
"api.fireworks.ai": "fireworks",
|
||||
"localhost": "ollama",
|
||||
"127.0.0.1": "ollama",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -148,7 +148,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"openrouter": "openrouter",
|
||||
"anthropic": "anthropic",
|
||||
"zai": "zai",
|
||||
"kimi-coding": "kimi-for-coding",
|
||||
"kimi-coding": "kimi-k2.5",
|
||||
"minimax": "minimax",
|
||||
"minimax-cn": "minimax-cn",
|
||||
"deepseek": "deepseek",
|
||||
|
||||
@@ -6,7 +6,7 @@ model: anthropic/claude-opus-4.6
|
||||
# Fallback chain: Anthropic -> Kimi -> Ollama (local)
|
||||
fallback_providers:
|
||||
- provider: kimi-coding
|
||||
model: kimi-for-coding
|
||||
model: kimi-k2.5
|
||||
timeout: 60
|
||||
reason: "Primary fallback when Anthropic quota limited"
|
||||
|
||||
|
||||
@@ -820,10 +820,11 @@ def resolve_provider(
|
||||
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
||||
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||
# Local server aliases — route through the generic custom provider
|
||||
# Local server aliases
|
||||
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
|
||||
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
|
||||
"vllm": "custom", "llamacpp": "custom",
|
||||
"llama.cpp": "custom", "llama-cpp": "custom",
|
||||
"ollama": "ollama",
|
||||
}
|
||||
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
||||
|
||||
|
||||
@@ -2126,9 +2126,8 @@ def _model_flow_kimi(config, current_model=""):
|
||||
|
||||
# Step 3: Model selection — show appropriate models for the endpoint
|
||||
if is_coding_plan:
|
||||
# Coding Plan models (kimi-for-coding first)
|
||||
# Coding Plan models (kimi-k2.5 first)
|
||||
model_list = [
|
||||
"kimi-for-coding",
|
||||
"kimi-k2.5",
|
||||
"kimi-k2-thinking",
|
||||
"kimi-k2-thinking-turbo",
|
||||
@@ -4206,7 +4205,7 @@ For more help on a command:
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--provider",
|
||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"],
|
||||
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "ollama"],
|
||||
default=None,
|
||||
help="Inference provider (default: auto)"
|
||||
)
|
||||
|
||||
@@ -130,7 +130,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"glm-4.5-flash",
|
||||
],
|
||||
"kimi-coding": [
|
||||
"kimi-for-coding",
|
||||
"kimi-k2.5",
|
||||
"kimi-k2-thinking",
|
||||
"kimi-k2-thinking-turbo",
|
||||
@@ -568,7 +567,7 @@ def list_available_providers() -> list[dict[str, str]]:
|
||||
"gemini", "huggingface",
|
||||
"zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
|
||||
"opencode-zen", "opencode-go",
|
||||
"ai-gateway", "deepseek", "custom",
|
||||
"ai-gateway", "deepseek", "ollama", "custom",
|
||||
]
|
||||
# Build reverse alias map
|
||||
aliases_for: dict[str, list[str]] = {}
|
||||
|
||||
@@ -78,7 +78,7 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
extra_env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
|
||||
base_url_env_var="GLM_BASE_URL",
|
||||
),
|
||||
"kimi-for-coding": HermesOverlay(
|
||||
"kimi-k2.5": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
base_url_env_var="KIMI_BASE_URL",
|
||||
),
|
||||
@@ -162,10 +162,10 @@ ALIASES: Dict[str, str] = {
|
||||
"z.ai": "zai",
|
||||
"zhipu": "zai",
|
||||
|
||||
# kimi-for-coding (models.dev ID)
|
||||
"kimi": "kimi-for-coding",
|
||||
"kimi-coding": "kimi-for-coding",
|
||||
"moonshot": "kimi-for-coding",
|
||||
# kimi-k2.5 (models.dev ID)
|
||||
"kimi": "kimi-k2.5",
|
||||
"kimi-coding": "kimi-k2.5",
|
||||
"moonshot": "kimi-k2.5",
|
||||
|
||||
# minimax-cn
|
||||
"minimax-china": "minimax-cn",
|
||||
@@ -376,7 +376,7 @@ LABELS: Dict[str, str] = {
|
||||
"github-copilot": "GitHub Copilot",
|
||||
"anthropic": "Anthropic",
|
||||
"zai": "Z.AI / GLM",
|
||||
"kimi-for-coding": "Kimi / Moonshot",
|
||||
"kimi-k2.5": "Kimi / Moonshot",
|
||||
"minimax": "MiniMax",
|
||||
"minimax-cn": "MiniMax (China)",
|
||||
"deepseek": "DeepSeek",
|
||||
|
||||
@@ -235,7 +235,7 @@ The Hermes Agent framework serves as both the delivery platform and the portfoli
|
||||
|
||||
| House | Host | Model / Provider | Gateway Status |
|
||||
|-------|------|------------------|----------------|
|
||||
| Ezra | Hermes VPS | `kimi-for-coding` (Kimi K2.5) | API `8658`, webhook `8648` — Active |
|
||||
| Ezra | Hermes VPS | `kimi-k2.5` (Kimi K2.5) | API `8658`, webhook `8648` — Active |
|
||||
| Bezalel | Hermes VPS | Claude Opus 4.6 (Anthropic) | Port `8645` — Active |
|
||||
| Allegro-Primus | Hermes VPS | Kimi K2.5 | Port `8644` — Requires restart |
|
||||
| Bilbo | External | Gemma 4B (local) | Telegram dual-mode — Active |
|
||||
|
||||
74
scripts/check_no_duplicate_models.py
Executable file
74
scripts/check_no_duplicate_models.py
Executable file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python3
|
||||
"""CI check: ensure no duplicate model IDs exist in provider configs.
|
||||
|
||||
Catches the class of bugs where a rename introduces a duplicate entry
|
||||
(e.g. PR #225 kimi-for-coding -> kimi-k2.5 when kimi-k2.5 already existed).
|
||||
|
||||
Runtime target: < 2 seconds.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Allow running from repo root
|
||||
REPO_ROOT = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
|
||||
def check_openrouter_models() -> list[str]:
|
||||
"""Check OPENROUTER_MODELS for duplicate model IDs."""
|
||||
try:
|
||||
from hermes_cli.models import OPENROUTER_MODELS
|
||||
except ImportError:
|
||||
return []
|
||||
|
||||
errors = []
|
||||
seen: dict[str, int] = {}
|
||||
for i, (model_id, _desc) in enumerate(OPENROUTER_MODELS):
|
||||
if model_id in seen:
|
||||
errors.append(
|
||||
f" OPENROUTER_MODELS: duplicate '{model_id}' "
|
||||
f"(index {seen[model_id]} and {i})"
|
||||
)
|
||||
else:
|
||||
seen[model_id] = i
|
||||
return errors
|
||||
|
||||
|
||||
def check_provider_models() -> list[str]:
|
||||
"""Check _PROVIDER_MODELS for duplicate model IDs within each provider list."""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
|
||||
errors = []
|
||||
for provider, models in _PROVIDER_MODELS.items():
|
||||
seen: dict[str, int] = {}
|
||||
for i, model_id in enumerate(models):
|
||||
if model_id in seen:
|
||||
errors.append(
|
||||
f" _PROVIDER_MODELS['{provider}']: duplicate '{model_id}' "
|
||||
f"(index {seen[model_id]} and {i})"
|
||||
)
|
||||
else:
|
||||
seen[model_id] = i
|
||||
return errors
|
||||
|
||||
|
||||
def main() -> int:
|
||||
errors = []
|
||||
errors.extend(check_openrouter_models())
|
||||
errors.extend(check_provider_models())
|
||||
|
||||
if errors:
|
||||
print(f"FAIL: {len(errors)} duplicate model(s) found:")
|
||||
for e in errors:
|
||||
print(e)
|
||||
return 1
|
||||
|
||||
print("OK: no duplicate model entries")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@@ -895,7 +895,7 @@ class TestKimiMoonshotModelListIsolation:
|
||||
def test_moonshot_list_excludes_coding_plan_only_models(self):
|
||||
from hermes_cli.main import _PROVIDER_MODELS
|
||||
moonshot_models = _PROVIDER_MODELS["moonshot"]
|
||||
coding_plan_only = {"kimi-for-coding", "kimi-k2-thinking-turbo"}
|
||||
coding_plan_only = {"kimi-k2.5", "kimi-k2-thinking-turbo"}
|
||||
leaked = set(moonshot_models) & coding_plan_only
|
||||
assert not leaked, f"Moonshot list contains Coding Plan-only models: {leaked}"
|
||||
|
||||
@@ -908,7 +908,7 @@ class TestKimiMoonshotModelListIsolation:
|
||||
def test_coding_plan_list_contains_plan_specific_models(self):
|
||||
from hermes_cli.main import _PROVIDER_MODELS
|
||||
coding_models = _PROVIDER_MODELS["kimi-coding"]
|
||||
assert "kimi-for-coding" in coding_models
|
||||
assert "kimi-k2.5" in coding_models
|
||||
assert "kimi-k2-thinking-turbo" in coding_models
|
||||
|
||||
|
||||
|
||||
@@ -142,7 +142,7 @@ hermes chat --provider zai --model glm-5
|
||||
# Requires: GLM_API_KEY in ~/.hermes/.env
|
||||
|
||||
# Kimi / Moonshot AI
|
||||
hermes chat --provider kimi-coding --model kimi-for-coding
|
||||
hermes chat --provider kimi-coding --model kimi-k2.5
|
||||
# Requires: KIMI_API_KEY in ~/.hermes/.env
|
||||
|
||||
# MiniMax (global endpoint)
|
||||
|
||||
Reference in New Issue
Block a user