From e4043633fcf852c604311d5e09bc32a0ed3150cc Mon Sep 17 00:00:00 2001 From: octo-patch Date: Wed, 18 Mar 2026 02:42:58 -0700 Subject: [PATCH] feat: upgrade MiniMax default to M2.7 + add new OpenRouter models MiniMax: Add M2.7 and M2.7-highspeed as new defaults across provider model lists, auxiliary client, metadata, setup wizard, RL training tool, fallback tests, and docs. Retain M2.5/M2.1 as alternatives. OpenRouter: Add grok-4.20-beta, nemotron-3-super-120b-a12b:free, trinity-large-preview:free, glm-5-turbo, and hunter-alpha to the model catalog. MiniMax changes based on PR #1882 by @octo-patch (applied manually due to stale conflicts in refactored pricing module). --- agent/auxiliary_client.py | 4 ++-- agent/model_metadata.py | 2 ++ hermes_cli/models.py | 11 ++++++++++- hermes_cli/setup.py | 4 ++-- tests/test_fallback_model.py | 8 ++++---- tools/rl_training_tool.py | 4 ++-- website/docs/user-guide/configuration.md | 4 ++-- website/docs/user-guide/features/rl-training.md | 2 +- 8 files changed, 25 insertions(+), 14 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 94be9d6fe..c10490834 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -55,8 +55,8 @@ logger = logging.getLogger(__name__) _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "zai": "glm-4.5-flash", "kimi-coding": "kimi-k2-turbo-preview", - "minimax": "MiniMax-M2.5-highspeed", - "minimax-cn": "MiniMax-M2.5-highspeed", + "minimax": "MiniMax-M2.7-highspeed", + "minimax-cn": "MiniMax-M2.7-highspeed", "anthropic": "claude-haiku-4-5-20251001", "ai-gateway": "google/gemini-3-flash", "opencode-zen": "gemini-3-flash", diff --git a/agent/model_metadata.py b/agent/model_metadata.py index cd847aa82..fb0d38466 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -77,6 +77,8 @@ DEFAULT_CONTEXT_LENGTHS = { "kimi-k2-thinking-turbo": 262144, "kimi-k2-turbo-preview": 262144, "kimi-k2-0905-preview": 131072, + "MiniMax-M2.7": 204800, + "MiniMax-M2.7-highspeed": 204800, "MiniMax-M2.5": 204800, "MiniMax-M2.5-highspeed": 204800, "MiniMax-M2.1": 204800, diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 174aa9475..5a3f871f9 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -28,7 +28,12 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("stepfun/step-3.5-flash", ""), ("z-ai/glm-5", ""), ("moonshotai/kimi-k2.5", ""), - ("minimax/minimax-m2.5", ""), + ("minimax/minimax-m2.7", ""), + ("x-ai/grok-4.20-beta", ""), + ("nvidia/nemotron-3-super-120b-a12b:free", "free"), + ("arcee-ai/trinity-large-preview:free", "free"), + ("z-ai/glm-5-turbo", ""), + ("openrouter/hunter-alpha", ""), ] _PROVIDER_MODELS: dict[str, list[str]] = { @@ -61,11 +66,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "kimi-k2-0905-preview", ], "minimax": [ + "MiniMax-M2.7", + "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1", ], "minimax-cn": [ + "MiniMax-M2.7", + "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index e3b5ed7d4..46c7eea96 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -57,8 +57,8 @@ def _set_default_model(config: Dict[str, Any], model_name: str) -> None: _DEFAULT_PROVIDER_MODELS = { "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], - "minimax": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], - "minimax-cn": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], + "minimax": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], + "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], } diff --git a/tests/test_fallback_model.py b/tests/test_fallback_model.py index 9e34bf749..df2bc9cb5 100644 --- a/tests/test_fallback_model.py +++ b/tests/test_fallback_model.py @@ -131,7 +131,7 @@ class TestTryActivateFallback: def test_activates_minimax_fallback(self): agent = _make_agent( - fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"}, + fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"}, ) mock_client = _mock_resolve( api_key="sk-mm-key", @@ -139,10 +139,10 @@ class TestTryActivateFallback: ) with patch( "agent.auxiliary_client.resolve_provider_client", - return_value=(mock_client, "MiniMax-M2.5"), + return_value=(mock_client, "MiniMax-M2.7"), ): assert agent._try_activate_fallback() is True - assert agent.model == "MiniMax-M2.5" + assert agent.model == "MiniMax-M2.7" assert agent.provider == "minimax" assert agent.client is mock_client @@ -165,7 +165,7 @@ class TestTryActivateFallback: def test_returns_false_when_no_api_key(self): """Fallback should fail gracefully when the API key env var is unset.""" agent = _make_agent( - fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"}, + fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"}, ) with patch( "agent.auxiliary_client.resolve_provider_client", diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py index 6d32bd538..566a2fb33 100644 --- a/tools/rl_training_tool.py +++ b/tools/rl_training_tool.py @@ -1009,7 +1009,7 @@ async def rl_list_runs() -> str: TEST_MODELS = [ {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"}, {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"}, - {"id": "minimax/minimax-m2.5", "name": "MiniMax M2.5", "scale": "large"}, + {"id": "minimax/minimax-m2.7", "name": "MiniMax M2.7", "scale": "large"}, ] # Default test parameters - quick but representative @@ -1370,7 +1370,7 @@ RL_CHECK_STATUS_SCHEMA = {"name": "rl_check_status", "description": "Get status RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}} RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}} RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}} -RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.5"}}, "required": []}} +RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.7"}}, "required": []}} _rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"] diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 8ee4d3095..878982b28 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -131,11 +131,11 @@ hermes chat --provider kimi-coding --model moonshot-v1-auto # Requires: KIMI_API_KEY in ~/.hermes/.env # MiniMax (global endpoint) -hermes chat --provider minimax --model MiniMax-Text-01 +hermes chat --provider minimax --model MiniMax-M2.7 # Requires: MINIMAX_API_KEY in ~/.hermes/.env # MiniMax (China endpoint) -hermes chat --provider minimax-cn --model MiniMax-Text-01 +hermes chat --provider minimax-cn --model MiniMax-M2.7 # Requires: MINIMAX_CN_API_KEY in ~/.hermes/.env # Alibaba Cloud / DashScope (Qwen models) diff --git a/website/docs/user-guide/features/rl-training.md b/website/docs/user-guide/features/rl-training.md index 440cc31b5..9c5d71952 100644 --- a/website/docs/user-guide/features/rl-training.md +++ b/website/docs/user-guide/features/rl-training.md @@ -147,7 +147,7 @@ Default configuration: - Tests 3 models at different scales for robustness: - `qwen/qwen3-8b` (small) - `z-ai/glm-4.7-flash` (medium) - - `minimax/minimax-m2.5` (large) + - `minimax/minimax-m2.7` (large) - Total: ~144 rollouts This validates: