From e4043633fcf852c604311d5e09bc32a0ed3150cc Mon Sep 17 00:00:00 2001
From: octo-patch <octo-patch@users.noreply.github.com>
Date: Wed, 18 Mar 2026 02:42:58 -0700
Subject: [PATCH] feat: upgrade MiniMax default to M2.7 + add new OpenRouter
 models

MiniMax: Add M2.7 and M2.7-highspeed as new defaults across provider
model lists, auxiliary client, metadata, setup wizard, RL training tool,
fallback tests, and docs. Retain M2.5/M2.1 as alternatives.

OpenRouter: Replace minimax-m2.5 with minimax-m2.7 and add
grok-4.20-beta, nemotron-3-super-120b-a12b:free,
trinity-large-preview:free, glm-5-turbo, and hunter-alpha to the
model catalog.

MiniMax changes based on PR #1882 by @octo-patch (applied manually
due to stale conflicts in refactored pricing module).
---
 agent/auxiliary_client.py                       |  4 ++--
 agent/model_metadata.py                         |  2 ++
 hermes_cli/models.py                            | 11 ++++++++++-
 hermes_cli/setup.py                             |  4 ++--
 tests/test_fallback_model.py                    |  8 ++++----
 tools/rl_training_tool.py                       |  4 ++--
 website/docs/user-guide/configuration.md        |  4 ++--
 website/docs/user-guide/features/rl-training.md |  2 +-
 8 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 94be9d6fe..c10490834 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -55,8 +55,8 @@ logger = logging.getLogger(__name__)
 _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
     "zai": "glm-4.5-flash",
     "kimi-coding": "kimi-k2-turbo-preview",
-    "minimax": "MiniMax-M2.5-highspeed",
-    "minimax-cn": "MiniMax-M2.5-highspeed",
+    "minimax": "MiniMax-M2.7-highspeed",
+    "minimax-cn": "MiniMax-M2.7-highspeed",
     "anthropic": "claude-haiku-4-5-20251001",
     "ai-gateway": "google/gemini-3-flash",
     "opencode-zen": "gemini-3-flash",
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index cd847aa82..fb0d38466 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -77,6 +77,8 @@ DEFAULT_CONTEXT_LENGTHS = {
     "kimi-k2-thinking-turbo": 262144,
     "kimi-k2-turbo-preview": 262144,
     "kimi-k2-0905-preview": 131072,
+    "MiniMax-M2.7": 204800,
+    "MiniMax-M2.7-highspeed": 204800,
     "MiniMax-M2.5": 204800,
     "MiniMax-M2.5-highspeed": 204800,
     "MiniMax-M2.1": 204800,
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 174aa9475..5a3f871f9 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -28,7 +28,12 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
     ("stepfun/step-3.5-flash",          ""),
     ("z-ai/glm-5",                      ""),
     ("moonshotai/kimi-k2.5",            ""),
-    ("minimax/minimax-m2.5",            ""),
+    ("minimax/minimax-m2.7",            ""),
+    ("x-ai/grok-4.20-beta",             ""),
+    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
+    ("arcee-ai/trinity-large-preview:free", "free"),
+    ("z-ai/glm-5-turbo",                ""),
+    ("openrouter/hunter-alpha",          ""),
 ]
 
 _PROVIDER_MODELS: dict[str, list[str]] = {
@@ -61,11 +66,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "kimi-k2-0905-preview",
     ],
     "minimax": [
+        "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed",
         "MiniMax-M2.5",
         "MiniMax-M2.5-highspeed",
         "MiniMax-M2.1",
     ],
     "minimax-cn": [
+        "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed",
         "MiniMax-M2.5",
         "MiniMax-M2.5-highspeed",
         "MiniMax-M2.1",
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index e3b5ed7d4..46c7eea96 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -57,8 +57,8 @@ def _set_default_model(config: Dict[str, Any], model_name: str) -> None:
 _DEFAULT_PROVIDER_MODELS = {
     "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
     "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
-    "minimax": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
-    "minimax-cn": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
+    "minimax": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
+    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
     "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
     "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
 }
diff --git a/tests/test_fallback_model.py b/tests/test_fallback_model.py
index 9e34bf749..df2bc9cb5 100644
--- a/tests/test_fallback_model.py
+++ b/tests/test_fallback_model.py
@@ -131,7 +131,7 @@ class TestTryActivateFallback:
 
     def test_activates_minimax_fallback(self):
         agent = _make_agent(
-            fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
+            fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"},
         )
         mock_client = _mock_resolve(
             api_key="sk-mm-key",
@@ -139,10 +139,10 @@ class TestTryActivateFallback:
         )
         with patch(
             "agent.auxiliary_client.resolve_provider_client",
-            return_value=(mock_client, "MiniMax-M2.5"),
+            return_value=(mock_client, "MiniMax-M2.7"),
         ):
             assert agent._try_activate_fallback() is True
-            assert agent.model == "MiniMax-M2.5"
+            assert agent.model == "MiniMax-M2.7"
             assert agent.provider == "minimax"
             assert agent.client is mock_client
 
@@ -165,7 +165,7 @@ class TestTryActivateFallback:
     def test_returns_false_when_no_api_key(self):
         """Fallback should fail gracefully when the API key env var is unset."""
         agent = _make_agent(
-            fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
+            fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"},
         )
         with patch(
             "agent.auxiliary_client.resolve_provider_client",
diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
index 6d32bd538..566a2fb33 100644
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -1009,7 +1009,7 @@ async def rl_list_runs() -> str:
 TEST_MODELS = [
     {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
     {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
-    {"id": "minimax/minimax-m2.5", "name": "MiniMax M2.5", "scale": "large"},
+    {"id": "minimax/minimax-m2.7", "name": "MiniMax M2.7", "scale": "large"},
 ]
 
 # Default test parameters - quick but representative
@@ -1370,7 +1370,7 @@ RL_CHECK_STATUS_SCHEMA = {"name": "rl_check_status", "description": "Get status
 RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}}
 RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}}
 RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}}
-RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.5"}}, "required": []}}
+RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.7"}}, "required": []}}
 
 _rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"]
 
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index 8ee4d3095..878982b28 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -131,11 +131,11 @@ hermes chat --provider kimi-coding --model moonshot-v1-auto
 # Requires: KIMI_API_KEY in ~/.hermes/.env
 
 # MiniMax (global endpoint)
-hermes chat --provider minimax --model MiniMax-Text-01
+hermes chat --provider minimax --model MiniMax-M2.7
 # Requires: MINIMAX_API_KEY in ~/.hermes/.env
 
 # MiniMax (China endpoint)
-hermes chat --provider minimax-cn --model MiniMax-Text-01
+hermes chat --provider minimax-cn --model MiniMax-M2.7
 # Requires: MINIMAX_CN_API_KEY in ~/.hermes/.env
 
 # Alibaba Cloud / DashScope (Qwen models)
diff --git a/website/docs/user-guide/features/rl-training.md b/website/docs/user-guide/features/rl-training.md
index 440cc31b5..9c5d71952 100644
--- a/website/docs/user-guide/features/rl-training.md
+++ b/website/docs/user-guide/features/rl-training.md
@@ -147,7 +147,7 @@ Default configuration:
 - Tests 3 models at different scales for robustness:
   - `qwen/qwen3-8b` (small)
   - `z-ai/glm-4.7-flash` (medium)
-  - `minimax/minimax-m2.5` (large)
+  - `minimax/minimax-m2.7` (large)
 - Total: ~144 rollouts
 
 This validates: