From 32636ecf8a751ee84ab44c140ec1b8174a6d7eef Mon Sep 17 00:00:00 2001
From: tars90percent <tars@minimaxi.com>
Date: Fri, 6 Mar 2026 16:47:48 +0800
Subject: [PATCH] Update MiniMax model ID from m2.1 to m2.5

---
 hermes_cli/models.py                            | 2 +-
 tools/rl_training_tool.py                       | 4 ++--
 website/docs/user-guide/features/rl-training.md | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 90441b18d..3529b4743 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -17,7 +17,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
     ("google/gemini-3-flash-preview",   ""),
     ("z-ai/glm-4.7",                    ""),
     ("moonshotai/kimi-k2.5",            ""),
-    ("minimax/minimax-m2.1",            ""),
+    ("minimax/minimax-m2.5",            ""),
 ]
 
 
diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
index b98a07d56..6ffa6e237 100644
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -993,7 +993,7 @@ async def rl_list_runs() -> str:
 TEST_MODELS = [
     {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
     {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
-    {"id": "minimax/minimax-m2.1", "name": "MiniMax M2.1", "scale": "large"},
+    {"id": "minimax/minimax-m2.5", "name": "MiniMax M2.5", "scale": "large"},
 ]
 
 # Default test parameters - quick but representative
@@ -1353,7 +1353,7 @@ RL_CHECK_STATUS_SCHEMA = {"name": "rl_check_status", "description": "Get status
 RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}}
 RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}}
 RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}}
-RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.1"}}, "required": []}}
+RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.5"}}, "required": []}}
 
 _rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"]
 
diff --git a/website/docs/user-guide/features/rl-training.md b/website/docs/user-guide/features/rl-training.md
index 90273441f..862403958 100644
--- a/website/docs/user-guide/features/rl-training.md
+++ b/website/docs/user-guide/features/rl-training.md
@@ -147,7 +147,7 @@ Default configuration:
 - Tests 3 models at different scales for robustness:
   - `qwen/qwen3-8b` (small)
   - `z-ai/glm-4.7-flash` (medium)
-  - `minimax/minimax-m2.1` (large)
+  - `minimax/minimax-m2.5` (large)
 - Total: ~144 rollouts
 
 This validates: