Merge PR #473: Update model id in OpenRouter from minimax-m2.1 to minimax-m2.5

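Authored by tars90percent. Updates the remaining minimax-m2.1 references to minimax-m2.5: the TEST_MODELS entry and the rl_test_inference schema description in tools/rl_training_tool.py, and the default model list in the RL training user guide (website/docs/user-guide/features/rl-training.md).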
2026-03-06 18:43:18 -08:00
parent 3c6c11b7c9 32636ecf8a
commit 479dfc096a
2 changed files with 3 additions and 3 deletions
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -993,7 +993,7 @@ async def rl_list_runs() -> str:
 TEST_MODELS = [
    {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
    {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
-    {"id": "minimax/minimax-m2.1", "name": "MiniMax M2.1", "scale": "large"},
+    {"id": "minimax/minimax-m2.5", "name": "MiniMax M2.5", "scale": "large"},
 ]

 # Default test parameters - quick but representative
@@ -1353,7 +1353,7 @@ RL_CHECK_STATUS_SCHEMA = {"name": "rl_check_status", "description": "Get status
 RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}}
 RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}}
 RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}}
-RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.1"}}, "required": []}}
+RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.5"}}, "required": []}}

 _rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"]

--- a/website/docs/user-guide/features/rl-training.md
+++ b/website/docs/user-guide/features/rl-training.md
@@ -147,7 +147,7 @@ Default configuration:
 - Tests 3 models at different scales for robustness:
  - `qwen/qwen3-8b` (small)
  - `z-ai/glm-4.7-flash` (medium)
-  - `minimax/minimax-m2.1` (large)
+  - `minimax/minimax-m2.5` (large)
 - Total: ~144 rollouts

 This validates: