From 32636ecf8a751ee84ab44c140ec1b8174a6d7eef Mon Sep 17 00:00:00 2001 From: tars90percent Date: Fri, 6 Mar 2026 16:47:48 +0800 Subject: [PATCH] Update MiniMax model ID from m2.1 to m2.5 --- hermes_cli/models.py | 2 +- tools/rl_training_tool.py | 4 ++-- website/docs/user-guide/features/rl-training.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 90441b18d..3529b4743 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -17,7 +17,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("google/gemini-3-flash-preview", ""), ("z-ai/glm-4.7", ""), ("moonshotai/kimi-k2.5", ""), - ("minimax/minimax-m2.1", ""), + ("minimax/minimax-m2.5", ""), ] diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py index b98a07d56..6ffa6e237 100644 --- a/tools/rl_training_tool.py +++ b/tools/rl_training_tool.py @@ -993,7 +993,7 @@ async def rl_list_runs() -> str: TEST_MODELS = [ {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"}, {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"}, - {"id": "minimax/minimax-m2.1", "name": "MiniMax M2.1", "scale": "large"}, + {"id": "minimax/minimax-m2.5", "name": "MiniMax M2.5", "scale": "large"}, ] # Default test parameters - quick but representative @@ -1353,7 +1353,7 @@ RL_CHECK_STATUS_SCHEMA = {"name": "rl_check_status", "description": "Get status RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}} RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}} RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}} -RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.1"}}, "required": []}} +RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.5"}}, "required": []}} _rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"] diff --git a/website/docs/user-guide/features/rl-training.md b/website/docs/user-guide/features/rl-training.md index 90273441f..862403958 100644 --- a/website/docs/user-guide/features/rl-training.md +++ b/website/docs/user-guide/features/rl-training.md @@ -147,7 +147,7 @@ Default configuration: - Tests 3 models at different scales for robustness: - `qwen/qwen3-8b` (small) - `z-ai/glm-4.7-flash` (medium) - - `minimax/minimax-m2.1` (large) + - `minimax/minimax-m2.5` (large) - Total: ~144 rollouts This validates: