feat: upgrade MiniMax default to M2.7 + add new OpenRouter models
MiniMax: Add M2.7 and M2.7-highspeed as the new defaults across the provider model lists, auxiliary client, metadata, setup wizard, RL training tool, fallback tests, and docs. Retain M2.5/M2.1 as alternatives.

OpenRouter: Add grok-4.20-beta, nemotron-3-super-120b-a12b:free, trinity-large-preview:free, glm-5-turbo, and hunter-alpha to the model catalog.

MiniMax changes are based on PR #1882 by @octo-patch (applied manually due to stale conflicts in the refactored pricing module).
This commit is contained in:
@@ -55,8 +55,8 @@ logger = logging.getLogger(__name__)
|
|||||||
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||||
"zai": "glm-4.5-flash",
|
"zai": "glm-4.5-flash",
|
||||||
"kimi-coding": "kimi-k2-turbo-preview",
|
"kimi-coding": "kimi-k2-turbo-preview",
|
||||||
"minimax": "MiniMax-M2.5-highspeed",
|
"minimax": "MiniMax-M2.7-highspeed",
|
||||||
"minimax-cn": "MiniMax-M2.5-highspeed",
|
"minimax-cn": "MiniMax-M2.7-highspeed",
|
||||||
"anthropic": "claude-haiku-4-5-20251001",
|
"anthropic": "claude-haiku-4-5-20251001",
|
||||||
"ai-gateway": "google/gemini-3-flash",
|
"ai-gateway": "google/gemini-3-flash",
|
||||||
"opencode-zen": "gemini-3-flash",
|
"opencode-zen": "gemini-3-flash",
|
||||||
|
|||||||
@@ -77,6 +77,8 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||||||
"kimi-k2-thinking-turbo": 262144,
|
"kimi-k2-thinking-turbo": 262144,
|
||||||
"kimi-k2-turbo-preview": 262144,
|
"kimi-k2-turbo-preview": 262144,
|
||||||
"kimi-k2-0905-preview": 131072,
|
"kimi-k2-0905-preview": 131072,
|
||||||
|
"MiniMax-M2.7": 204800,
|
||||||
|
"MiniMax-M2.7-highspeed": 204800,
|
||||||
"MiniMax-M2.5": 204800,
|
"MiniMax-M2.5": 204800,
|
||||||
"MiniMax-M2.5-highspeed": 204800,
|
"MiniMax-M2.5-highspeed": 204800,
|
||||||
"MiniMax-M2.1": 204800,
|
"MiniMax-M2.1": 204800,
|
||||||
|
|||||||
@@ -28,7 +28,12 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
|||||||
("stepfun/step-3.5-flash", ""),
|
("stepfun/step-3.5-flash", ""),
|
||||||
("z-ai/glm-5", ""),
|
("z-ai/glm-5", ""),
|
||||||
("moonshotai/kimi-k2.5", ""),
|
("moonshotai/kimi-k2.5", ""),
|
||||||
("minimax/minimax-m2.5", ""),
|
("minimax/minimax-m2.7", ""),
|
||||||
|
("x-ai/grok-4.20-beta", ""),
|
||||||
|
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
|
||||||
|
("arcee-ai/trinity-large-preview:free", "free"),
|
||||||
|
("z-ai/glm-5-turbo", ""),
|
||||||
|
("openrouter/hunter-alpha", ""),
|
||||||
]
|
]
|
||||||
|
|
||||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||||
@@ -61,11 +66,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||||||
"kimi-k2-0905-preview",
|
"kimi-k2-0905-preview",
|
||||||
],
|
],
|
||||||
"minimax": [
|
"minimax": [
|
||||||
|
"MiniMax-M2.7",
|
||||||
|
"MiniMax-M2.7-highspeed",
|
||||||
"MiniMax-M2.5",
|
"MiniMax-M2.5",
|
||||||
"MiniMax-M2.5-highspeed",
|
"MiniMax-M2.5-highspeed",
|
||||||
"MiniMax-M2.1",
|
"MiniMax-M2.1",
|
||||||
],
|
],
|
||||||
"minimax-cn": [
|
"minimax-cn": [
|
||||||
|
"MiniMax-M2.7",
|
||||||
|
"MiniMax-M2.7-highspeed",
|
||||||
"MiniMax-M2.5",
|
"MiniMax-M2.5",
|
||||||
"MiniMax-M2.5-highspeed",
|
"MiniMax-M2.5-highspeed",
|
||||||
"MiniMax-M2.1",
|
"MiniMax-M2.1",
|
||||||
|
|||||||
@@ -57,8 +57,8 @@ def _set_default_model(config: Dict[str, Any], model_name: str) -> None:
|
|||||||
_DEFAULT_PROVIDER_MODELS = {
|
_DEFAULT_PROVIDER_MODELS = {
|
||||||
"zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
|
"zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
|
||||||
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
|
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
|
||||||
"minimax": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
|
"minimax": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
|
||||||
"minimax-cn": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
|
"minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
|
||||||
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
|
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
|
||||||
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
|
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -131,7 +131,7 @@ class TestTryActivateFallback:
|
|||||||
|
|
||||||
def test_activates_minimax_fallback(self):
|
def test_activates_minimax_fallback(self):
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
|
fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"},
|
||||||
)
|
)
|
||||||
mock_client = _mock_resolve(
|
mock_client = _mock_resolve(
|
||||||
api_key="sk-mm-key",
|
api_key="sk-mm-key",
|
||||||
@@ -139,10 +139,10 @@ class TestTryActivateFallback:
|
|||||||
)
|
)
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.resolve_provider_client",
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
return_value=(mock_client, "MiniMax-M2.5"),
|
return_value=(mock_client, "MiniMax-M2.7"),
|
||||||
):
|
):
|
||||||
assert agent._try_activate_fallback() is True
|
assert agent._try_activate_fallback() is True
|
||||||
assert agent.model == "MiniMax-M2.5"
|
assert agent.model == "MiniMax-M2.7"
|
||||||
assert agent.provider == "minimax"
|
assert agent.provider == "minimax"
|
||||||
assert agent.client is mock_client
|
assert agent.client is mock_client
|
||||||
|
|
||||||
@@ -165,7 +165,7 @@ class TestTryActivateFallback:
|
|||||||
def test_returns_false_when_no_api_key(self):
|
def test_returns_false_when_no_api_key(self):
|
||||||
"""Fallback should fail gracefully when the API key env var is unset."""
|
"""Fallback should fail gracefully when the API key env var is unset."""
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
|
fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"},
|
||||||
)
|
)
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.resolve_provider_client",
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
|||||||
@@ -1009,7 +1009,7 @@ async def rl_list_runs() -> str:
|
|||||||
TEST_MODELS = [
|
TEST_MODELS = [
|
||||||
{"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
|
{"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
|
||||||
{"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
|
{"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
|
||||||
{"id": "minimax/minimax-m2.5", "name": "MiniMax M2.5", "scale": "large"},
|
{"id": "minimax/minimax-m2.7", "name": "MiniMax M2.7", "scale": "large"},
|
||||||
]
|
]
|
||||||
|
|
||||||
# Default test parameters - quick but representative
|
# Default test parameters - quick but representative
|
||||||
@@ -1370,7 +1370,7 @@ RL_CHECK_STATUS_SCHEMA = {"name": "rl_check_status", "description": "Get status
|
|||||||
RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}}
|
RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}}
|
||||||
RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}}
|
RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}}
|
||||||
RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}}
|
RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}}
|
||||||
RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.5"}}, "required": []}}
|
RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.7"}}, "required": []}}
|
||||||
|
|
||||||
_rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"]
|
_rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"]
|
||||||
|
|
||||||
|
|||||||
@@ -131,11 +131,11 @@ hermes chat --provider kimi-coding --model moonshot-v1-auto
|
|||||||
# Requires: KIMI_API_KEY in ~/.hermes/.env
|
# Requires: KIMI_API_KEY in ~/.hermes/.env
|
||||||
|
|
||||||
# MiniMax (global endpoint)
|
# MiniMax (global endpoint)
|
||||||
hermes chat --provider minimax --model MiniMax-Text-01
|
hermes chat --provider minimax --model MiniMax-M2.7
|
||||||
# Requires: MINIMAX_API_KEY in ~/.hermes/.env
|
# Requires: MINIMAX_API_KEY in ~/.hermes/.env
|
||||||
|
|
||||||
# MiniMax (China endpoint)
|
# MiniMax (China endpoint)
|
||||||
hermes chat --provider minimax-cn --model MiniMax-Text-01
|
hermes chat --provider minimax-cn --model MiniMax-M2.7
|
||||||
# Requires: MINIMAX_CN_API_KEY in ~/.hermes/.env
|
# Requires: MINIMAX_CN_API_KEY in ~/.hermes/.env
|
||||||
|
|
||||||
# Alibaba Cloud / DashScope (Qwen models)
|
# Alibaba Cloud / DashScope (Qwen models)
|
||||||
|
|||||||
@@ -147,7 +147,7 @@ Default configuration:
|
|||||||
- Tests 3 models at different scales for robustness:
|
- Tests 3 models at different scales for robustness:
|
||||||
- `qwen/qwen3-8b` (small)
|
- `qwen/qwen3-8b` (small)
|
||||||
- `z-ai/glm-4.7-flash` (medium)
|
- `z-ai/glm-4.7-flash` (medium)
|
||||||
- `minimax/minimax-m2.5` (large)
|
- `minimax/minimax-m2.7` (large)
|
||||||
- Total: ~144 rollouts
|
- Total: ~144 rollouts
|
||||||
|
|
||||||
This validates:
|
This validates:
|
||||||
|
|||||||
Reference in New Issue
Block a user