update swift docker

modelscope · Jintao-Huang · Apr 14, 2025 · Apr 14, 2025 · Apr 14, 2025 · Apr 14, 2025
commit 6d9fef249e2d0c4de1a371e85e612b345b12abf7
diff --git a/docs/source/GetStarted/SWIFT安装.md b/docs/source/GetStarted/SWIFT安装.md
@@ -38,6 +38,10 @@ pip install ms-swift==2.*
 ## 镜像
 
 ```
+# vllm0.8.3 (该版本vllm可能导致部分GRPO训练卡住，GRPO建议优先使用vllm0.7.3)
+modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.6.0-vllm0.8.3-modelscope1.25.0-swift3.3.0.post1
+
+# vllm0.7.3
 modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.5.1-modelscope1.25.0-swift3.2.2
 ```
 

diff --git a/docs/source/Instruction/Megatron-SWIFT训练.md b/docs/source/Instruction/Megatron-SWIFT训练.md
@@ -20,7 +20,7 @@ pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation -
 
 或者你也可以使用镜像：
 ```
-modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.5.1-modelscope1.25.0-swift3.2.2
+modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.6.0-vllm0.8.3-modelscope1.25.0-swift3.3.0.post1
 ```
 
 依赖库Megatron-LM将会由swift进行git clone并安装，不需要用户手动安装。你也可以通过环境变量`MEGATRON_LM_PATH`指向已经下载好的repo路径（断网环境，[core_r0.11.0分支](https://github.com/NVIDIA/Megatron-LM/tree/core_r0.11.0)）。

diff --git a/docs/source_en/GetStarted/SWIFT-installation.md b/docs/source_en/GetStarted/SWIFT-installation.md
@@ -39,6 +39,10 @@ pip install ms-swift==2.*
 ## Mirror
 
 ```
+# vllm0.8.3 (this version of vllm may cause some GRPO training runs to hang; vllm0.7.3 is recommended for GRPO training)
+modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.6.0-vllm0.8.3-modelscope1.25.0-swift3.3.0.post1
+
+# vllm0.7.3
 modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.5.1-modelscope1.25.0-swift3.2.2
 ```
 

diff --git a/docs/source_en/Instruction/Megatron-SWIFT-Training.md b/docs/source_en/Instruction/Megatron-SWIFT-Training.md
@@ -21,7 +21,7 @@ pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation -
 
 Alternatively, you can also use the image:
 ```
-modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.5.1-modelscope1.25.0-swift3.2.2
+modelscope-registry.us-west-1.cr.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda12.4.0-py311-torch2.6.0-vllm0.8.3-modelscope1.25.0-swift3.3.0.post1
 ```
 
 The dependency library Megatron-LM will be git cloned and installed by swift, no manual installation by the user is required. You can also use the environment variable `MEGATRON_LM_PATH` to point to the already downloaded repo path (for offline environments, use the [core_r0.11.0 branch](https://github.com/NVIDIA/Megatron-LM/tree/core_r0.11.0)).

diff --git a/requirements/install_all.sh b/requirements/install_all.sh
@@ -7,5 +7,6 @@ pip install auto_gptq optimum bitsandbytes -U
 pip install git+https://github.com/modelscope/ms-swift.git#egg=ms-swift[all]
 pip install timm -U
 pip install deepspeed -U
-pip install qwen_vl_utils qwen_omni_utils decord librosa pyav icecream soundfile liger_kernel -U
+pip install qwen_vl_utils qwen_omni_utils decord librosa pyav icecream soundfile -U
+pip install liger_kernel nvitop -U
 # flash-attn: https://github.com/Dao-AILab/flash-attention/releases
diff --git a/swift/llm/argument/base_args/model_args.py b/swift/llm/argument/base_args/model_args.py
@@ -40,7 +40,7 @@ class ModelArguments:
     torch_dtype: Literal['bfloat16', 'float16', 'float32', None] = None
     # flash_attn: It will automatically convert names based on the model.
     # None: It will be automatically selected between sdpa and eager.
-    attn_impl: Literal['flash_attn', 'sdpa', 'eager', None] = None
+    attn_impl: Literal['flash_attn', 'sdpa', 'eager', 'flex_attention', None] = None
 
     num_labels: Optional[int] = None
     problem_type: Literal['regression', 'single_label_classification', 'multi_label_classification'] = None