2 files changed: +30 -0 lines changed

@@ -12,6 +12,8 @@ class LLMModelType:
     qwen2_moe = 'qwen2_moe'
     qwq_preview = 'qwq_preview'
     qwq = 'qwq'
+    qwen3 = 'qwen3'
+    qwen3_moe = 'qwen3_moe'
 
     qwen2_gte = 'qwen2_gte'
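For reference, the new members behave exactly like the existing ones: LLMModelType is a plain namespace of string identifiers that key the model registry. A minimal sketch of that, assuming the class is importable from swift/llm/model/constant.py as the hunk header suggests (the import path is an assumption and may differ):

# Sketch only: LLMModelType members are plain string constants used as
# registry keys; the import path below is an assumption.
from swift.llm.model.constant import LLMModelType

assert LLMModelType.qwen3 == 'qwen3'
assert LLMModelType.qwen3_moe == 'qwen3_moe'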
@@ -487,8 +487,36 @@ def _get_cast_dtype(self) -> torch.dtype:
         get_model_tokenizer_with_flash_attn,
         architectures=['Qwen2MoeForCausalLM'],
         requires=['transformers>=4.40'],
+    ))
+
+register_model(
+    ModelMeta(
+        LLMModelType.qwen3,
+        [
+            ModelGroup([
+                # Model('Qwen/Qwen3-0.6B-Base', 'Qwen/Qwen3-0.6B-Base'),
+            ]),
+        ],
+        TemplateType.qwen,
+        get_model_tokenizer_with_flash_attn,
+        architectures=['Qwen3ForCausalLM'],
+        requires=['transformers>=4.51'],
         model_arch=ModelArch.llama))
 
+register_model(
+    ModelMeta(
+        LLMModelType.qwen3_moe,
+        [
+            ModelGroup([
+                # Model('Qwen/Qwen3-15B-A2B-Base', 'Qwen/Qwen3-15B-A2B-Base'),
+            ]),
+        ],
+        TemplateType.qwen,
+        get_model_tokenizer_with_flash_attn,
+        architectures=['Qwen3MoeForCausalLM'],
+        requires=['transformers>=4.51'],
+    ))
+
 
 def patch_qwen_vl_utils(vision_process):
     if hasattr(vision_process, '_patch'):
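As a smoke test for the new registrations, something along these lines should work once the commented-out ModelGroup entries are filled in with released checkpoints. This is a sketch, not part of the diff: it assumes swift.llm exports get_model_tokenizer with model_type and load_model keywords, and it reuses the 'Qwen/Qwen3-0.6B-Base' id that the diff only references in a comment.

# Sketch only: exercises the qwen3 registration above.
# Assumptions: swift.llm exposes get_model_tokenizer, it accepts the
# model_type and load_model keywords, and the Qwen3 checkpoint referenced
# in the commented-out ModelGroup entry has been published.
from swift.llm import get_model_tokenizer

model, tokenizer = get_model_tokenizer(
    'Qwen/Qwen3-0.6B-Base',  # id taken from the commented-out Model(...) line
    model_type='qwen3',      # resolves to the ModelMeta registered in this PR
    load_model=False)        # tokenizer/config only; keeps the check lightweight
print(type(tokenizer).__name__)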