Skip to content

Commit 225c483

Browse files
authored
Support Qwen3 (#3945)
1 parent 04e226d commit 225c483

File tree

2 files changed

+30
-0
lines changed

2 files changed

+30
-0
lines changed

swift/llm/model/constant.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@ class LLMModelType:
1212
qwen2_moe = 'qwen2_moe'
1313
qwq_preview = 'qwq_preview'
1414
qwq = 'qwq'
15+
qwen3 = 'qwen3'
16+
qwen3_moe = 'qwen3_moe'
1517

1618
qwen2_gte = 'qwen2_gte'
1719

swift/llm/model/model/qwen.py

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -487,8 +487,36 @@ def _get_cast_dtype(self) -> torch.dtype:
487487
get_model_tokenizer_with_flash_attn,
488488
architectures=['Qwen2MoeForCausalLM'],
489489
requires=['transformers>=4.40'],
490+
))
491+
492+
register_model(
493+
ModelMeta(
494+
LLMModelType.qwen3,
495+
[
496+
ModelGroup([
497+
# Model('Qwen/Qwen3-0.6B-Base', 'Qwen/Qwen3-0.6B-Base'),
498+
]),
499+
],
500+
TemplateType.qwen,
501+
get_model_tokenizer_with_flash_attn,
502+
architectures=['Qwen3ForCausalLM'],
503+
requires=['transformers>=4.51'],
490504
model_arch=ModelArch.llama))
491505

506+
register_model(
507+
ModelMeta(
508+
LLMModelType.qwen3_moe,
509+
[
510+
ModelGroup([
511+
# Model('Qwen/Qwen3-15B-A2B-Base', 'Qwen/Qwen3-15B-A2B-Base'),
512+
]),
513+
],
514+
TemplateType.qwen,
515+
get_model_tokenizer_with_flash_attn,
516+
architectures=['Qwen3MoeForCausalLM'],
517+
requires=['transformers>=4.51'],
518+
))
519+
492520

493521
def patch_qwen_vl_utils(vision_process):
494522
if hasattr(vision_process, '_patch'):

0 commit comments

Comments
 (0)