Skip to content

Commit 04e226d

Browse files
authored
update qwen2_5_omni (#3908)
1 parent 5fe84bb commit 04e226d

File tree

5 files changed

+7
-7
lines changed

5 files changed

+7
-7
lines changed

examples/train/grpo/qwen2_5_omni/grpo.sh

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 # 4 * 50GiB
 pip uninstall transformers
-pip install git+https://github.com/huggingface/transformers@f742a644ca32e65758c3adb36225aef1731bd2a8
+pip install git+https://github.com/huggingface/transformers
 pip install math_verify trl -U

 MAX_PIXELS=1003520 \

examples/train/multimodal/omni/sft.sh

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 # 4*35GB
 # A demo for four modalities that can be run directly
 pip uninstall transformers
-pip install git+https://github.com/huggingface/transformers@f742a644ca32e65758c3adb36225aef1731bd2a8
+pip install git+https://github.com/huggingface/transformers

 nproc_per_node=4

examples/train/packing/qwen2_5_omni.sh

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 # A demo for four modalities that can be run directly
 # For local datasets, it is recommended to use streaming: `--streaming true` (save memory)
 pip uninstall transformers
-pip install git+https://github.com/huggingface/transformers@f742a644ca32e65758c3adb36225aef1731bd2a8
+pip install git+https://github.com/huggingface/transformers

 NPROC_PER_NODE=4 \
 CUDA_VISIBLE_DEVICES=0,1,2,3 \

swift/llm/model/model/qwen.py

Lines changed: 2 additions & 2 deletions
@@ -614,9 +614,9 @@ def get_model_tokenizer_qwen2_5_vl(*args, **kwargs):


 def get_model_tokenizer_qwen2_5_omni(model_dir, *args, **kwargs):
-    from transformers import Qwen2_5OmniModel, Qwen2_5OmniProcessor, Qwen2_5OmniConfig
+    from transformers import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor, Qwen2_5OmniConfig
     from qwen_omni_utils import vision_process
-    kwargs['automodel_class'] = kwargs['automodel_class'] or Qwen2_5OmniModel
+    kwargs['automodel_class'] = kwargs['automodel_class'] or Qwen2_5OmniForConditionalGeneration
     processor = Qwen2_5OmniProcessor.from_pretrained(model_dir, trust_remote_code=True)
     kwargs['tokenizer'] = processor.tokenizer
     kwargs['model_config'] = Qwen2_5OmniConfig.from_pretrained(model_dir, trust_remote_code=True)

swift/llm/template/template/qwen.py

Lines changed: 2 additions & 2 deletions
@@ -410,7 +410,7 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
         encoded = Template._encode(self, inputs)
         media_inputs = self.processor(
             text='',
-            audios=inputs.audios or None,
+            audio=inputs.audios or None,
             images=inputs.images or None,
             videos=inputs.videos or None,
             return_tensors='pt')
@@ -424,7 +424,7 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
             token_id = self._tokenize(token)
             idx_list = findall(input_ids, token_id)
             if idx_list:
-                merge_length = self.processor.omni_processor.merge_size**2
+                merge_length = self.processor.image_processor.merge_size**2
                 media_grid_thw = media_inputs.get(f'{media_type}_grid_thw')

                 def _get_new_tokens(i):

0 commit comments

Comments (0)