You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
/home/jeeves/.local/lib/python3.10/site-packages/swift/trainers/mixin.py:81: FutureWarning: tokenizer is deprecated and will be removed in version 5.0.0 for Seq2SeqTrainer.__init__. Use processing_class instead.
训练14B的模型,如果 max_length 设置为 16000 就会出问题,具体问题如下:
错误如下:
from torch.distributed.optim import \
/home/jeeves/.local/lib/python3.10/site-packages/swift/trainers/mixin.py:81: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Seq2SeqTrainer.__init__`. Use `processing_class` instead.
super().__init__(
rank7: Traceback (most recent call last):
rank7: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/cli/sft.py", line 10, in <module>
rank7: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 265, in sft_main
rank7: return SwiftSft(args).main()
rank7: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/base.py", line 47, in main
rank7: result = self.run()
rank7: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 142, in run
rank7: return self.train(trainer)
rank7: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 202, in train
rank7: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/trainers/mixin.py", line 289, in train
rank7: res = super().train(*args, **kwargs)
rank7: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 2241, in train
rank7: return inner_training_loop(
rank7: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 2500, in _inner_training_loop
rank7: batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
rank7: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 5180, in get_batch_samples
rank7: batch_samples += [next(epoch_iterator)]
rank7: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 701, in next
rank7: data = self._next_data()
rank7: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1465, in _next_data
rank7: return self._process_data(data)
rank7: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1491, in _process_data
rank7: File "/opt/conda/lib/python3.10/site-packages/torch/_utils.py", line 715, in reraise
rank7: raise exception
rank7: AttributeError: Caught AttributeError in DataLoader worker process 0.
rank7: Original Traceback (most recent call last):
rank7: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
rank7: data = fetcher.fetch(index) # type: ignore[possibly-undefined]
rank7: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
rank7: return self.collate_fn(data)
rank7: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1063, in data_collator
rank7: return self._data_collator(batch, padding_to=padding_to)
rank7: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1240, in _data_collator
rank7: res = self._torchacc_xtuner_data_collator(res, padding_to, self.tokenizer, padding_side)
rank7: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1272, in _torchacc_xtuner_data_collator
rank7: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/trainers/xtuner.py", line 56, in pad_and_split_for_sequence_parallel
rank7: attention_mask = pad_for_sequence_parallel(attention_mask, padding_value=0, dim=-1)
rank7: File "/opt/conda/lib/python3.10/site-packages/xtuner/parallel/sequence/data_collate.py", line 8, in pad_for_sequence_parallel
rank7: length = tensor.shape[dim]
rank7: AttributeError: 'NoneType' object has no attribute 'shape'
Train: 0%| | 0/804 [00:00<?, ?it/s]
rank0: Traceback (most recent call last):
rank0: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/cli/sft.py", line 10, in <module>
rank0: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 265, in sft_main
rank0: return SwiftSft(args).main()
rank0: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/base.py", line 47, in main
rank0: result = self.run()
rank0: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 142, in run
rank0: return self.train(trainer)
rank0: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 202, in train
rank0: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/trainers/mixin.py", line 289, in train
rank0: res = super().train(*args, **kwargs)
rank0: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 2241, in train
rank0: return inner_training_loop(
rank0: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 2500, in _inner_training_loop
rank0: batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
rank0: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 5180, in get_batch_samples
rank0: batch_samples += [next(epoch_iterator)]
rank0: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 701, in next
rank0: data = self._next_data()
rank0: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1465, in _next_data
rank0: return self._process_data(data)
rank0: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1491, in _process_data
rank0: File "/opt/conda/lib/python3.10/site-packages/torch/_utils.py", line 715, in reraise
rank0: raise exception
rank0: AttributeError: Caught AttributeError in DataLoader worker process 0.
rank0: Original Traceback (most recent call last):
rank0: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
rank0: data = fetcher.fetch(index) # type: ignore[possibly-undefined]
rank0: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
rank0: return self.collate_fn(data)
rank0: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1063, in data_collator
rank0: return self._data_collator(batch, padding_to=padding_to)
rank0: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1240, in _data_collator
rank0: res = self._torchacc_xtuner_data_collator(res, padding_to, self.tokenizer, padding_side)
rank0: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1272, in _torchacc_xtuner_data_collator
rank0: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/trainers/xtuner.py", line 56, in pad_and_split_for_sequence_parallel
rank0: attention_mask = pad_for_sequence_parallel(attention_mask, padding_value=0, dim=-1)
rank0: File "/opt/conda/lib/python3.10/site-packages/xtuner/parallel/sequence/data_collate.py", line 8, in pad_for_sequence_parallel
rank0: length = tensor.shape[dim]
rank0: AttributeError: 'NoneType' object has no attribute 'shape'
rank5: Traceback (most recent call last):
rank5: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/cli/sft.py", line 10, in <module>
rank5: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 265, in sft_main
rank5: return SwiftSft(args).main()
rank5: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/base.py", line 47, in main
rank5: result = self.run()
rank5: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 142, in run
rank5: return self.train(trainer)
rank5: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 202, in train
rank5: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/trainers/mixin.py", line 289, in train
rank5: res = super().train(*args, **kwargs)
rank5: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 2241, in train
rank5: return inner_training_loop(
rank5: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 2500, in _inner_training_loop
rank5: batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
rank5: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 5180, in get_batch_samples
rank5: batch_samples += [next(epoch_iterator)]
rank5: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 701, in next
rank5: data = self._next_data()
rank5: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1465, in _next_data
rank5: return self._process_data(data)
rank5: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1491, in _process_data
rank5: File "/opt/conda/lib/python3.10/site-packages/torch/_utils.py", line 715, in reraise
rank5: raise exception
rank5: AttributeError: Caught AttributeError in DataLoader worker process 0.
rank5: Original Traceback (most recent call last):
rank5: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
rank5: data = fetcher.fetch(index) # type: ignore[possibly-undefined]
rank5: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
rank5: return self.collate_fn(data)
rank5: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1063, in data_collator
rank5: return self._data_collator(batch, padding_to=padding_to)
rank5: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1240, in _data_collator
rank5: res = self._torchacc_xtuner_data_collator(res, padding_to, self.tokenizer, padding_side)
rank5: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1272, in _torchacc_xtuner_data_collator
rank5: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/trainers/xtuner.py", line 56, in pad_and_split_for_sequence_parallel
rank5: attention_mask = pad_for_sequence_parallel(attention_mask, padding_value=0, dim=-1)
rank5: File "/opt/conda/lib/python3.10/site-packages/xtuner/parallel/sequence/data_collate.py", line 8, in pad_for_sequence_parallel
rank5: length = tensor.shape[dim]
rank5: AttributeError: 'NoneType' object has no attribute 'shape'
rank6: Traceback (most recent call last):
rank6: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/cli/sft.py", line 10, in <module>
rank6: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 265, in sft_main
rank6: return SwiftSft(args).main()
rank6: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/base.py", line 47, in main
rank6: result = self.run()
rank6: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 142, in run
rank6: return self.train(trainer)
rank6: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/train/sft.py", line 202, in train
rank6: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/trainers/mixin.py", line 289, in train
rank6: res = super().train(*args, **kwargs)
rank6: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 2241, in train
rank6: return inner_training_loop(
rank6: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 2500, in _inner_training_loop
rank6: batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches)
rank6: File "/opt/conda/lib/python3.10/site-packages/transformers/trainer.py", line 5180, in get_batch_samples
rank6: batch_samples += [next(epoch_iterator)]
rank6: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 701, in next
rank6: data = self._next_data()
rank6: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1465, in _next_data
rank6: return self._process_data(data)
rank6: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1491, in _process_data
rank6: File "/opt/conda/lib/python3.10/site-packages/torch/_utils.py", line 715, in reraise
rank6: raise exception
rank6: AttributeError: Caught AttributeError in DataLoader worker process 0.
rank6: Original Traceback (most recent call last):
rank6: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
rank6: data = fetcher.fetch(index) # type: ignore[possibly-undefined]
rank6: File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
rank6: return self.collate_fn(data)
rank6: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1063, in data_collator
rank6: return self._data_collator(batch, padding_to=padding_to)
rank6: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1240, in _data_collator
rank6: res = self._torchacc_xtuner_data_collator(res, padding_to, self.tokenizer, padding_side)
rank6: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/llm/template/base.py", line 1272, in _torchacc_xtuner_data_collator
rank6: File "/home/jeeves/.local/lib/python3.10/site-packages/swift/trainers/xtuner.py", line 56, in pad_and_split_for_sequence_parallel
rank6: attention_mask = pad_for_sequence_parallel(attention_mask, padding_value=0, dim=-1)
rank6: File "/opt/conda/lib/python3.10/site-packages/xtuner/parallel/sequence/data_collate.py", line 8, in pad_for_sequence_parallel
rank6: length = tensor.shape[dim]
rank6: AttributeError: 'NoneType' object has no attribute 'shape'
Train: 0%| | 0/804 [00:01<?, ?it/s]
The text was updated successfully, but these errors were encountered: