You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_3067/2457985955.py in <module>
89 compute_metrics=paddlenlp.metrics.AccuracyAndF1,
90 )
---> 91 trainer.train()
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlenlp/trainer/trainer.py in train(self, resume_from_checkpoint, ignore_keys_for_eval)
714
715 self.control = self.callback_handler.on_step_end(args, self.state, self.control)
--> 716 self._maybe_log_save_evaluate(tr_loss, model, epoch, ignore_keys_for_eval, inputs=inputs)
717 else:
718 self.control = self.callback_handler.on_substep_end(args, self.state, self.control)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlenlp/trainer/trainer.py in _maybe_log_save_evaluate(self, tr_loss, model, epoch, ignore_keys_for_eval, **kwargs)
846 )
847 else:
--> 848 metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
849
850 if self.control.should_save:
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlenlp/trainer/trainer.py in evaluate(self, eval_dataset, ignore_keys, metric_key_prefix)
1614 prediction_loss_only=True if self.compute_metrics is None else None,
1615 ignore_keys=ignore_keys,
-> 1616 metric_key_prefix=metric_key_prefix,
1617 )
1618
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlenlp/trainer/trainer.py in evaluation_loop(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix, max_eval_iters)
1787 # Metrics!
1788 if self.compute_metrics is not None and all_preds is not None and all_labels is not None:
-> 1789 metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
1790 else:
1791 metrics = {}
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddlenlp/metrics/glue.py in __init__(self, topk, pos_label, name, *args, **kwargs)
64 self.pos_label = pos_label
65 self._name = name
---> 66 self.acc = Accuracy(self.topk, *args, **kwargs)
67 self.precision = Precision(*args, **kwargs)
68 self.recall = Recall(*args, **kwargs)
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/paddle/metric/metrics.py in __init__(self, topk, name, *args, **kwargs)
238 super(Accuracy, self).__init__(*args, **kwargs)
239 self.topk = topk
--> 240 self.maxk = max(topk)
241 self._init_name(name)
242 self.reset()
ValueError: operands could not be broadcast together with shapes (1000,) (1000,2)
### 稳定复现步骤 & 代码
这是一个二分类的任务,我的数据读取是这样的:
def collate_fn(data):
feats = [d[0] for d in data]
labels = paddle.to_tensor([d[1] for d in data], dtype='int64')
encodings = tokenizer.encode_batch(feats)
input_ids = paddle.to_tensor([enc.ids for enc in encodings], dtype='int64')
attn_mask = paddle.to_tensor([enc.attention_mask for enc in encodings], dtype='int64')
return {
"input_ids": input_ids,
"attention_mask": attn_mask,
"labels": labels
}
软件环境
重复问题
错误描述
def collate_fn(data):
feats = [d[0] for d in data]
labels = paddle.to_tensor([d[1] for d in data], dtype='int64')
encodings = tokenizer.encode_batch(feats)
input_ids = paddle.to_tensor([enc.ids for enc in encodings], dtype='int64')
attn_mask = paddle.to_tensor([enc.attention_mask for enc in encodings], dtype='int64')
return {
"input_ids": input_ids,
"attention_mask": attn_mask,
"labels": labels
}
args = paddlenlp.trainer.TrainingArguments(
output_dir='models',
do_train=True,
do_eval=True,
per_device_train_batch_size=8,
gradient_accumulation_steps=32,
evaluation_strategy='steps',
per_device_eval_batch_size=16,
eval_steps=10,
save_total_limit=1,
report_to='visualdl',
logging_steps=50,
)
trainer = paddlenlp.trainer.Trainer(
model=model,
criterion=paddle.nn.CrossEntropyLoss(),
args=args,
data_collator=collate_fn,
train_dataset=MyDataset('train_data/0.txt'),
eval_dataset=MyDataset('train_data/1.txt', 'eval'),
optimizers=[opt, paddle.optimizer.lr.NoamDecay(512, 25000, 5)],
compute_metrics=paddlenlp.metrics.AccuracyAndF1,
)
trainer.train()
The text was updated successfully, but these errors were encountered: