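fix split: define the batched split_tensor_dict helper inside apply_split_tensor_dict_patch and pop 'logits_to_keep' from the inputs returned by _generate_and_score_completions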

modelscope · hjh0119 · Apr 30, 2025 · Apr 23, 2025 · Apr 23, 2025 · Apr 23, 2025
commit 550f06fe017932770fa688aa2166834a64779005
diff --git a/swift/trainers/rlhf_trainer/grpo_trainer.py b/swift/trainers/rlhf_trainer/grpo_trainer.py
@@ -57,14 +57,13 @@
 OutputsType = List[List[Tuple[List[Dict], str]]]
 
 
-def _batch_split_tensor_dict(tensor_dict_list: List[Dict[str, Optional[torch.Tensor]]],
-                             num_chunks: int) -> List[List[Dict[str, Optional[torch.Tensor]]]]:
-    return [hf_split_tensor_dict(tensor_dict, num_chunks) for tensor_dict in tensor_dict_list]
-
-
 def apply_split_tensor_dict_patch():
     from trl.trainer import grpo_trainer
 
+    def _batch_split_tensor_dict(tensor_dict_list: List[Dict[str, Optional[torch.Tensor]]],
+                                 num_chunks: int) -> List[List[Dict[str, Optional[torch.Tensor]]]]:
+        return [hf_split_tensor_dict(tensor_dict, num_chunks) for tensor_dict in tensor_dict_list]
+
     if not hasattr(grpo_trainer, '_original_split_tensor_dict'):
         grpo_trainer._original_split_tensor_dict = hf_split_tensor_dict
         grpo_trainer.split_tensor_dict = _batch_split_tensor_dict
@@ -863,6 +862,7 @@ def _generate_and_score_completions(self, inputs: InputsType) -> InputsType:
         self._log_metrics(batch_encoded_inputs, messages, completions, total_rewards, total_rewards_per_func)
 
         # TODO: Confirm that everything is a tensor.
+        batch_encoded_inputs.pop('logits_to_keep')
         return batch_encoded_inputs
 
     def _score_completions(self, inputs: InputsType) -> Tuple[torch.Tensor, torch.Tensor, List[str]]: