Description
The training script is as follows:
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
swift rlhf \
    --rlhf_type rm \
    --model ${model_path} \
    --model_type qwen2_5 \
    --train_type ${train_type} \
    --dataset ${dataset} \
    --torch_dtype bfloat16 \
    --num_train_epochs 2 \
    --per_device_train_batch_size 8 \
    --per_device_eval_batch_size 8 \
    --learning_rate 5e-5 \
    --gradient_accumulation_steps 8 \
    --eval_steps 100 \
    --save_steps 100 \
    --logging_steps 5 \
    --max_length 4096 \
    --output_dir ${output_dir} \
    --warmup_ratio 0.05 \
    --dataloader_num_workers 4 \
    --deepspeed zero2 \
    --dataset_num_proc 4
```
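For reference, these settings give an effective global batch size of per_device_train_batch_size × gradient_accumulation_steps × num_gpus = 8 × 8 × 8 = 512. The snippet below is a minimal sketch of one preference pair in the JSONL format that ms-swift's reward-model training typically consumes; the field names (`messages`, `rejected_response`) follow the ms-swift preference-data convention as I understand it and should be verified against the docs for your ms-swift version:

```python
import json

# Minimal sketch of one preference pair for --rlhf_type rm.
# The assistant turn in "messages" is the chosen (preferred) answer;
# "rejected_response" holds the dispreferred one. Field names are an
# assumption based on the ms-swift preference-data convention.
sample = {
    "messages": [
        {"role": "user", "content": "Explain gradient accumulation."},
        {"role": "assistant", "content": "Preferred (chosen) answer ..."},
    ],
    "rejected_response": "Dispreferred answer ...",
}

with open("rm_train.jsonl", "w", encoding="utf-8") as f:
    f.write(json.dumps(sample, ensure_ascii=False) + "\n")
```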
The model structure is as follows:
```
Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 3584, padding_idx=151643)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=3584, out_features=3584, bias=True)
          (k_proj): Linear(in_features=3584, out_features=512, bias=True)
          (v_proj): Linear(in_features=3584, out_features=512, bias=True)
          (o_proj): Linear(in_features=3584, out_features=3584, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (up_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (down_proj): Linear(in_features=18944, out_features=3584, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((3584,), eps=1e-06)
    (rotary_emb): Qwen2RotaryEmbedding()
  )
  (lm_head): Linear(in_features=3584, out_features=152064, bias=False)
)
```
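Note that the printed module is still a `Qwen2ForCausalLM` whose `lm_head` projects back to the vocabulary (152064 logits). A reward model is usually built by replacing that head with a scalar value head scoring the last non-padding token. The sketch below illustrates that common setup and the pairwise Bradley-Terry loss typically used for RM training; it is an illustration under that assumption, not ms-swift's actual internals (the class and function names here are hypothetical):

```python
import torch
import torch.nn as nn

class RewardHead(nn.Module):
    """Sketch of the scalar value head a reward model typically uses
    in place of lm_head (hidden_size=3584 for this Qwen2 model)."""

    def __init__(self, hidden_size: int = 3584):
        super().__init__()
        self.score = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, hidden_states: torch.Tensor,
                attention_mask: torch.Tensor) -> torch.Tensor:
        # hidden_states: (batch, seq_len, hidden) from the base Qwen2Model.
        # Pick the last non-padding position per sequence (right padding assumed).
        last_idx = attention_mask.sum(dim=1) - 1
        batch_idx = torch.arange(hidden_states.size(0),
                                 device=hidden_states.device)
        last_hidden = hidden_states[batch_idx, last_idx]   # (batch, hidden)
        return self.score(last_hidden).squeeze(-1)         # (batch,) rewards

def rm_loss(r_chosen: torch.Tensor, r_rejected: torch.Tensor) -> torch.Tensor:
    # Pairwise Bradley-Terry objective: -log(sigmoid(r_chosen - r_rejected)).
    return -nn.functional.logsigmoid(r_chosen - r_rejected).mean()
```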