Description
这是我的 base model（Qwen2.5-VL-3B）原始的 config.json 文件：
{ "architectures": [ "Qwen2_5_VLForConditionalGeneration" ], "attention_dropout": 0.0, "bos_token_id": 151643, "eos_token_id": 151645, "vision_start_token_id": 151652, "vision_end_token_id": 151653, "vision_token_id": 151654, "image_token_id": 151655, "video_token_id": 151656, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 128000, "max_window_layers": 70, "model_type": "qwen2_5_vl", "num_attention_heads": 16, "num_hidden_layers": 36, "num_key_value_heads": 2, "rms_norm_eps": 1e-06, "rope_theta": 1000000.0, "sliding_window": 32768, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "transformers_version": "4.41.2", "use_cache": true, "use_sliding_window": false, "vision_config": { "depth": 32, "hidden_act": "silu", "hidden_size": 1280, "intermediate_size": 3420, "num_heads": 16, "in_chans": 3, "out_hidden_size": 2048, "patch_size": 14, "spatial_merge_size": 2, "spatial_patch_size": 14, "window_size": 112, "fullatt_block_indexes": [ 7, 15, 23, 31 ], "tokens_per_second": 2, "temporal_patch_size": 2 }, "rope_scaling": { "type": "mrope", "mrope_section": [ 16, 24, 24 ] }, "vocab_size": 151936 }
这是 SFT 之后保存在 checkpoint 中的 config.json 文件：
{ "architectures": [ "Qwen2_5_VLForConditionalGeneration" ], "attention_dropout": 0.0, "bos_token_id": 151643, "eos_token_id": 151645, "hidden_act": "silu", "hidden_size": 2048, "image_token_id": 151655, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 128000, "max_window_layers": 70, "model_type": "qwen2_5_vl", "num_attention_heads": 16, "num_hidden_layers": 36, "num_key_value_heads": 2, "pad_token_id": 151643, "rms_norm_eps": 1e-06, "rope_scaling": { "mrope_section": [ 16, 24, 24 ], "rope_type": "default", "type": "default" }, "rope_theta": 1000000.0, "sliding_window": 32768, "text_config": { "architectures": [ "Qwen2_5_VLForConditionalGeneration" ], "attention_dropout": 0.0, "bos_token_id": 151643, "eos_token_id": 151645, "hidden_act": "silu", "hidden_size": 2048, "image_token_id": null, "initializer_range": 0.02, "intermediate_size": 11008, "max_position_embeddings": 128000, "max_window_layers": 70, "model_type": "qwen2_5_vl_text", "num_attention_heads": 16, "num_hidden_layers": 36, "num_key_value_heads": 2, "rms_norm_eps": 1e-06, "rope_scaling": { "mrope_section": [ 16, 24, 24 ], "rope_type": "default", "type": "default" }, "rope_theta": 1000000.0, "sliding_window": 32768, "tie_word_embeddings": true, "torch_dtype": "bfloat16", "use_cache": true, "use_sliding_window": false, "video_token_id": null, "vision_end_token_id": 151653, "vision_start_token_id": 151652, "vision_token_id": 151654, "vocab_size": 151936 }, "torch_dtype": "bfloat16", "transformers_version": "4.52.4", "use_cache": false, "use_sliding_window": false, "video_token_id": 151656, "vision_config": { "depth": 32, "fullatt_block_indexes": [ 7, 15, 23, 31 ], "hidden_act": "silu", "hidden_size": 1280, "in_channels": 3, "in_chans": 3, "initializer_range": 0.02, "intermediate_size": 3420, "model_type": "qwen2_5_vl", "num_heads": 16, "out_hidden_size": 2048, "patch_size": 14, "spatial_merge_size": 2, "spatial_patch_size": 14, "temporal_patch_size": 2, 
"tokens_per_second": 2, "torch_dtype": "bfloat16", "window_size": 112 }, "vision_end_token_id": 151653, "vision_start_token_id": 151652, "vision_token_id": 151654, "vocab_size": 151936 }
我使用 SFT 后 checkpoint 中的这份 config.json，通过 vLLM 部署模型，命令如下：
vllm serve /train_critic_model/Qwen2.5-VL/qwen-vl-finetune/output/20250624-qwen25vl_3b_sft_mlp_llm/Qwen2.5-VL-3B-Instruct --served-model-name qwen25-vl-3b --tensor-parallel-size 2
结果出现如下报错：
pydantic_core._pydantic_core.ValidationError: 1 validation error for ModelConfig
Assertion failed, [type=assertion_error, input_value=ArgsKwargs((), {'model': ..., 'model_impl': 'auto'}), input_type=ArgsKwargs]
For further information visit https://errors.pydantic.dev/2.11/v/assertion_error
(qwen25-vl) root@dsw-237778-5c6754cfd4-glf7x:/nas-alinlp/zhoubb/code/demo_TabRectSet# mv pydantic_core._pydantic_core.ValidationError: 1 validation error for ModelConfig。
为了解决该问题，我目前只能把 base model 原始的 config.json 文件复制到这个 checkpoint 中。请问应该如何正确解决这个问题，使 SFT 后生成的 config.json 可以直接用于 vLLM 部署？