@@ -1447,9 +1447,9 @@ def convert_open_clip_checkpoint(
     text_proj_key = prefix + "text_projection"

     if text_proj_key in checkpoint:
-        text_proj_dim = int(checkpoint[text_proj_key].shape[0])
-    elif hasattr(text_model.config, "projection_dim"):
-        text_proj_dim = text_model.config.projection_dim
+        text_proj_dim = int(checkpoint[text_proj_key].shape[1])
+    elif hasattr(text_model.config, "hidden_size"):
+        text_proj_dim = text_model.config.hidden_size
     else:
         text_proj_dim = LDM_OPEN_CLIP_TEXT_PROJECTION_DIM

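For context (not part of the diff): `text_proj_dim` is used later in `convert_open_clip_checkpoint` to split the fused OpenCLIP attention `in_proj_weight` into separate q/k/v projections, so it must equal the transformer's hidden width. For SD2.x the checkpoint's `text_projection` tensor is square (1024 x 1024), so `shape[0]` vs. `shape[1]` is moot there, but the config fallback matters: as the comment removed below notes, Hub configs for these models often carry `projection_dim=512`, while `hidden_size` is the reliable value. A minimal sketch of the q/k/v split (illustrative names and shapes, not the converter's exact code):

```python
import torch

# Hedged sketch: why text_proj_dim must be the hidden width.
# OpenCLIP stores attention q/k/v as one fused matrix; the converter
# slices it into the three per-projection weights expected by the
# transformers-style text model.
hidden_size = 1024  # SD2.x OpenCLIP text tower width
in_proj_weight = torch.randn(3 * hidden_size, hidden_size)  # fused q/k/v

q = in_proj_weight[:hidden_size, :]
k = in_proj_weight[hidden_size : hidden_size * 2, :]
v = in_proj_weight[hidden_size * 2 :, :]

# With projection_dim=512 from a bad Hub config, these slices would be
# misaligned; hidden_size yields three clean (1024, 1024) matrices.
assert q.shape == k.shape == v.shape == (hidden_size, hidden_size)
```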
@@ -1545,14 +1545,6 @@ def create_diffusers_clip_model_from_ldm(
         config["pretrained_model_name_or_path"] = clip_config
         subfolder = ""

-    if is_open_clip_model(checkpoint):
-        # infer projection_dim for the text_encoder using the checkpoint.
-        # should fix SD2.X LDM checkpoint loads from CivitAI and similar.
-        # The configuration on the hub is often (or always) incorrect for these models
-        # which need projection_dim=1024 and not projection_dim=512
-        if 'cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.weight' in checkpoint:
-            config['projection_dim'] = checkpoint['cond_stage_model.model.transformer.resblocks.0.mlp.c_proj.weight'].shape[0]
-
     model_config = cls.config_class.from_pretrained(**config, subfolder=subfolder, local_files_only=local_files_only)
     ctx = init_empty_weights if is_accelerate_available() else nullcontext
     with ctx():
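With the `hidden_size` fallback introduced above, this ad-hoc `projection_dim` inference becomes redundant. For the record, the deleted block leaned on an OpenCLIP architectural invariant: in each transformer block, `mlp.c_proj` is `Linear(4 * width, width)`, so the first dimension of its weight equals the hidden width (1024 for SD2.x). A hedged sketch of that invariant:

```python
import torch.nn as nn

# Hedged sketch of the invariant the deleted block relied on: an OpenCLIP
# transformer block's MLP is c_fc: Linear(width, 4*width) followed by
# c_proj: Linear(4*width, width). nn.Linear stores weight as
# (out_features, in_features), so c_proj.weight.shape[0] == width.
width = 1024  # SD2.x OpenCLIP text tower
c_proj = nn.Linear(4 * width, width)
assert c_proj.weight.shape == (width, 4 * width)
assert c_proj.weight.shape[0] == width  # what the deleted code read as projection_dim
```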