@@ -126,6 +126,8 @@ class StableDiffusionPipelineFastTests(
126126 callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS
127127
128128 def get_dummy_components (self , time_cond_proj_dim = None ):
129+ cross_attention_dim = 8
130+
129131 torch .manual_seed (0 )
130132 unet = UNet2DConditionModel (
131133 block_out_channels = (4 , 8 ),
@@ -136,7 +138,7 @@ def get_dummy_components(self, time_cond_proj_dim=None):
136138 out_channels = 4 ,
137139 down_block_types = ("DownBlock2D" , "CrossAttnDownBlock2D" ),
138140 up_block_types = ("CrossAttnUpBlock2D" , "UpBlock2D" ),
139- cross_attention_dim = 32 ,
141+ cross_attention_dim = cross_attention_dim ,
140142 norm_num_groups = 2 ,
141143 )
142144 scheduler = DDIMScheduler (
@@ -160,11 +162,11 @@ def get_dummy_components(self, time_cond_proj_dim=None):
160162 text_encoder_config = CLIPTextConfig (
161163 bos_token_id = 0 ,
162164 eos_token_id = 2 ,
163- hidden_size = 32 ,
164- intermediate_size = 64 ,
165+ hidden_size = cross_attention_dim ,
166+ intermediate_size = 16 ,
165167 layer_norm_eps = 1e-05 ,
166- num_attention_heads = 8 ,
167- num_hidden_layers = 3 ,
168+ num_attention_heads = 2 ,
169+ num_hidden_layers = 2 ,
168170 pad_token_id = 1 ,
169171 vocab_size = 1000 ,
170172 )
@@ -212,7 +214,7 @@ def test_stable_diffusion_ddim(self):
212214 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
213215
214216 assert image .shape == (1 , 64 , 64 , 3 )
215- expected_slice = np .array ([0.3203 , 0.4555 , 0.4711 , 0.3505 , 0.3973 , 0.4650 , 0.5137 , 0.3392 , 0.4045 ])
217+ expected_slice = np .array ([0.1763 , 0.4776 , 0.4986 , 0.2566 , 0.3802 , 0.4596 , 0.5363 , 0.3277 , 0.3949 ])
216218
217219 assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
218220
@@ -232,7 +234,7 @@ def test_stable_diffusion_lcm(self):
232234 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
233235
234236 assert image .shape == (1 , 64 , 64 , 3 )
235- expected_slice = np .array ([0.3454 , 0.5349 , 0.5185 , 0.2808 , 0.4509 , 0.4612 , 0.4655 , 0.3601 , 0.4315 ])
237+ expected_slice = np .array ([0.2368 , 0.4900 , 0.5019 , 0.2723 , 0.4473 , 0.4578 , 0.4551 , 0.3532 , 0.4133 ])
236238
237239 assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
238240
@@ -254,7 +256,7 @@ def test_stable_diffusion_lcm_custom_timesteps(self):
254256 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
255257
256258 assert image .shape == (1 , 64 , 64 , 3 )
257- expected_slice = np .array ([0.3454 , 0.5349 , 0.5185 , 0.2808 , 0.4509 , 0.4612 , 0.4655 , 0.3601 , 0.4315 ])
259+ expected_slice = np .array ([0.2368 , 0.4900 , 0.5019 , 0.2723 , 0.4473 , 0.4578 , 0.4551 , 0.3532 , 0.4133 ])
258260
259261 assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
260262
@@ -373,12 +375,6 @@ def test_stable_diffusion_prompt_embeds_with_plain_negative_prompt_list(self):
373375
374376 assert np .abs (image_slice_1 .flatten () - image_slice_2 .flatten ()).max () < 1e-4
375377
376- def test_ip_adapter_single (self ):
377- expected_pipe_slice = None
378- if torch_device == "cpu" :
379- expected_pipe_slice = np .array ([0.3203 , 0.4555 , 0.4711 , 0.3505 , 0.3973 , 0.4650 , 0.5137 , 0.3392 , 0.4045 ])
380- return super ().test_ip_adapter_single (expected_pipe_slice = expected_pipe_slice )
381-
382378 def test_stable_diffusion_ddim_factor_8 (self ):
383379 device = "cpu" # ensure determinism for the device-dependent torch.Generator
384380
@@ -394,7 +390,7 @@ def test_stable_diffusion_ddim_factor_8(self):
394390 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
395391
396392 assert image .shape == (1 , 136 , 136 , 3 )
397- expected_slice = np .array ([0.4346 , 0.5621 , 0.5016 , 0.3926 , 0.4533 , 0.4134 , 0.5625 , 0.5632 , 0.5265 ])
393+ expected_slice = np .array ([0.4720 , 0.5426 , 0.5160 , 0.3961 , 0.4696 , 0.4296 , 0.5738 , 0.5888 , 0.5481 ])
398394
399395 assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
400396
@@ -412,7 +408,7 @@ def test_stable_diffusion_pndm(self):
412408 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
413409
414410 assert image .shape == (1 , 64 , 64 , 3 )
415- expected_slice = np .array ([0.3411 , 0.5032 , 0.4704 , 0.3135 , 0.4323 , 0.4740 , 0.5150 , 0.3498 , 0.4022 ])
411+ expected_slice = np .array ([0.1941 , 0.4748 , 0.4880 , 0.2222 , 0.4221 , 0.4545 , 0.5604 , 0.3488 , 0.3902 ])
416412
417413 assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
418414
@@ -452,7 +448,7 @@ def test_stable_diffusion_k_lms(self):
452448 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
453449
454450 assert image .shape == (1 , 64 , 64 , 3 )
455- expected_slice = np .array ([0.3149 , 0.5246 , 0.4796 , 0.3218 , 0.4469 , 0.4729 , 0.5151 , 0.3597 , 0.3954 ])
451+ expected_slice = np .array ([0.2681 , 0.4785 , 0.4857 , 0.2426 , 0.4473 , 0.4481 , 0.5610 , 0.3676 , 0.3855 ])
456452
457453 assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
458454
@@ -471,7 +467,7 @@ def test_stable_diffusion_k_euler_ancestral(self):
471467 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
472468
473469 assert image .shape == (1 , 64 , 64 , 3 )
474- expected_slice = np .array ([0.3151 , 0.5243 , 0.4794 , 0.3217 , 0.4468 , 0.4728 , 0.5152 , 0.3598 , 0.3954 ])
470+ expected_slice = np .array ([0.2682 , 0.4782 , 0.4855 , 0.2424 , 0.4472 , 0.4479 , 0.5612 , 0.3676 , 0.3854 ])
475471
476472 assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
477473
@@ -490,7 +486,7 @@ def test_stable_diffusion_k_euler(self):
490486 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
491487
492488 assert image .shape == (1 , 64 , 64 , 3 )
493- expected_slice = np .array ([0.3149 , 0.5246 , 0.4796 , 0.3218 , 0.4469 , 0.4729 , 0.5151 , 0.3597 , 0.3954 ])
489+ expected_slice = np .array ([0.2681 , 0.4785 , 0.4857 , 0.2426 , 0.4473 , 0.4481 , 0.5610 , 0.3676 , 0.3855 ])
494490
495491 assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
496492
@@ -562,7 +558,7 @@ def test_stable_diffusion_negative_prompt(self):
562558 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
563559
564560 assert image .shape == (1 , 64 , 64 , 3 )
565- expected_slice = np .array ([0.3458 , 0.5120 , 0.4800 , 0.3116 , 0.4348 , 0.4802 , 0.5237 , 0.3467 , 0.3991 ])
561+ expected_slice = np .array ([0.1907 , 0.4709 , 0.4858 , 0.2224 , 0.4223 , 0.4539 , 0.5606 , 0.3489 , 0.3900 ])
566562
567563 assert np .abs (image_slice .flatten () - expected_slice ).max () < 1e-2
568564
0 commit comments