2727 PixArtAlphaPipeline ,
2828 Transformer2DModel ,
2929)
30- from diffusers .utils .testing_utils import enable_full_determinism , require_torch_gpu , slow , torch_device
30+ from diffusers .utils .testing_utils import (
31+ enable_full_determinism ,
32+ numpy_cosine_similarity_distance ,
33+ require_torch_gpu ,
34+ slow ,
35+ torch_device ,
36+ )
3137
3238from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS , TEXT_TO_IMAGE_IMAGE_PARAMS , TEXT_TO_IMAGE_PARAMS
3339from ..test_pipelines_common import PipelineTesterMixin , to_np
@@ -332,37 +338,35 @@ def tearDown(self):
332338 torch .cuda .empty_cache ()
333339
334340 def test_pixart_1024 (self ):
335- generator = torch .manual_seed (0 )
341+ generator = torch .Generator ( "cpu" ). manual_seed (0 )
336342
337343 pipe = PixArtAlphaPipeline .from_pretrained (self .ckpt_id_1024 , torch_dtype = torch .float16 )
338344 pipe .enable_model_cpu_offload ()
339345 prompt = self .prompt
340346
341- image = pipe (prompt , generator = generator , output_type = "np" ).images
347+ image = pipe (prompt , generator = generator , num_inference_steps = 2 , output_type = "np" ).images
342348
343349 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
350+ expected_slice = np .array ([0.0742 , 0.0835 , 0.2114 , 0.0295 , 0.0784 , 0.2361 , 0.1738 , 0.2251 , 0.3589 ])
344351
345- expected_slice = np .array ([0.1941 , 0.2117 , 0.2188 , 0.1946 , 0.218 , 0.2124 , 0.199 , 0.2437 , 0.2583 ])
346-
347- max_diff = np .abs (image_slice .flatten () - expected_slice ).max ()
348- self .assertLessEqual (max_diff , 1e-3 )
352+ max_diff = numpy_cosine_similarity_distance (image_slice .flatten (), expected_slice )
353+ self .assertLessEqual (max_diff , 1e-4 )
349354
350355 def test_pixart_512 (self ):
351- generator = torch .manual_seed (0 )
356+ generator = torch .Generator ( "cpu" ). manual_seed (0 )
352357
353358 pipe = PixArtAlphaPipeline .from_pretrained (self .ckpt_id_512 , torch_dtype = torch .float16 )
354359 pipe .enable_model_cpu_offload ()
355360
356361 prompt = self .prompt
357362
358- image = pipe (prompt , generator = generator , output_type = "np" ).images
363+ image = pipe (prompt , generator = generator , num_inference_steps = 2 , output_type = "np" ).images
359364
360365 image_slice = image [0 , - 3 :, - 3 :, - 1 ]
366+ expected_slice = np .array ([0.3477 , 0.3882 , 0.4541 , 0.3413 , 0.3821 , 0.4463 , 0.4001 , 0.4409 , 0.4958 ])
361367
362- expected_slice = np .array ([0.2637 , 0.291 , 0.2939 , 0.207 , 0.2512 , 0.2783 , 0.2168 , 0.2324 , 0.2817 ])
363-
364- max_diff = np .abs (image_slice .flatten () - expected_slice ).max ()
365- self .assertLessEqual (max_diff , 1e-3 )
368+ max_diff = numpy_cosine_similarity_distance (image_slice .flatten (), expected_slice )
369+ self .assertLessEqual (max_diff , 1e-4 )
366370
367371 def test_pixart_1024_without_resolution_binning (self ):
368372 generator = torch .manual_seed (0 )
@@ -372,7 +376,7 @@ def test_pixart_1024_without_resolution_binning(self):
372376
373377 prompt = self .prompt
374378 height , width = 1024 , 768
375- num_inference_steps = 10
379+ num_inference_steps = 2
376380
377381 image = pipe (
378382 prompt ,
@@ -406,7 +410,7 @@ def test_pixart_512_without_resolution_binning(self):
406410
407411 prompt = self .prompt
408412 height , width = 512 , 768
409- num_inference_steps = 10
413+ num_inference_steps = 2
410414
411415 image = pipe (
412416 prompt ,
0 commit comments