Merge changes #211

Merged: 65 commits, May 12, 2025

Changes from 1 commit
bd96a08
[train_dreambooth_lora.py] Set LANCZOS as default interpolation mode …
merterbak Apr 26, 2025
aa5f5d4
[tests] add tests to check for graph breaks, recompilation, cuda sync…
sayakpaul Apr 28, 2025
9ce89e2
enable group_offload cases and quanto cases on XPU (#11405)
yao-matrix Apr 28, 2025
a7e9f85
enable test_layerwise_casting_memory cases on XPU (#11406)
yao-matrix Apr 28, 2025
0e3f271
[tests] fix import. (#11434)
sayakpaul Apr 28, 2025
b3b04fe
[train_text_to_image] Better image interpolation in training scripts …
tongyu0924 Apr 28, 2025
3da98e7
[train_text_to_image_lora] Better image interpolation in training scr…
tongyu0924 Apr 28, 2025
7567adf
enable 28 GGUF test cases on XPU (#11404)
yao-matrix Apr 28, 2025
0ac1d5b
[Hi-Dream LoRA] fix bug in validation (#11439)
linoytsaban Apr 28, 2025
4a9ab65
Fixing missing provider options argument (#11397)
urpetkov-amd Apr 28, 2025
58431f1
Set LANCZOS as the default interpolation for image resizing in Contro…
YoulunPeng Apr 29, 2025
8fe5a14
Raise warning instead of error for block offloading with streams (#11…
a-r-r-o-w Apr 30, 2025
60892c5
enable marigold_intrinsics cases on XPU (#11445)
yao-matrix Apr 30, 2025
c865115
`torch.compile` fullgraph compatibility for Hunyuan Video (#11457)
a-r-r-o-w Apr 30, 2025
fbe2fe5
enable consistency test cases on XPU, all passed (#11446)
yao-matrix Apr 30, 2025
35fada4
enable unidiffuser test cases on xpu (#11444)
yao-matrix Apr 30, 2025
fbce7ae
Add generic support for Intel Gaudi accelerator (hpu device) (#11328)
dsocek Apr 30, 2025
8cd7426
Add StableDiffusion3InstructPix2PixPipeline (#11378)
xduzhangjiayu Apr 30, 2025
23c9802
make safe diffusion test cases pass on XPU and A100 (#11458)
yao-matrix Apr 30, 2025
38ced7e
[test_models_transformer_hunyuan_video] help us test torch.compile() …
tongyu0924 Apr 30, 2025
daf0a23
Add LANCZOS as default interpolation mode. (#11463)
Va16hav07 Apr 30, 2025
06beeca
make autoencoders, controlnet_flux and wan_transformer3d_single_file …
yao-matrix Apr 30, 2025
d70f8ee
[WAN] fix recompilation issues (#11475)
sayakpaul May 1, 2025
86294d3
Fix typos in docs and comments (#11416)
co63oc May 1, 2025
5dcdf4a
[tests] xfail recent pipeline tests for specific methods. (#11469)
sayakpaul May 1, 2025
d0c0239
cache packages_distributions (#11453)
vladmandic May 1, 2025
b848d47
[docs] Memory optims (#11385)
stevhliu May 1, 2025
e23705e
[docs] Adapters (#11331)
stevhliu May 2, 2025
ed6cf52
[train_dreambooth_lora_sdxl_advanced] Add LANCZOS as the default inte…
yuanjua May 2, 2025
ec3d582
[train_dreambooth_lora_flux_advanced] Add LANCZOS as the default inte…
ysurs May 2, 2025
a674914
enable semantic diffusion and stable diffusion panorama cases on XPU …
yao-matrix May 5, 2025
8520d49
[Feature] Implement tiled VAE encoding/decoding for Wan model. (#11414)
c8ef May 5, 2025
fc5e906
[train_text_to_image_sdxl]Add LANCZOS as default interpolation mode f…
ParagEkbote May 5, 2025
ec93239
[train_dreambooth_lora_sdxl] Add --image_interpolation_mode option fo…
MinJu-Ha May 5, 2025
ee1516e
[train_dreambooth_lora_lumina2] Add LANCZOS as the default interpolat…
cjfghk5697 May 5, 2025
071807c
[training] feat: enable quantization for hidream lora training. (#11494)
sayakpaul May 5, 2025
9c29e93
Set LANCZOS as the default interpolation method for image resizing. (…
yijun-lee May 5, 2025
ed4efbd
Update training script for txt to img sdxl with lora supp with new in…
RogerSinghChugh May 5, 2025
1fa5639
Fix torchao docs typo for fp8 granular quantization (#11473)
a-r-r-o-w May 6, 2025
53f1043
Update setup.py to pin min version of `peft` (#11502)
sayakpaul May 6, 2025
d88ae1f
update dep table. (#11504)
sayakpaul May 6, 2025
10bee52
[LoRA] use `removeprefix` to preserve sanity. (#11493)
sayakpaul May 6, 2025
d7ffe60
Hunyuan Video Framepack (#11428)
a-r-r-o-w May 6, 2025
8c661ea
enable lora cases on XPU (#11506)
yao-matrix May 6, 2025
7937166
[lora_conversion] Enhance key handling for OneTrainer components in L…
iamwavecut May 6, 2025
fb29132
[docs] minor updates to bitsandbytes docs. (#11509)
sayakpaul May 6, 2025
7b90494
Cosmos (#10660)
a-r-r-o-w May 7, 2025
53bd367
clean up the __init__ for stable_diffusion (#11500)
yiyixuxu May 7, 2025
87e508f
fix audioldm
sayakpaul May 8, 2025
c5c34a4
Revert "fix audioldm"
sayakpaul May 8, 2025
66e50d4
[LoRA] make lora alpha and dropout configurable (#11467)
linoytsaban May 8, 2025
784db0e
Add cross attention type for Sana-Sprint training in diffusers. (#11514)
scxue May 8, 2025
6674a51
Conditionally import torchvision in Cosmos transformer (#11524)
a-r-r-o-w May 8, 2025
393aefc
[tests] fix audioldm2 for transformers main. (#11522)
sayakpaul May 8, 2025
599c887
feat: pipeline-level quantization config (#11130)
sayakpaul May 9, 2025
7acf834
[Tests] Enable more general testing for `torch.compile()` with LoRA h…
sayakpaul May 9, 2025
0c47c95
[LoRA] support non-diffusers hidream loras (#11532)
sayakpaul May 9, 2025
2d38089
enable 7 cases on XPU (#11503)
yao-matrix May 9, 2025
3c0a012
[LTXPipeline] Update latents dtype to match VAE dtype (#11533)
james-p-xu May 9, 2025
d6bf268
enable dit integration cases on xpu (#11523)
yao-matrix May 9, 2025
0ba1f76
enable print_env on xpu (#11507)
yao-matrix May 9, 2025
92fe689
Change Framepack transformer layer initialization order (#11535)
a-r-r-o-w May 9, 2025
01abfc8
[tests] add tests for framepack transformer model. (#11520)
sayakpaul May 11, 2025
e48f6ae
Hunyuan Video Framepack F1 (#11534)
a-r-r-o-w May 12, 2025
c372615
enable several pipeline integration tests on XPU (#11526)
yao-matrix May 12, 2025
enable semantic diffusion and stable diffusion panorama cases on XPU (huggingface#11459)

Signed-off-by: Yao Matrix <[email protected]>
yao-matrix authored May 5, 2025
commit a674914fd5f45ef7bcec71061aa2fb315ceb3495
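The change in this commit is uniform: CUDA-only calls (`torch.cuda.empty_cache`, the `require_torch_gpu` decorator, the `torch.cuda` memory counters) are swapped for device-dispatching helpers from `diffusers.utils.testing_utils`, so the same tests run on both CUDA and Intel XPU. As a rough illustration of the dispatch idea (a minimal sketch under assumed names, not diffusers' actual implementation):

```python
# Minimal sketch of the device-dispatch idea behind helpers such as
# backend_empty_cache(torch_device). Names ending in _sketch are
# hypothetical; the real diffusers helpers may be structured differently.
import torch


def backend_empty_cache_sketch(device: str) -> None:
    """Release cached allocator memory on whichever accelerator `device` names."""
    if device.startswith("cuda"):
        torch.cuda.empty_cache()
    elif device.startswith("xpu") and hasattr(torch, "xpu"):
        # Intel XPU; requires a PyTorch build with XPU support.
        torch.xpu.empty_cache()
    # On CPU there is no allocator cache to clear.
```

With this shape, test code only ever mentions `torch_device`, and supporting a new accelerator means extending the helper rather than touching every test.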
File 1: the SemanticStableDiffusionPipeline tests

@@ -25,11 +25,11 @@
 from diffusers import AutoencoderKL, DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel
 from diffusers.pipelines.semantic_stable_diffusion import SemanticStableDiffusionPipeline as StableDiffusionPipeline
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     nightly,
-    require_accelerator,
-    require_torch_gpu,
+    require_torch_accelerator,
     torch_device,
 )
@@ -42,13 +42,13 @@ def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     @property
     def dummy_image(self):
@@ -238,7 +238,7 @@ def test_semantic_diffusion_no_safety_checker(self):
         image = pipe("example prompt", num_inference_steps=2).images[0]
         assert image is not None
 
-    @require_accelerator
+    @require_torch_accelerator
     def test_semantic_diffusion_fp16(self):
         """Test that stable diffusion works with fp16"""
         unet = self.dummy_cond_unet
@@ -272,22 +272,21 @@ def test_semantic_diffusion_fp16(self):
 
 
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class SemanticDiffusionPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_positive_guidance(self):
-        torch_device = "cuda"
         pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
@@ -370,7 +369,6 @@ def test_positive_guidance(self):
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
     def test_negative_guidance(self):
-        torch_device = "cuda"
         pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
@@ -453,7 +451,6 @@ def test_negative_guidance(self):
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
     def test_multi_cond_guidance(self):
-        torch_device = "cuda"
         pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         pipe = pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
@@ -536,7 +533,6 @@ def test_multi_cond_guidance(self):
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
 
     def test_guidance_fp16(self):
-        torch_device = "cuda"
         pipe = StableDiffusionPipeline.from_pretrained(
             "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
         )
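Two patterns recur in this file. First, the hard-coded `torch_device = "cuda"` lines are deleted because `torch_device` imported from `diffusers.utils.testing_utils` already resolves to whichever backend is available; shadowing it with "cuda" would force CUDA even on XPU machines. Second, `require_torch_gpu` becomes `require_torch_accelerator`. A hypothetical version of such a skip decorator (illustrative only, not the library's exact code) could look like:

```python
# Hypothetical sketch of an accelerator-gating test decorator in the spirit
# of require_torch_accelerator; diffusers' real helper covers more backends.
import unittest

import torch


def _accelerator_available() -> bool:
    # True for CUDA, or for Intel XPU when the build exposes torch.xpu.
    if torch.cuda.is_available():
        return True
    return hasattr(torch, "xpu") and torch.xpu.is_available()


def require_torch_accelerator_sketch(test_case):
    """Skip the decorated test unless some PyTorch accelerator is present."""
    return unittest.skipUnless(
        _accelerator_available(), "test requires a PyTorch accelerator backend"
    )(test_case)
```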
File 2: the StableDiffusionPanoramaPipeline tests

@@ -29,7 +29,17 @@
     StableDiffusionPanoramaPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, skip_mps, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_max_memory_allocated,
+    backend_reset_peak_memory_stats,
+    enable_full_determinism,
+    nightly,
+    require_torch_accelerator,
+    skip_mps,
+    torch_device,
+)
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import (
@@ -267,17 +277,17 @@ def test_encode_prompt_works_in_isolation(self):
 
 
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionPanoramaNightlyTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def get_inputs(self, seed=0):
         generator = torch.manual_seed(seed)
@@ -415,9 +425,9 @@ def callback_fn(step: int, timestep: int, latents: torch.Tensor) -> None:
         assert number_of_steps == 3
 
     def test_stable_diffusion_panorama_pipeline_with_sequential_cpu_offloading(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)
 
         model_ckpt = "stabilityai/stable-diffusion-2-base"
         scheduler = DDIMScheduler.from_pretrained(model_ckpt, subfolder="scheduler")
@@ -429,6 +439,6 @@ def test_stable_diffusion_panorama_pipeline_with_sequential_cpu_offloading(self):
         inputs = self.get_inputs()
         _ = pipe(**inputs)
 
-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         # make sure that less than 5.5 GB is allocated
         assert mem_bytes < 5.5 * 10**9
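The memory assertion in the offloading test follows the same dispatch pattern: the `backend_*` memory helpers presumably route to the matching `torch.cuda` or `torch.xpu` statistics APIs. A hedged sketch of that routing (assumed structure, not diffusers' code):

```python
# Sketch of how a backend_max_memory_allocated-style helper might dispatch.
# Illustrative only; torch.xpu memory statistics require a recent PyTorch
# build with XPU support.
import torch


def backend_max_memory_allocated_sketch(device: str) -> int:
    """Return the peak number of bytes allocated on the given accelerator."""
    if device.startswith("cuda"):
        return torch.cuda.max_memory_allocated()
    if device.startswith("xpu") and hasattr(torch, "xpu"):
        return torch.xpu.max_memory_allocated()
    raise ValueError(f"no memory statistics available for device {device!r}")
```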