Commit 46893ad

[AltDiffusion] add tests (huggingface#1311)
* begin tests
* fix model ids
* don't use safety checker in tests
* add im2img2 tests
* fix integration tests
* integration tests
* style
* add sentencepiece in test dep
* quality
* 4 decimal points
* fix im2img test
* increase the tol slightly
1 parent 327ddc8 commit 46893ad

5 files changed: 606 additions, 0 deletions
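In short, the commit registers sentencepiece as a test dependency (AltDiffusion tokenizes with the SentencePiece-based XLMRobertaTokenizer) and adds a new module of AltDiffusion pipeline tests: fast CPU tests built from tiny dummy components, plus slow GPU integration tests against the BAAI/AltDiffusion checkpoint.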

setup.py

Lines changed: 2 additions & 0 deletions
@@ -97,6 +97,7 @@
     "pytest",
     "pytest-timeout",
     "pytest-xdist",
+    "sentencepiece>=0.1.91,!=0.1.92",
     "scipy",
     "regex!=2019.12.17",
     "requests",
@@ -183,6 +184,7 @@ def run(self):
     "pytest",
     "pytest-timeout",
     "pytest-xdist",
+    "sentencepiece",
     "scipy",
     "torchvision",
     "transformers"

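For context: the new test dependency is needed because the tests load XLMRobertaTokenizer, whose slow tokenizer is backed by a SentencePiece model and cannot load without the sentencepiece package. A minimal sketch of that dependency in action (illustrative, not part of this commit):

# Illustrative only: loading XLMRobertaTokenizer requires the sentencepiece
# package, which is why it is added to the "test" extras above.
from transformers import XLMRobertaTokenizer

tokenizer = XLMRobertaTokenizer.from_pretrained("hf-internal-testing/tiny-xlm-roberta")
tokenizer.model_max_length = 77  # the tests below cap the sequence length the same way
print(tokenizer("A photo of an astronaut").input_ids)
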
src/diffusers/dependency_versions_table.py

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@
     "pytest": "pytest",
     "pytest-timeout": "pytest-timeout",
     "pytest-xdist": "pytest-xdist",
+    "sentencepiece": "sentencepiece>=0.1.91,!=0.1.92",
     "scipy": "scipy",
     "regex": "regex!=2019.12.17",
     "requests": "requests",

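This entry mirrors the pin added to setup.py, so the two sources of version truth stay in sync. A hedged sketch of reading the pin back out (assuming the module's dict is named deps, as in the diffusers source):

# Hedged sketch: dependency_versions_table stores a plain dict mapping
# package name -> pip requirement string.
from diffusers.dependency_versions_table import deps

print(deps["sentencepiece"])  # sentencepiece>=0.1.91,!=0.1.92
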
tests/pipelines/altdiffusion/__init__.py

Whitespace-only changes.
tests/pipelines/altdiffusion/test_alt_diffusion.py

Lines changed: 347 additions & 0 deletions
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import random
import unittest

import numpy as np
import torch

from diffusers import AltDiffusionPipeline, AutoencoderKL, DDIMScheduler, PNDMScheduler, UNet2DConditionModel
from diffusers.pipelines.alt_diffusion.modeling_roberta_series import (
    RobertaSeriesConfig,
    RobertaSeriesModelWithTransformation,
)
from diffusers.utils import floats_tensor, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu
from transformers import XLMRobertaTokenizer

from ...test_pipelines_common import PipelineTesterMixin


torch.backends.cuda.matmul.allow_tf32 = False


class AltDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    @property
    def dummy_image(self):
        batch_size = 1
        num_channels = 3
        sizes = (32, 32)

        image = floats_tensor((batch_size, num_channels) + sizes, rng=random.Random(0)).to(torch_device)
        return image

    @property
    def dummy_cond_unet(self):
        torch.manual_seed(0)
        model = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=4,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
        )
        return model

    @property
    def dummy_cond_unet_inpaint(self):
        torch.manual_seed(0)
        model = UNet2DConditionModel(
            block_out_channels=(32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=9,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
        )
        return model

    @property
    def dummy_vae(self):
        torch.manual_seed(0)
        model = AutoencoderKL(
            block_out_channels=[32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
        )
        return model

    @property
    def dummy_text_encoder(self):
        torch.manual_seed(0)
        config = RobertaSeriesConfig(
            hidden_size=32,
            project_dim=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            vocab_size=5002,
        )
        return RobertaSeriesModelWithTransformation(config)

    @property
    def dummy_extractor(self):
        def extract(*args, **kwargs):
            class Out:
                def __init__(self):
                    self.pixel_values = torch.ones([0])

                def to(self, device):
                    self.pixel_values.to(device)
                    return self

            return Out()

        return extract

    def test_alt_diffusion_ddim(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        unet = self.dummy_cond_unet
        scheduler = DDIMScheduler(
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            clip_sample=False,
            set_alpha_to_one=False,
        )

        vae = self.dummy_vae
        bert = self.dummy_text_encoder
        tokenizer = XLMRobertaTokenizer.from_pretrained("hf-internal-testing/tiny-xlm-roberta")
        tokenizer.model_max_length = 77

        alt_pipe = AltDiffusionPipeline(
            unet=unet,
            scheduler=scheduler,
            vae=vae,
            text_encoder=bert,
            tokenizer=tokenizer,
            safety_checker=None,
            feature_extractor=self.dummy_extractor,
        )
        alt_pipe = alt_pipe.to(device)
        alt_pipe.set_progress_bar_config(disable=None)

        prompt = "A photo of an astronaut"

        generator = torch.Generator(device=device).manual_seed(0)
        output = alt_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")
        image = output.images

        generator = torch.Generator(device=device).manual_seed(0)
        image_from_tuple = alt_pipe(
            [prompt],
            generator=generator,
            guidance_scale=6.0,
            num_inference_steps=2,
            output_type="np",
            return_dict=False,
        )[0]

        image_slice = image[0, -3:, -3:, -1]
        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

        assert image.shape == (1, 128, 128, 3)
        expected_slice = np.array(
            [0.49249017, 0.46064827, 0.4790093, 0.50883967, 0.4811985, 0.51540506, 0.5084924, 0.4860553, 0.47318557]
        )

        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2

    def test_alt_diffusion_pndm(self):
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator
        unet = self.dummy_cond_unet
        scheduler = PNDMScheduler(skip_prk_steps=True)
        vae = self.dummy_vae
        bert = self.dummy_text_encoder
        tokenizer = XLMRobertaTokenizer.from_pretrained("hf-internal-testing/tiny-xlm-roberta")
        tokenizer.model_max_length = 77

        # make sure here that pndm scheduler skips prk
        alt_pipe = AltDiffusionPipeline(
            unet=unet,
            scheduler=scheduler,
            vae=vae,
            text_encoder=bert,
            tokenizer=tokenizer,
            safety_checker=None,
            feature_extractor=self.dummy_extractor,
        )
        alt_pipe = alt_pipe.to(device)
        alt_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"
        generator = torch.Generator(device=device).manual_seed(0)
        output = alt_pipe([prompt], generator=generator, guidance_scale=6.0, num_inference_steps=2, output_type="np")

        image = output.images

        generator = torch.Generator(device=device).manual_seed(0)
        image_from_tuple = alt_pipe(
            [prompt],
            generator=generator,
            guidance_scale=6.0,
            num_inference_steps=2,
            output_type="np",
            return_dict=False,
        )[0]

        image_slice = image[0, -3:, -3:, -1]
        image_from_tuple_slice = image_from_tuple[0, -3:, -3:, -1]

        assert image.shape == (1, 128, 128, 3)
        expected_slice = np.array(
            [0.4786532, 0.45791715, 0.47507674, 0.50763345, 0.48375353, 0.515062, 0.51244247, 0.48673993, 0.47105807]
        )
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
        assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2

    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
    def test_alt_diffusion_fp16(self):
        """Test that AltDiffusion works with fp16"""
        unet = self.dummy_cond_unet
        scheduler = PNDMScheduler(skip_prk_steps=True)
        vae = self.dummy_vae
        bert = self.dummy_text_encoder
        tokenizer = XLMRobertaTokenizer.from_pretrained("hf-internal-testing/tiny-xlm-roberta")
        tokenizer.model_max_length = 77

        # put models in fp16
        unet = unet.half()
        vae = vae.half()
        bert = bert.half()

        # make sure here that pndm scheduler skips prk
        alt_pipe = AltDiffusionPipeline(
            unet=unet,
            scheduler=scheduler,
            vae=vae,
            text_encoder=bert,
            tokenizer=tokenizer,
            safety_checker=None,
            feature_extractor=self.dummy_extractor,
        )
        alt_pipe = alt_pipe.to(torch_device)
        alt_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"
        generator = torch.Generator(device=torch_device).manual_seed(0)
        image = alt_pipe([prompt], generator=generator, num_inference_steps=2, output_type="np").images

        assert image.shape == (1, 128, 128, 3)


@slow
@require_torch_gpu
class AltDiffusionPipelineIntegrationTests(unittest.TestCase):
    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def test_alt_diffusion(self):
        # make sure here that pndm scheduler skips prk
        alt_pipe = AltDiffusionPipeline.from_pretrained("BAAI/AltDiffusion", safety_checker=None)
        alt_pipe = alt_pipe.to(torch_device)
        alt_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"
        generator = torch.Generator(device=torch_device).manual_seed(0)
        with torch.autocast("cuda"):
            output = alt_pipe(
                [prompt], generator=generator, guidance_scale=6.0, num_inference_steps=20, output_type="np"
            )

        image = output.images

        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 512, 512, 3)
        expected_slice = np.array(
            [0.8720703, 0.87109375, 0.87402344, 0.87109375, 0.8779297, 0.8925781, 0.8823242, 0.8808594, 0.8613281]
        )
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_alt_diffusion_fast_ddim(self):
        scheduler = DDIMScheduler.from_pretrained("BAAI/AltDiffusion", subfolder="scheduler")

        alt_pipe = AltDiffusionPipeline.from_pretrained("BAAI/AltDiffusion", scheduler=scheduler, safety_checker=None)
        alt_pipe = alt_pipe.to(torch_device)
        alt_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"
        generator = torch.Generator(device=torch_device).manual_seed(0)

        with torch.autocast("cuda"):
            output = alt_pipe([prompt], generator=generator, num_inference_steps=2, output_type="numpy")
        image = output.images

        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 512, 512, 3)
        expected_slice = np.array(
            [0.9267578, 0.9301758, 0.9013672, 0.9345703, 0.92578125, 0.94433594, 0.9423828, 0.9423828, 0.9160156]
        )
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_alt_diffusion_text2img_pipeline_fp16(self):
        torch.cuda.reset_peak_memory_stats()
        model_id = "BAAI/AltDiffusion"
        pipe = AltDiffusionPipeline.from_pretrained(
            model_id, revision="fp16", torch_dtype=torch.float16, safety_checker=None
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        prompt = "a photograph of an astronaut riding a horse"

        generator = torch.Generator(device=torch_device).manual_seed(0)
        output_chunked = pipe(
            [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy"
        )
        image_chunked = output_chunked.images

        generator = torch.Generator(device=torch_device).manual_seed(0)
        with torch.autocast(torch_device):
            output = pipe(
                [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy"
            )
        image = output.images

        # Make sure results are close enough
        diff = np.abs(image_chunked.flatten() - image.flatten())
        # They ARE different since ops are not always run at the same precision.
        # However, they should be extremely close.
        assert diff.mean() < 2e-2
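
A note on the pattern used throughout the module above: each functional test compares only a 3x3 corner slice of the generated image against hardcoded reference values, with a loose 1e-2 tolerance, which catches numerical regressions without storing full reference images. A self-contained illustration with stand-in values (not taken from the tests):

import numpy as np

# Stand-in for a pipeline output of shape (batch, height, width, channels).
image = np.full((1, 128, 128, 3), 0.5, dtype=np.float32)

# Bottom-right 3x3 patch of the last channel, exactly as sliced in the tests.
image_slice = image[0, -3:, -3:, -1]
expected_slice = np.full(9, 0.5, dtype=np.float32)  # hardcoded reference values

assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2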

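The @slow and @require_torch_gpu decorators come from diffusers.utils, so by the library's testing convention the integration class is skipped unless slow tests are explicitly enabled (RUN_SLOW=1 in the environment) and a CUDA device is available. For orientation, a hedged end-to-end sketch of the pipeline these tests exercise, mirroring test_alt_diffusion (assumes a CUDA GPU and network access, and disables the safety checker just as the tests do):

import torch
from diffusers import AltDiffusionPipeline

# Same checkpoint the integration tests use.
pipe = AltDiffusionPipeline.from_pretrained("BAAI/AltDiffusion", safety_checker=None)
pipe = pipe.to("cuda")

generator = torch.Generator(device="cuda").manual_seed(0)
image = pipe(
    "A painting of a squirrel eating a burger",
    generator=generator,
    guidance_scale=6.0,
    num_inference_steps=20,
).images[0]  # default output type is a PIL image
image.save("squirrel.png")
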