14 | 14 | # limitations under the License. |
15 | 15 |
16 | 16 | import gc |
17 | | -import random |
18 | 17 | import unittest |
19 | 18 |
20 | 19 | import torch |
21 | 20 |
22 | 21 | from diffusers import ( |
23 | | - IFImg2ImgPipeline, |
24 | | - IFImg2ImgSuperResolutionPipeline, |
25 | | - IFInpaintingPipeline, |
26 | | - IFInpaintingSuperResolutionPipeline, |
27 | 22 | IFPipeline, |
28 | | - IFSuperResolutionPipeline, |
29 | 23 | ) |
30 | 24 | from diffusers.models.attention_processor import AttnAddedKVProcessor |
31 | 25 | from diffusers.utils.import_utils import is_xformers_available |
32 | | -from diffusers.utils.testing_utils import floats_tensor, load_numpy, require_torch_gpu, skip_mps, slow, torch_device |
| 26 | +from diffusers.utils.testing_utils import load_numpy, require_torch_gpu, skip_mps, slow, torch_device |
33 | 27 |
34 | 28 | from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS |
35 | 29 | from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference |
@@ -97,250 +91,30 @@ def tearDown(self): |
97 | 91 | gc.collect() |
98 | 92 | torch.cuda.empty_cache() |
99 | 93 |
100 | | - def test_all(self): |
101 | | - # if |
| 94 | + def test_if_text_to_image(self): |
| 95 | + pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) |
| 96 | + pipe.unet.set_attn_processor(AttnAddedKVProcessor()) |
| 97 | + pipe.enable_model_cpu_offload() |
102 | 98 |
103 | | - pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16) |
104 | | - |
105 | | - pipe_2 = IFSuperResolutionPipeline.from_pretrained( |
106 | | - "DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16, text_encoder=None, tokenizer=None |
107 | | - ) |
108 | | - |
109 | | - # pre compute text embeddings and remove T5 to save memory |
110 | | - |
111 | | - pipe_1.text_encoder.to("cuda") |
112 | | - |
113 | | - prompt_embeds, negative_prompt_embeds = pipe_1.encode_prompt("anime turtle", device="cuda") |
114 | | - |
115 | | - del pipe_1.tokenizer |
116 | | - del pipe_1.text_encoder |
117 | | - gc.collect() |
118 | | - |
119 | | - pipe_1.tokenizer = None |
120 | | - pipe_1.text_encoder = None |
121 | | - |
122 | | - pipe_1.enable_model_cpu_offload() |
123 | | - pipe_2.enable_model_cpu_offload() |
124 | | - |
125 | | - pipe_1.unet.set_attn_processor(AttnAddedKVProcessor()) |
126 | | - pipe_2.unet.set_attn_processor(AttnAddedKVProcessor()) |
127 | | - |
128 | | - self._test_if(pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds) |
129 | | - |
130 | | - pipe_1.remove_all_hooks() |
131 | | - pipe_2.remove_all_hooks() |
132 | | - |
133 | | - # img2img |
134 | | - |
135 | | - pipe_1 = IFImg2ImgPipeline(**pipe_1.components) |
136 | | - pipe_2 = IFImg2ImgSuperResolutionPipeline(**pipe_2.components) |
137 | | - |
138 | | - pipe_1.enable_model_cpu_offload() |
139 | | - pipe_2.enable_model_cpu_offload() |
140 | | - |
141 | | - pipe_1.unet.set_attn_processor(AttnAddedKVProcessor()) |
142 | | - pipe_2.unet.set_attn_processor(AttnAddedKVProcessor()) |
143 | | - |
144 | | - self._test_if_img2img(pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds) |
145 | | - |
146 | | - pipe_1.remove_all_hooks() |
147 | | - pipe_2.remove_all_hooks() |
148 | | - |
149 | | - # inpainting |
150 | | - |
151 | | - pipe_1 = IFInpaintingPipeline(**pipe_1.components) |
152 | | - pipe_2 = IFInpaintingSuperResolutionPipeline(**pipe_2.components) |
153 | | - |
154 | | - pipe_1.enable_model_cpu_offload() |
155 | | - pipe_2.enable_model_cpu_offload() |
156 | | - |
157 | | - pipe_1.unet.set_attn_processor(AttnAddedKVProcessor()) |
158 | | - pipe_2.unet.set_attn_processor(AttnAddedKVProcessor()) |
159 | | - |
160 | | - self._test_if_inpainting(pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds) |
161 | | - |
162 | | - def _test_if(self, pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds): |
163 | | - # pipeline 1 |
164 | | - |
165 | | - _start_torch_memory_measurement() |
| 99 | + torch.cuda.reset_max_memory_allocated() |
| 100 | + torch.cuda.empty_cache() |
| 101 | + torch.cuda.reset_peak_memory_stats() |
166 | 102 |
167 | 103 | generator = torch.Generator(device="cpu").manual_seed(0) |
168 | | - output = pipe_1( |
169 | | - prompt_embeds=prompt_embeds, |
170 | | - negative_prompt_embeds=negative_prompt_embeds, |
| 104 | + output = pipe( |
| 105 | + prompt="anime turtle", |
171 | 106 | num_inference_steps=2, |
172 | 107 | generator=generator, |
173 | 108 | output_type="np", |
174 | 109 | ) |
175 | 110 |
176 | 111 | image = output.images[0] |
177 | 112 |
178 | | - assert image.shape == (64, 64, 3) |
179 | | - |
180 | 113 | mem_bytes = torch.cuda.max_memory_allocated() |
181 | | - assert mem_bytes < 13 * 10**9 |
| 114 | + assert mem_bytes < 12 * 10**9 |
182 | 115 |
183 | 116 | expected_image = load_numpy( |
184 | 117 | "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if.npy" |
185 | 118 | ) |
186 | 119 | assert_mean_pixel_difference(image, expected_image) |
187 | | - |
188 | | - # pipeline 2 |
189 | | - |
190 | | - _start_torch_memory_measurement() |
191 | | - |
192 | | - generator = torch.Generator(device="cpu").manual_seed(0) |
193 | | - |
194 | | - image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device) |
195 | | - |
196 | | - output = pipe_2( |
197 | | - prompt_embeds=prompt_embeds, |
198 | | - negative_prompt_embeds=negative_prompt_embeds, |
199 | | - image=image, |
200 | | - generator=generator, |
201 | | - num_inference_steps=2, |
202 | | - output_type="np", |
203 | | - ) |
204 | | - |
205 | | - image = output.images[0] |
206 | | - |
207 | | - assert image.shape == (256, 256, 3) |
208 | | - |
209 | | - mem_bytes = torch.cuda.max_memory_allocated() |
210 | | - assert mem_bytes < 4 * 10**9 |
211 | | - |
212 | | - expected_image = load_numpy( |
213 | | - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_superresolution_stage_II.npy" |
214 | | - ) |
215 | | - assert_mean_pixel_difference(image, expected_image) |
216 | | - |
217 | | - def _test_if_img2img(self, pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds): |
218 | | - # pipeline 1 |
219 | | - |
220 | | - _start_torch_memory_measurement() |
221 | | - |
222 | | - image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device) |
223 | | - |
224 | | - generator = torch.Generator(device="cpu").manual_seed(0) |
225 | | - |
226 | | - output = pipe_1( |
227 | | - prompt_embeds=prompt_embeds, |
228 | | - negative_prompt_embeds=negative_prompt_embeds, |
229 | | - image=image, |
230 | | - num_inference_steps=2, |
231 | | - generator=generator, |
232 | | - output_type="np", |
233 | | - ) |
234 | | - |
235 | | - image = output.images[0] |
236 | | - |
237 | | - assert image.shape == (64, 64, 3) |
238 | | - |
239 | | - mem_bytes = torch.cuda.max_memory_allocated() |
240 | | - assert mem_bytes < 10 * 10**9 |
241 | | - |
242 | | - expected_image = load_numpy( |
243 | | - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_img2img.npy" |
244 | | - ) |
245 | | - assert_mean_pixel_difference(image, expected_image) |
246 | | - |
247 | | - # pipeline 2 |
248 | | - |
249 | | - _start_torch_memory_measurement() |
250 | | - |
251 | | - generator = torch.Generator(device="cpu").manual_seed(0) |
252 | | - |
253 | | - original_image = floats_tensor((1, 3, 256, 256), rng=random.Random(0)).to(torch_device) |
254 | | - image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device) |
255 | | - |
256 | | - output = pipe_2( |
257 | | - prompt_embeds=prompt_embeds, |
258 | | - negative_prompt_embeds=negative_prompt_embeds, |
259 | | - image=image, |
260 | | - original_image=original_image, |
261 | | - generator=generator, |
262 | | - num_inference_steps=2, |
263 | | - output_type="np", |
264 | | - ) |
265 | | - |
266 | | - image = output.images[0] |
267 | | - |
268 | | - assert image.shape == (256, 256, 3) |
269 | | - |
270 | | - mem_bytes = torch.cuda.max_memory_allocated() |
271 | | - assert mem_bytes < 4 * 10**9 |
272 | | - |
273 | | - expected_image = load_numpy( |
274 | | - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_img2img_superresolution_stage_II.npy" |
275 | | - ) |
276 | | - assert_mean_pixel_difference(image, expected_image) |
277 | | - |
278 | | - def _test_if_inpainting(self, pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds): |
279 | | - # pipeline 1 |
280 | | - |
281 | | - _start_torch_memory_measurement() |
282 | | - |
283 | | - image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device) |
284 | | - mask_image = floats_tensor((1, 3, 64, 64), rng=random.Random(1)).to(torch_device) |
285 | | - |
286 | | - generator = torch.Generator(device="cpu").manual_seed(0) |
287 | | - output = pipe_1( |
288 | | - prompt_embeds=prompt_embeds, |
289 | | - negative_prompt_embeds=negative_prompt_embeds, |
290 | | - image=image, |
291 | | - mask_image=mask_image, |
292 | | - num_inference_steps=2, |
293 | | - generator=generator, |
294 | | - output_type="np", |
295 | | - ) |
296 | | - |
297 | | - image = output.images[0] |
298 | | - |
299 | | - assert image.shape == (64, 64, 3) |
300 | | - |
301 | | - mem_bytes = torch.cuda.max_memory_allocated() |
302 | | - assert mem_bytes < 10 * 10**9 |
303 | | - |
304 | | - expected_image = load_numpy( |
305 | | - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_inpainting.npy" |
306 | | - ) |
307 | | - assert_mean_pixel_difference(image, expected_image) |
308 | | - |
309 | | - # pipeline 2 |
310 | | - |
311 | | - _start_torch_memory_measurement() |
312 | | - |
313 | | - generator = torch.Generator(device="cpu").manual_seed(0) |
314 | | - |
315 | | - image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device) |
316 | | - original_image = floats_tensor((1, 3, 256, 256), rng=random.Random(0)).to(torch_device) |
317 | | - mask_image = floats_tensor((1, 3, 256, 256), rng=random.Random(1)).to(torch_device) |
318 | | - |
319 | | - output = pipe_2( |
320 | | - prompt_embeds=prompt_embeds, |
321 | | - negative_prompt_embeds=negative_prompt_embeds, |
322 | | - image=image, |
323 | | - mask_image=mask_image, |
324 | | - original_image=original_image, |
325 | | - generator=generator, |
326 | | - num_inference_steps=2, |
327 | | - output_type="np", |
328 | | - ) |
329 | | - |
330 | | - image = output.images[0] |
331 | | - |
332 | | - assert image.shape == (256, 256, 3) |
333 | | - |
334 | | - mem_bytes = torch.cuda.max_memory_allocated() |
335 | | - assert mem_bytes < 4 * 10**9 |
336 | | - |
337 | | - expected_image = load_numpy( |
338 | | - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_inpainting_superresolution_stage_II.npy" |
339 | | - ) |
340 | | - assert_mean_pixel_difference(image, expected_image) |
341 | | - |
342 | | - |
343 | | -def _start_torch_memory_measurement(): |
344 | | - torch.cuda.empty_cache() |
345 | | - torch.cuda.reset_max_memory_allocated() |
346 | | - torch.cuda.reset_peak_memory_stats() |
| 120 | + pipe.remove_all_hooks() |
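The old `test_all` also exercised the stage-II super-resolution, img2img, and inpainting paths, none of which are re-added in this hunk. If that coverage were split into focused tests in the same style as the new `test_if_text_to_image`, a stage-II test might look roughly like the sketch below. This is a hypothetical illustration, not part of the change: it assumes a `test_if_superresolution` method on the same slow test class, re-imports names this diff removes from the file (`IFSuperResolutionPipeline`, `floats_tensor`, `random`), and carries over the 4 GB threshold from the removed assertion rather than re-measuring it.

```python
# Illustrative sketch only -- not part of this diff. Assumes the stage-II
# coverage is re-added as its own focused test in the same style as
# test_if_text_to_image, which would require re-importing the names this
# change drops (IFSuperResolutionPipeline, floats_tensor, random).
import gc
import random

import torch

from diffusers import IFSuperResolutionPipeline
from diffusers.models.attention_processor import AttnAddedKVProcessor
from diffusers.utils.testing_utils import floats_tensor, torch_device


def test_if_superresolution(self):
    # Load only the stage-II model; the prompt is passed as a string here,
    # so the text encoder is kept rather than precomputing embeddings.
    pipe = IFSuperResolutionPipeline.from_pretrained(
        "DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16
    )
    pipe.unet.set_attn_processor(AttnAddedKVProcessor())
    pipe.enable_model_cpu_offload()

    # Reset CUDA counters so max_memory_allocated reflects this test only.
    torch.cuda.reset_max_memory_allocated()
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

    generator = torch.Generator(device="cpu").manual_seed(0)
    # Random 64x64 tensor standing in for a stage-I output, as in the removed test.
    image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)

    output = pipe(
        prompt="anime turtle",
        image=image,
        num_inference_steps=2,
        generator=generator,
        output_type="np",
    )

    image = output.images[0]
    assert image.shape == (256, 256, 3)

    # Placeholder threshold, taken from the removed stage-II assertion.
    mem_bytes = torch.cuda.max_memory_allocated()
    assert mem_bytes < 4 * 10**9

    pipe.remove_all_hooks()
    gc.collect()
    torch.cuda.empty_cache()
```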