Skip to content

Commit e6a48db

Browse files
authored
Refactor Deepfloyd IF tests. (huggingface#6855)
* update
* update
* update
1 parent 4f1df69 commit e6a48db

File tree

6 files changed: +266 −249 lines changed

tests/pipelines/deepfloyd_if/test_if.py

Lines changed: 12 additions & 238 deletions
Original file line number | Diff line number | Diff line change
@@ -14,22 +14,16 @@
1414
# limitations under the License.
1515

1616
import gc
17-
import random
1817
import unittest
1918

2019
import torch
2120

2221
from diffusers import (
23-
IFImg2ImgPipeline,
24-
IFImg2ImgSuperResolutionPipeline,
25-
IFInpaintingPipeline,
26-
IFInpaintingSuperResolutionPipeline,
2722
IFPipeline,
28-
IFSuperResolutionPipeline,
2923
)
3024
from diffusers.models.attention_processor import AttnAddedKVProcessor
3125
from diffusers.utils.import_utils import is_xformers_available
32-
from diffusers.utils.testing_utils import floats_tensor, load_numpy, require_torch_gpu, skip_mps, slow, torch_device
26+
from diffusers.utils.testing_utils import load_numpy, require_torch_gpu, skip_mps, slow, torch_device
3327

3428
from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_PARAMS
3529
from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
@@ -97,250 +91,30 @@ def tearDown(self):
9791
gc.collect()
9892
torch.cuda.empty_cache()
9993

100-
def test_all(self):
101-
# if
94+
def test_if_text_to_image(self):
95+
pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
96+
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
97+
pipe.enable_model_cpu_offload()
10298

103-
pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
104-
105-
pipe_2 = IFSuperResolutionPipeline.from_pretrained(
106-
"DeepFloyd/IF-II-L-v1.0", variant="fp16", torch_dtype=torch.float16, text_encoder=None, tokenizer=None
107-
)
108-
109-
# pre compute text embeddings and remove T5 to save memory
110-
111-
pipe_1.text_encoder.to("cuda")
112-
113-
prompt_embeds, negative_prompt_embeds = pipe_1.encode_prompt("anime turtle", device="cuda")
114-
115-
del pipe_1.tokenizer
116-
del pipe_1.text_encoder
117-
gc.collect()
118-
119-
pipe_1.tokenizer = None
120-
pipe_1.text_encoder = None
121-
122-
pipe_1.enable_model_cpu_offload()
123-
pipe_2.enable_model_cpu_offload()
124-
125-
pipe_1.unet.set_attn_processor(AttnAddedKVProcessor())
126-
pipe_2.unet.set_attn_processor(AttnAddedKVProcessor())
127-
128-
self._test_if(pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds)
129-
130-
pipe_1.remove_all_hooks()
131-
pipe_2.remove_all_hooks()
132-
133-
# img2img
134-
135-
pipe_1 = IFImg2ImgPipeline(**pipe_1.components)
136-
pipe_2 = IFImg2ImgSuperResolutionPipeline(**pipe_2.components)
137-
138-
pipe_1.enable_model_cpu_offload()
139-
pipe_2.enable_model_cpu_offload()
140-
141-
pipe_1.unet.set_attn_processor(AttnAddedKVProcessor())
142-
pipe_2.unet.set_attn_processor(AttnAddedKVProcessor())
143-
144-
self._test_if_img2img(pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds)
145-
146-
pipe_1.remove_all_hooks()
147-
pipe_2.remove_all_hooks()
148-
149-
# inpainting
150-
151-
pipe_1 = IFInpaintingPipeline(**pipe_1.components)
152-
pipe_2 = IFInpaintingSuperResolutionPipeline(**pipe_2.components)
153-
154-
pipe_1.enable_model_cpu_offload()
155-
pipe_2.enable_model_cpu_offload()
156-
157-
pipe_1.unet.set_attn_processor(AttnAddedKVProcessor())
158-
pipe_2.unet.set_attn_processor(AttnAddedKVProcessor())
159-
160-
self._test_if_inpainting(pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds)
161-
162-
def _test_if(self, pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds):
163-
# pipeline 1
164-
165-
_start_torch_memory_measurement()
99+
torch.cuda.reset_max_memory_allocated()
100+
torch.cuda.empty_cache()
101+
torch.cuda.reset_peak_memory_stats()
166102

167103
generator = torch.Generator(device="cpu").manual_seed(0)
168-
output = pipe_1(
169-
prompt_embeds=prompt_embeds,
170-
negative_prompt_embeds=negative_prompt_embeds,
104+
output = pipe(
105+
prompt="anime turtle",
171106
num_inference_steps=2,
172107
generator=generator,
173108
output_type="np",
174109
)
175110

176111
image = output.images[0]
177112

178-
assert image.shape == (64, 64, 3)
179-
180113
mem_bytes = torch.cuda.max_memory_allocated()
181-
assert mem_bytes < 13 * 10**9
114+
assert mem_bytes < 12 * 10**9
182115

183116
expected_image = load_numpy(
184117
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if.npy"
185118
)
186119
assert_mean_pixel_difference(image, expected_image)
187-
188-
# pipeline 2
189-
190-
_start_torch_memory_measurement()
191-
192-
generator = torch.Generator(device="cpu").manual_seed(0)
193-
194-
image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
195-
196-
output = pipe_2(
197-
prompt_embeds=prompt_embeds,
198-
negative_prompt_embeds=negative_prompt_embeds,
199-
image=image,
200-
generator=generator,
201-
num_inference_steps=2,
202-
output_type="np",
203-
)
204-
205-
image = output.images[0]
206-
207-
assert image.shape == (256, 256, 3)
208-
209-
mem_bytes = torch.cuda.max_memory_allocated()
210-
assert mem_bytes < 4 * 10**9
211-
212-
expected_image = load_numpy(
213-
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_superresolution_stage_II.npy"
214-
)
215-
assert_mean_pixel_difference(image, expected_image)
216-
217-
def _test_if_img2img(self, pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds):
218-
# pipeline 1
219-
220-
_start_torch_memory_measurement()
221-
222-
image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
223-
224-
generator = torch.Generator(device="cpu").manual_seed(0)
225-
226-
output = pipe_1(
227-
prompt_embeds=prompt_embeds,
228-
negative_prompt_embeds=negative_prompt_embeds,
229-
image=image,
230-
num_inference_steps=2,
231-
generator=generator,
232-
output_type="np",
233-
)
234-
235-
image = output.images[0]
236-
237-
assert image.shape == (64, 64, 3)
238-
239-
mem_bytes = torch.cuda.max_memory_allocated()
240-
assert mem_bytes < 10 * 10**9
241-
242-
expected_image = load_numpy(
243-
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_img2img.npy"
244-
)
245-
assert_mean_pixel_difference(image, expected_image)
246-
247-
# pipeline 2
248-
249-
_start_torch_memory_measurement()
250-
251-
generator = torch.Generator(device="cpu").manual_seed(0)
252-
253-
original_image = floats_tensor((1, 3, 256, 256), rng=random.Random(0)).to(torch_device)
254-
image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
255-
256-
output = pipe_2(
257-
prompt_embeds=prompt_embeds,
258-
negative_prompt_embeds=negative_prompt_embeds,
259-
image=image,
260-
original_image=original_image,
261-
generator=generator,
262-
num_inference_steps=2,
263-
output_type="np",
264-
)
265-
266-
image = output.images[0]
267-
268-
assert image.shape == (256, 256, 3)
269-
270-
mem_bytes = torch.cuda.max_memory_allocated()
271-
assert mem_bytes < 4 * 10**9
272-
273-
expected_image = load_numpy(
274-
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_img2img_superresolution_stage_II.npy"
275-
)
276-
assert_mean_pixel_difference(image, expected_image)
277-
278-
def _test_if_inpainting(self, pipe_1, pipe_2, prompt_embeds, negative_prompt_embeds):
279-
# pipeline 1
280-
281-
_start_torch_memory_measurement()
282-
283-
image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
284-
mask_image = floats_tensor((1, 3, 64, 64), rng=random.Random(1)).to(torch_device)
285-
286-
generator = torch.Generator(device="cpu").manual_seed(0)
287-
output = pipe_1(
288-
prompt_embeds=prompt_embeds,
289-
negative_prompt_embeds=negative_prompt_embeds,
290-
image=image,
291-
mask_image=mask_image,
292-
num_inference_steps=2,
293-
generator=generator,
294-
output_type="np",
295-
)
296-
297-
image = output.images[0]
298-
299-
assert image.shape == (64, 64, 3)
300-
301-
mem_bytes = torch.cuda.max_memory_allocated()
302-
assert mem_bytes < 10 * 10**9
303-
304-
expected_image = load_numpy(
305-
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_inpainting.npy"
306-
)
307-
assert_mean_pixel_difference(image, expected_image)
308-
309-
# pipeline 2
310-
311-
_start_torch_memory_measurement()
312-
313-
generator = torch.Generator(device="cpu").manual_seed(0)
314-
315-
image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
316-
original_image = floats_tensor((1, 3, 256, 256), rng=random.Random(0)).to(torch_device)
317-
mask_image = floats_tensor((1, 3, 256, 256), rng=random.Random(1)).to(torch_device)
318-
319-
output = pipe_2(
320-
prompt_embeds=prompt_embeds,
321-
negative_prompt_embeds=negative_prompt_embeds,
322-
image=image,
323-
mask_image=mask_image,
324-
original_image=original_image,
325-
generator=generator,
326-
num_inference_steps=2,
327-
output_type="np",
328-
)
329-
330-
image = output.images[0]
331-
332-
assert image.shape == (256, 256, 3)
333-
334-
mem_bytes = torch.cuda.max_memory_allocated()
335-
assert mem_bytes < 4 * 10**9
336-
337-
expected_image = load_numpy(
338-
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_inpainting_superresolution_stage_II.npy"
339-
)
340-
assert_mean_pixel_difference(image, expected_image)
341-
342-
343-
def _start_torch_memory_measurement():
344-
torch.cuda.empty_cache()
345-
torch.cuda.reset_max_memory_allocated()
346-
torch.cuda.reset_peak_memory_stats()
120+
pipe.remove_all_hooks()

tests/pipelines/deepfloyd_if/test_if_img2img.py

Lines changed: 44 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,20 +13,22 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
import gc
1617
import random
1718
import unittest
1819

1920
import torch
2021

2122
from diffusers import IFImg2ImgPipeline
23+
from diffusers.models.attention_processor import AttnAddedKVProcessor
2224
from diffusers.utils.import_utils import is_xformers_available
23-
from diffusers.utils.testing_utils import floats_tensor, skip_mps, torch_device
25+
from diffusers.utils.testing_utils import floats_tensor, load_numpy, require_torch_gpu, skip_mps, slow, torch_device
2426

2527
from ..pipeline_params import (
2628
TEXT_GUIDED_IMAGE_VARIATION_BATCH_PARAMS,
2729
TEXT_GUIDED_IMAGE_VARIATION_PARAMS,
2830
)
29-
from ..test_pipelines_common import PipelineTesterMixin
31+
from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
3032
from . import IFPipelineTesterMixin
3133

3234

@@ -87,3 +89,43 @@ def test_inference_batch_single_identical(self):
8789
self._test_inference_batch_single_identical(
8890
expected_max_diff=1e-2,
8991
)
92+
93+
94+
@slow
95+
@require_torch_gpu
96+
class IFImg2ImgPipelineSlowTests(unittest.TestCase):
97+
def tearDown(self):
98+
# clean up the VRAM after each test
99+
super().tearDown()
100+
gc.collect()
101+
torch.cuda.empty_cache()
102+
103+
def test_if_img2img(self):
104+
pipe = IFImg2ImgPipeline.from_pretrained(
105+
"DeepFloyd/IF-I-L-v1.0",
106+
variant="fp16",
107+
torch_dtype=torch.float16,
108+
)
109+
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
110+
pipe.enable_model_cpu_offload()
111+
112+
image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
113+
generator = torch.Generator(device="cpu").manual_seed(0)
114+
output = pipe(
115+
prompt="anime turtle",
116+
image=image,
117+
num_inference_steps=2,
118+
generator=generator,
119+
output_type="np",
120+
)
121+
image = output.images[0]
122+
123+
mem_bytes = torch.cuda.max_memory_allocated()
124+
assert mem_bytes < 12 * 10**9
125+
126+
expected_image = load_numpy(
127+
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/if/test_if_img2img.npy"
128+
)
129+
assert_mean_pixel_difference(image, expected_image)
130+
131+
pipe.remove_all_hooks()

0 commit comments

Comments (0)