@@ -29,7 +29,7 @@ Our work underscores the potential of larger UNet architectures in the first sta
 
 Before you can use IF, you need to accept its usage conditions. To do so:
 1. Make sure to have a [Hugging Face account](https://huggingface.co/join) and be logged in
-2. Accept the license on the model card of [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0). Accepting the license on the stage I model card will auto accept for the other IF models.
+2. Accept the license on the model card of [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0). Accepting the license on the stage I model card will auto accept for the other IF models.
 3. Make sure to login locally. Install `huggingface_hub`
 ```sh
 pip install huggingface_hub --upgrade
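
After installing, the login step itself can be done from Python. A minimal sketch, assuming the interactive token prompt of `huggingface_hub.login()`:

```py
# prompts for a Hugging Face access token (or pass token="hf_...")
from huggingface_hub import login

login()
```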
@@ -62,7 +62,7 @@ The following sections give more in-detail examples of how to use IF. Specifical
 
 **Available checkpoints**
 - *Stage-1*
-  - [DeepFloyd/IF-I-IF-v1.0](https://huggingface.co/DeepFloyd/IF-I-IF-v1.0)
+  - [DeepFloyd/IF-I-XL-v1.0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0)
   - [DeepFloyd/IF-I-L-v1.0](https://huggingface.co/DeepFloyd/IF-I-L-v1.0)
   - [DeepFloyd/IF-I-M-v1.0](https://huggingface.co/DeepFloyd/IF-I-M-v1.0)
 
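
Any stage-1 checkpoint above is a drop-in replacement in the snippets below. A hedged sketch substituting the medium checkpoint, assuming it also publishes an `fp16` variant:

```py
import torch
from diffusers import DiffusionPipeline

# hypothetical substitution: the medium stage-1 checkpoint on a smaller GPU
stage_1 = DiffusionPipeline.from_pretrained(
    "DeepFloyd/IF-I-M-v1.0", variant="fp16", torch_dtype=torch.float16
)
```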
@@ -90,7 +90,7 @@ from diffusers.utils import pt_to_pil
 import torch
 
 # stage 1
-stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+stage_1 = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 stage_1.enable_model_cpu_offload()
 
 # stage 2
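
For context beyond the truncated hunk, stage 2 is loaded the same way from the `IF-II-L-v1.0` checkpoint referenced later in this diff; a sketch, with the exact arguments assumed:

```py
# stage 2 (sketch): super resolution reuses the stage-1 text embeddings,
# so its own text encoder can be dropped
stage_2 = DiffusionPipeline.from_pretrained(
    "DeepFloyd/IF-II-L-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16
)
stage_2.enable_model_cpu_offload()
```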
@@ -162,7 +162,7 @@ original_image = Image.open(BytesIO(response.content)).convert("RGB")
 original_image = original_image.resize((768, 512))
 
 # stage 1
-stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+stage_1 = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 stage_1.enable_model_cpu_offload()
 
 # stage 2
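
The stage-1 img2img call itself takes the resized image plus pre-computed prompt embeddings; a sketch, with the prompt and argument names assumed from the IF img2img API:

```py
prompt = "A fantasy landscape in style minecraft"  # placeholder prompt
prompt_embeds, negative_embeds = stage_1.encode_prompt(prompt)

# output_type="pt" keeps the result as tensors for the stage-2 pipeline
image = stage_1(
    image=original_image,
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    output_type="pt",
).images
```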
@@ -244,7 +244,7 @@ mask_image = Image.open(BytesIO(response.content))
 mask_image = mask_image
 
 # stage 1
-stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+stage_1 = IFInpaintingPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 stage_1.enable_model_cpu_offload()
 
 # stage 2
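
Inpainting follows the same pattern, with `mask_image` as the extra argument; a sketch under the same API assumptions:

```py
prompt = "blue sunglasses"  # placeholder prompt
prompt_embeds, negative_embeds = stage_1.encode_prompt(prompt)

# the mask selects which region of original_image is regenerated
image = stage_1(
    image=original_image,
    mask_image=mask_image,
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    output_type="pt",
).images
```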
@@ -305,7 +305,7 @@ In addition to being loaded with `from_pretrained`, Pipelines can also be loaded
 ```python
 from diffusers import IFPipeline, IFSuperResolutionPipeline
 
-pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0")
+pipe_1 = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0")
 pipe_2 = IFSuperResolutionPipeline.from_pretrained("DeepFloyd/IF-II-L-v1.0")
 
 
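
Loading `pipe_1` and `pipe_2` once pays off because their weights can be rewrapped into the task-specific pipelines without re-downloading, as the next hunk's context shows for inpainting; a sketch of the same pattern for img2img:

```py
from diffusers import IFImg2ImgPipeline, IFImg2ImgSuperResolutionPipeline

# reuse already-loaded components instead of calling from_pretrained again
pipe_1 = IFImg2ImgPipeline(**pipe_1.components)
pipe_2 = IFImg2ImgSuperResolutionPipeline(**pipe_2.components)
```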
@@ -326,7 +326,7 @@ pipe_2 = IFInpaintingSuperResolutionPipeline(**pipe_2.components)
 The simplest optimization to run IF faster is to move all model components to the GPU.
 
 ```py
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.to("cuda")
 ```
 
@@ -352,7 +352,7 @@ the input image which also determines how many steps to run in the denoising pro
 A smaller number will vary the image less but run faster.
 
 ```py
-pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = IFImg2ImgPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.to("cuda")
 
 image = pipe(image=image, prompt="<prompt>", strength=0.3).images
@@ -364,7 +364,7 @@ with IF and it might not give expected results.
 ```py
 import torch
 
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.to("cuda")
 
 pipe.text_encoder = torch.compile(pipe.text_encoder)
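
The same call can be applied to the UNet, which dominates sampling time; a sketch assuming default `torch.compile` settings:

```py
# first pipeline invocation after this triggers (slow) one-time compilation
pipe.unet = torch.compile(pipe.unet)
```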
@@ -378,14 +378,14 @@ When optimizing for GPU memory, we can use the standard diffusers cpu offloading
 Either the model based CPU offloading,
 
 ```py
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.enable_model_cpu_offload()
 ```
 
 or the more aggressive layer based CPU offloading.
 
 ```py
-pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-IF-v1.0", variant="fp16", torch_dtype=torch.float16)
+pipe = DiffusionPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0", variant="fp16", torch_dtype=torch.float16)
 pipe.enable_sequential_cpu_offload()
 ```
 
@@ -395,13 +395,13 @@ Additionally, T5 can be loaded in 8bit precision
 from transformers import T5EncoderModel
 
 text_encoder = T5EncoderModel.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
+    "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
 )
 
 from diffusers import DiffusionPipeline
 
 pipe = DiffusionPipeline.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0",
+    "DeepFloyd/IF-I-XL-v1.0",
     text_encoder=text_encoder,  # pass the previously instantiated 8bit text encoder
     unet=None,
     device_map="auto",
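
With `unet=None`, this pipeline can only run the text encoder, so the natural next step is to pre-compute the prompt embeddings before swapping in the UNet; a sketch with a placeholder prompt:

```py
# encode once with the 8bit T5; afterwards the text encoder can be freed
prompt_embeds, negative_embeds = pipe.encode_prompt("<prompt>")
```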
@@ -422,13 +422,13 @@ from transformers import T5EncoderModel
 from diffusers.utils import pt_to_pil
 
 text_encoder = T5EncoderModel.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
+    "DeepFloyd/IF-I-XL-v1.0", subfolder="text_encoder", device_map="auto", load_in_8bit=True, variant="8bit"
 )
 
 # text to image
 
 pipe = DiffusionPipeline.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0",
+    "DeepFloyd/IF-I-XL-v1.0",
     text_encoder=text_encoder,  # pass the previously instantiated 8bit text encoder
     unet=None,
     device_map="auto",
@@ -444,7 +444,7 @@ gc.collect()
 torch.cuda.empty_cache()
 
 pipe = IFPipeline.from_pretrained(
-    "DeepFloyd/IF-I-IF-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto"
+    "DeepFloyd/IF-I-XL-v1.0", text_encoder=None, variant="fp16", torch_dtype=torch.float16, device_map="auto"
 )
 
 generator = torch.Generator().manual_seed(0)
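
With the seeded generator and the embeddings computed earlier, the stage-1 denoising call that would follow looks roughly like this (argument names assumed from the IF API):

```py
image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    generator=generator,
    output_type="pt",
).images
```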