@@ -57,6 +57,50 @@ prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
 image = pipe(prompt=prompt).images[0]
 ```
 
+### Image-to-image
+
+You can use SDXL as follows for *image-to-image*:
+
+```py
+import torch
+from diffusers import StableDiffusionXLImg2ImgPipeline
+from diffusers.utils import load_image
+
+pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-refiner-0.9", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
+)
+pipe = pipe.to("cuda")
+url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
+
+init_image = load_image(url).convert("RGB")
+prompt = "a photo of an astronaut riding a horse on mars"
+image = pipe(prompt, image=init_image).images[0]
+```
+
+### Inpainting
+
+You can use SDXL as follows for *inpainting*:
+
+```py
+import torch
+from diffusers import StableDiffusionXLInpaintPipeline
+from diffusers.utils import load_image
+
+pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
+)
+pipe.to("cuda")
+
+img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
+mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
+
+init_image = load_image(img_url).convert("RGB")
+mask_image = load_image(mask_url).convert("RGB")
+
+prompt = "A majestic tiger sitting on a bench"
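+# strength controls how strongly the masked region is re-noised; lower values stay closer to the original image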
+image = pipe(prompt=prompt, image=init_image, mask_image=mask_image, num_inference_steps=50, strength=0.80).images[0]
+```
+
 ### Refining the image output
 
 In addition to the [base model checkpoint](https://huggingface.co/stabilityai/stable-diffusion-xl-base-0.9),
@@ -183,24 +227,65 @@ image = refiner(prompt=prompt, image=image[None, :]).images[0]
 |---|---|
 | ![](https://huggingface.co/datasets/diffusers/docs-images/resolve/main/sd_xl/init_image.png) | ![](https://huggingface.co/datasets/diffusers/docs-images/resolve/main/sd_xl/refined_image.png) |
 
-### Image-to-image
+<Tip>
 
-```py
-import torch
-from diffusers import StableDiffusionXLImg2ImgPipeline
+The refiner can also be used very effectively in an inpainting setting. To do so, just make
+sure to use the [`StableDiffusionXLInpaintPipeline`] class as shown below.
+
+</Tip>
+
+To use the refiner for inpainting in the Ensemble of Expert Denoisers setting, you can do the following:
+
+```py
+import torch
+from diffusers import StableDiffusionXLInpaintPipeline
 from diffusers.utils import load_image
 
-pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-refiner-0.9", torch_dtype=torch.float16
+pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
 )
-pipe = pipe.to("cuda")
-url = "https://huggingface.co/datasets/patrickvonplaten/images/resolve/main/aa_xl/000000009.png"
+pipe.to("cuda")
 
-init_image = load_image(url).convert("RGB")
-prompt = "a photo of an astronaut riding a horse on mars"
-image = pipe(prompt, image=init_image).images[0]
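+# reuse the base pipeline's second text encoder and VAE so the refiner does not load duplicate weights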
+refiner = StableDiffusionXLInpaintPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-refiner-0.9",
+    text_encoder_2=pipe.text_encoder_2,
+    vae=pipe.vae,
+    torch_dtype=torch.float16,
+    use_safetensors=True,
+    variant="fp16",
+)
+refiner.to("cuda")
+
+img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
+mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
+
+init_image = load_image(img_url).convert("RGB")
+mask_image = load_image(mask_url).convert("RGB")
+
+prompt = "A majestic tiger sitting on a bench"
+num_inference_steps = 75
+high_noise_frac = 0.7
+
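+# the base model denoises the first (high-noise) fraction of the steps and hands off latents at high_noise_frac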
+image = pipe(
+    prompt=prompt,
+    image=init_image,
+    mask_image=mask_image,
+    num_inference_steps=num_inference_steps,
+    strength=0.80,
+    denoising_end=high_noise_frac,
+    output_type="latent",
+).images
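+# the refiner picks up at the same fraction and finishes the remaining (low-noise) steps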
+image = refiner(
+    prompt=prompt,
+    image=image,
+    mask_image=mask_image,
+    num_inference_steps=num_inference_steps,
+    denoising_start=high_noise_frac,
+).images[0]
 ```
 
+To use the refiner for inpainting in the standard SDE-style setting, simply remove `denoising_end` and `denoising_start` and choose a smaller
+number of inference steps for the refiner.
+
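+For illustration, here is a minimal sketch of that standard two-pass setting, reusing the `pipe`, `refiner`, `init_image`, `mask_image` and `prompt` objects from the snippet above. Dropping `output_type="latent"` (so the refiner receives a regular image) and the refiner values of 30 steps and 0.3 strength are assumptions for this sketch, not values from the commit:
+
+```py
+# first pass: plain SDXL inpainting with the base model
+image = pipe(
+    prompt=prompt,
+    image=init_image,
+    mask_image=mask_image,
+    num_inference_steps=75,
+    strength=0.80,
+).images[0]
+
+# second pass: refine the inpainted result; fewer steps and a low strength keep it close to the first pass
+image = refiner(
+    prompt=prompt,
+    image=image,
+    mask_image=mask_image,
+    num_inference_steps=30,
+    strength=0.30,
+).images[0]
+```
+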
 ### Loading single file checkpoints / original file format
 
 By making use of [`~diffusers.loaders.FromSingleFileMixin.from_single_file`] you can also load the
@@ -271,3 +356,9 @@ pip install xformers
 [[autodoc]] StableDiffusionXLImg2ImgPipeline
     - all
     - __call__
+
+## StableDiffusionXLInpaintPipeline
+
+[[autodoc]] StableDiffusionXLInpaintPipeline
+    - all
+    - __call__