Commit 5cbed8e

Fix inpainting script (huggingface#258)
* expand latents before the check, style
* update readme
1 parent 11133dc · commit 5cbed8e

File tree

2 files changed: +58 −14 lines changed

examples/inference/inpainting.py

Lines changed: 15 additions & 14 deletions

```diff
@@ -11,7 +11,7 @@
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 
 
-def preprocess(image):
+def preprocess_image(image):
     w, h = image.size
     w, h = map(lambda x: x - x % 32, (w, h))  # resize to integer multiple of 32
     image = image.resize((w, h), resample=PIL.Image.LANCZOS)
```
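
For context, the renamed `preprocess_image` maps a PIL image to the normalized NCHW tensor the VAE expects. The middle of the function is not part of this diff, so the sketch below fills it in consistent with the `torch.from_numpy` and `2.0 * image - 1.0` lines shown in the next hunk; treat the array-conversion lines as an assumption. A minimal shape walk-through for a 512×512 RGB input:

```python
import numpy as np
import PIL.Image
import torch

image = PIL.Image.new("RGB", (512, 512))  # hypothetical input image

w, h = image.size
w, h = map(lambda x: x - x % 32, (w, h))  # snap down to a multiple of 32
image = image.resize((w, h), resample=PIL.Image.LANCZOS)
arr = np.array(image).astype(np.float32) / 255.0  # HWC in [0, 1] (assumed middle of the function)
arr = arr[None].transpose(0, 3, 1, 2)             # NCHW: (1, 3, 512, 512)
tensor = 2.0 * torch.from_numpy(arr) - 1.0        # [-1, 1], as returned by preprocess_image
```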

```diff
@@ -20,15 +20,16 @@ def preprocess(image):
     image = torch.from_numpy(image)
     return 2.0 * image - 1.0
 
+
 def preprocess_mask(mask):
-    mask=mask.convert("L")
+    mask = mask.convert("L")
     w, h = mask.size
     w, h = map(lambda x: x - x % 32, (w, h))  # resize to integer multiple of 32
-    mask = mask.resize((w//8, h//8), resample=PIL.Image.NEAREST)
+    mask = mask.resize((w // 8, h // 8), resample=PIL.Image.NEAREST)
     mask = np.array(mask).astype(np.float32) / 255.0
-    mask = np.tile(mask,(4,1,1))
-    mask = mask[None].transpose(0, 1, 2, 3)#what does this step do?
-    mask = 1 - mask #repaint white, keep black
+    mask = np.tile(mask, (4, 1, 1))
+    mask = mask[None].transpose(0, 1, 2, 3)  # what does this step do?
+    mask = 1 - mask  # repaint white, keep black
     mask = torch.from_numpy(mask)
     return mask
 
```
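A note on `preprocess_mask`, including the line the in-code comment asks about: the mask is resized to 1/8 resolution to match the VAE latent grid, tiled across the 4 latent channels, and inverted so white pixels mark the repaint region and black pixels the keep region. The questioned `transpose(0, 1, 2, 3)` is an identity permutation; the `[None]` alone adds the batch dimension. A small shape walk-through, assuming a 512×512 input mask:

```python
import numpy as np

mask = np.ones((64, 64), dtype=np.float32)  # 512x512 mask after the (w // 8, h // 8) resize; 1.0 = white
mask = np.tile(mask, (4, 1, 1))             # (4, 64, 64): one copy per latent channel
mask = mask[None].transpose(0, 1, 2, 3)     # (1, 4, 64, 64); the transpose is a no-op
assert mask.shape == (1, 4, 64, 64)
mask = 1 - mask                             # invert: white (repaint) -> 0, black (keep) -> 1
```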

```diff
@@ -90,25 +91,25 @@ def __call__(
 
         self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)
 
-        #preprocess image
-        init_image = preprocess(init_image).to(self.device)
+        # preprocess image
+        init_image = preprocess_image(init_image).to(self.device)
 
         # encode the init image into latents and scale the latents
         init_latents = self.vae.encode(init_image).sample()
         init_latents = 0.18215 * init_latents
+
+        # prepare init_latents noise to latents
+        init_latents = torch.cat([init_latents] * batch_size)
         init_latents_orig = init_latents
 
         # preprocess mask
         mask = preprocess_mask(mask_image).to(self.device)
         mask = torch.cat([mask] * batch_size)
 
-        #check sizes
+        # check sizes
         if not mask.shape == init_latents.shape:
             raise ValueError(f"The mask and init_image should be the same size!")
 
-        # prepare init_latents noise to latents
-        init_latents = torch.cat([init_latents] * batch_size)
-
         # get the original timestep using init_timestep
         init_timestep = int(num_inference_steps * strength) + offset
         init_timestep = min(init_timestep, num_inference_steps)
```
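
This hunk is the fix the commit title refers to: `init_latents` is expanded to the batch size before the shape check instead of after it. Previously the check compared the un-expanded latents, shape `(1, 4, h, w)`, against the batch-expanded mask, so any `batch_size > 1` tripped the "same size" error even for correctly sized inputs. A toy reproduction of the shapes, assuming a 512×512 input and `batch_size = 2`:

```python
import torch

batch_size = 2
init_latents = torch.randn(1, 4, 64, 64)  # VAE encoding of a single 512x512 image
mask = torch.cat([torch.randn(1, 4, 64, 64)] * batch_size)  # (2, 4, 64, 64)

# Old order: the check ran before expansion and failed whenever batch_size > 1.
assert mask.shape != init_latents.shape

# New order: expand first, then check; the shapes now agree.
init_latents = torch.cat([init_latents] * batch_size)  # (2, 4, 64, 64)
assert mask.shape == init_latents.shape
```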

```diff
@@ -172,9 +173,9 @@ def __call__(
             # compute the previous noisy sample x_t -> x_t-1
             latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs)["prev_sample"]
 
-            #masking
+            # masking
             init_latents_proper = self.scheduler.add_noise(init_latents_orig, noise, t)
-            latents = ( init_latents_proper * mask ) + ( latents * (1-mask) )
+            latents = (init_latents_proper * mask) + (latents * (1 - mask))
 
         # scale and decode the image latents with vae
         latents = 1 / 0.18215 * latents
```
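
The per-step masking is what makes this img2img loop an inpainting loop: at every timestep the clean source latents are re-noised to the current noise level and composited with the freshly denoised latents, so the keep region (mask == 1) always tracks the source image at that noise level while the repaint region (mask == 0) evolves freely. A standalone sketch of one blend step; the tensors are hypothetical and `scheduler.add_noise` is stubbed with the standard DDPM forward formula rather than a real scheduler:

```python
import torch

init_latents_orig = torch.randn(1, 4, 64, 64)  # clean latents of the source image
latents = torch.randn(1, 4, 64, 64)            # current denoised latents at step t
noise = torch.randn(1, 4, 64, 64)              # noise sampled once at the start
mask = torch.ones(1, 4, 64, 64)                # 1 = keep source, 0 = repaint

# Stand-in for scheduler.add_noise(init_latents_orig, noise, t):
# x_t = sqrt(alpha_cumprod_t) * x_0 + sqrt(1 - alpha_cumprod_t) * noise
alpha_cumprod_t = torch.tensor(0.5)  # hypothetical value for the current timestep
init_latents_proper = alpha_cumprod_t.sqrt() * init_latents_orig + (1 - alpha_cumprod_t).sqrt() * noise

# Composite: keep region follows the re-noised source, repaint region keeps the sample.
latents = (init_latents_proper * mask) + (latents * (1 - mask))
```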

examples/inference/readme.md

Lines changed: 43 additions & 0 deletions

```diff
@@ -52,3 +52,46 @@ You can also run this example on colab
 ## Tweak prompts reusing seeds and latents
 
 You can generate your own latents to reproduce results, or tweak your prompt on a specific result you liked. [This notebook](stable-diffusion-seeds.ipynb) shows how to do it step by step. You can also run it in Google Colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pcuenca/diffusers-examples/blob/main/notebooks/stable-diffusion-seeds.ipynb).
+
+
+## In-painting using Stable Diffusion
+
+The `inpainting.py` script implements `StableDiffusionInpaintingPipeline`. This script lets you edit specific parts of an image by providing a mask and text prompt.
+
+### How to use it
+
+```python
+from io import BytesIO
+
+from torch import autocast
+import requests
+import PIL
+
+from inpainting import StableDiffusionInpaintingPipeline
+
+def download_image(url):
+    response = requests.get(url)
+    return PIL.Image.open(BytesIO(response.content)).convert("RGB")
+
+img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
+mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
+
+init_image = download_image(img_url).resize((512, 512))
+mask_image = download_image(mask_url).resize((512, 512))
+
+device = "cuda"
+pipe = StableDiffusionInpaintingPipeline.from_pretrained(
+    "CompVis/stable-diffusion-v1-4",
+    revision="fp16",
+    torch_dtype=torch.float16,
+    use_auth_token=True
+).to(device)
+
+prompt = "a cat sitting on a bench"
+with autocast("cuda"):
+    images = pipe(prompt=prompt, init_image=init_image, mask_image=mask_image, strength=0.75)["sample"]
+
+images[0].save("cat_on_bench.png")
+```
+
+You can also run this example on colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patil-suraj/Notebooks/blob/master/in_painting_with_stable_diffusion_using_diffusers.ipynb)
```
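
One caveat on the snippet added above: it passes `torch_dtype=torch.float16` but only imports `autocast` from torch, so it fails with a `NameError` as written. A minimal runnable variant of the setup just adds the missing import:

```python
import torch  # missing from the readme snippet; needed for torch.float16
from torch import autocast

from inpainting import StableDiffusionInpaintingPipeline

pipe = StableDiffusionInpaintingPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    revision="fp16",
    torch_dtype=torch.float16,
    use_auth_token=True,
).to("cuda")
```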

0 commit comments