
Commit 3dc97bd

Update CLIPFeatureExtractor to CLIPImageProcessor and DPTFeatureExtractor to DPTImageProcessor (huggingface#9002)

* fix: update `CLIPFeatureExtractor` to `CLIPImageProcessor` in codebase
* `make style && make quality`
* Update `DPTFeatureExtractor` to `DPTImageProcessor` in codebase
* `make style`

Co-authored-by: Aryan <[email protected]>

1 parent 6d32b29 · commit 3dc97bd

30 files changed: +73 additions, −77 deletions
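In recent transformers releases, `CLIPFeatureExtractor` and `DPTFeatureExtractor` are deprecated aliases of `CLIPImageProcessor` and `DPTImageProcessor`, so this is a drop-in rename: only the import and class name change, while checkpoints and `from_pretrained` arguments stay the same. A minimal before/after sketch (the repo id below is illustrative, not part of this commit):

```python
# Before (deprecated alias):
# from transformers import CLIPFeatureExtractor
# feature_extractor = CLIPFeatureExtractor.from_pretrained(
#     "runwayml/stable-diffusion-v1-5", subfolder="feature_extractor"
# )

# After (this commit):
from transformers import CLIPImageProcessor

feature_extractor = CLIPImageProcessor.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="feature_extractor"
)
```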

docs/source/en/using-diffusers/custom_pipeline_overview.md

Lines changed: 2 additions & 2 deletions
@@ -289,9 +289,9 @@ scheduler = DPMSolverMultistepScheduler.from_pretrained(pipe_id, subfolder="sche
 3. Load an image processor:

 ```python
-from transformers import CLIPFeatureExtractor
+from transformers import CLIPImageProcessor

-feature_extractor = CLIPFeatureExtractor.from_pretrained(pipe_id, subfolder="feature_extractor")
+feature_extractor = CLIPImageProcessor.from_pretrained(pipe_id, subfolder="feature_extractor")
 ```

 <Tip warning={true}>
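For context, the renamed `CLIPImageProcessor` is called the same way as the old feature extractor once loaded; a minimal sketch, assuming `pipe_id` points at a Stable Diffusion style repo with a `feature_extractor` subfolder:

```python
import numpy as np
from PIL import Image
from transformers import CLIPImageProcessor

pipe_id = "runwayml/stable-diffusion-v1-5"  # illustrative repo id
feature_extractor = CLIPImageProcessor.from_pretrained(pipe_id, subfolder="feature_extractor")

# Preprocess a PIL image into the pixel_values tensor downstream components expect.
image = Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8))
inputs = feature_extractor(images=image, return_tensors="pt")
print(inputs.pixel_values.shape)  # typically torch.Size([1, 3, 224, 224])
```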

docs/source/en/using-diffusers/inference_with_tcd_lora.md

Lines changed: 2 additions & 2 deletions
@@ -212,14 +212,14 @@ TCD-LoRA is very versatile, and it can be combined with other adapter types like
 import torch
 import numpy as np
 from PIL import Image
-from transformers import DPTFeatureExtractor, DPTForDepthEstimation
+from transformers import DPTImageProcessor, DPTForDepthEstimation
 from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline
 from diffusers.utils import load_image, make_image_grid
 from scheduling_tcd import TCDScheduler

 device = "cuda"
 depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
-feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")
+feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")

 def get_depth_map(image):
     image = feature_extractor(images=image, return_tensors="pt").pixel_values.to(device)
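The hunk cuts off inside `get_depth_map`; for reference, a sketch of how such a depth-map helper typically continues in this guide (the interpolation size and normalization details below are assumptions about the surrounding, unchanged code, not part of this diff):

```python
def get_depth_map(image):
    image = feature_extractor(images=image, return_tensors="pt").pixel_values.to(device)
    with torch.no_grad(), torch.autocast(device):
        depth_map = depth_estimator(image).predicted_depth

    # Resize to the generation resolution and normalize to [0, 1].
    depth_map = torch.nn.functional.interpolate(
        depth_map.unsqueeze(1),
        size=(1024, 1024),
        mode="bicubic",
        align_corners=False,
    )
    depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_map = (depth_map - depth_min) / (depth_max - depth_min)

    # Repeat to 3 channels and convert back to a PIL image for the ControlNet input.
    image = torch.cat([depth_map] * 3, dim=1)
    image = image.permute(0, 2, 3, 1).cpu().numpy()[0]
    image = Image.fromarray((image * 255.0).clip(0, 255).astype(np.uint8))
    return image
```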

docs/source/ko/using-diffusers/loading.md

Lines changed: 1 addition & 1 deletion
@@ -307,7 +307,7 @@ print(pipeline)

 Looking at the output above, you can see that `pipeline` is an instance of [`StableDiffusionPipeline`] made up of the following seven components:

-- `"feature_extractor"`: an instance of [`~transformers.CLIPFeatureExtractor`]
+- `"feature_extractor"`: an instance of [`~transformers.CLIPImageProcessor`]
 - `"safety_checker"`: a [component](https://github.com/huggingface/diffusers/blob/e55687e1e15407f60f32242027b7bb8170e58266/src/diffusers/pipelines/stable_diffusion/safety_checker.py#L32) that screens for harmful content
 - `"scheduler"`: an instance of [`PNDMScheduler`]
 - `"text_encoder"`: an instance of [`~transformers.CLIPTextModel`]

docs/source/ko/using-diffusers/textual_inversion_inference.md

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ import PIL
 from PIL import Image

 from diffusers import StableDiffusionPipeline
-from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
+from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer


 def image_grid(imgs, rows, cols):

examples/community/README.md

Lines changed: 4 additions & 4 deletions
@@ -1435,9 +1435,9 @@ import requests
 import torch
 from diffusers import DiffusionPipeline
 from PIL import Image
-from transformers import CLIPFeatureExtractor, CLIPModel
+from transformers import CLIPImageProcessor, CLIPModel

-feature_extractor = CLIPFeatureExtractor.from_pretrained(
+feature_extractor = CLIPImageProcessor.from_pretrained(
     "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
 )
 clip_model = CLIPModel.from_pretrained(

@@ -2122,15 +2122,15 @@ import torch
 import open_clip
 from open_clip import SimpleTokenizer
 from diffusers import DiffusionPipeline
-from transformers import CLIPFeatureExtractor, CLIPModel
+from transformers import CLIPImageProcessor, CLIPModel


 def download_image(url):
     response = requests.get(url)
     return PIL.Image.open(BytesIO(response.content)).convert("RGB")

 # Loading additional models
-feature_extractor = CLIPFeatureExtractor.from_pretrained(
+feature_extractor = CLIPImageProcessor.from_pretrained(
     "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
 )
 clip_model = CLIPModel.from_pretrained(
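Both README snippets feed the renamed processor into the CLIP-guided community pipelines; a minimal wiring sketch, under the assumption that the surrounding README passes the components to `DiffusionPipeline.from_pretrained` roughly like this (the base model and `custom_pipeline` ids are illustrative):

```python
import torch
from diffusers import DiffusionPipeline
from transformers import CLIPImageProcessor, CLIPModel

clip_id = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
feature_extractor = CLIPImageProcessor.from_pretrained(clip_id)
clip_model = CLIPModel.from_pretrained(clip_id, torch_dtype=torch.float16)

pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    custom_pipeline="clip_guided_stable_diffusion",  # assumed community pipeline id
    clip_model=clip_model,
    feature_extractor=feature_extractor,
    torch_dtype=torch.float16,
).to("cuda")
```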

examples/community/clip_guided_images_mixing_stable_diffusion.py

Lines changed: 2 additions & 2 deletions
@@ -7,7 +7,7 @@
 import torch
 from torch.nn import functional as F
 from torchvision import transforms
-from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextModel, CLIPTokenizer
+from transformers import CLIPImageProcessor, CLIPModel, CLIPTextModel, CLIPTokenizer

 from diffusers import (
     AutoencoderKL,

@@ -86,7 +86,7 @@ def __init__(
         tokenizer: CLIPTokenizer,
         unet: UNet2DConditionModel,
         scheduler: Union[PNDMScheduler, LMSDiscreteScheduler, DDIMScheduler, DPMSolverMultistepScheduler],
-        feature_extractor: CLIPFeatureExtractor,
+        feature_extractor: CLIPImageProcessor,
         coca_model=None,
         coca_tokenizer=None,
         coca_transform=None,

examples/community/clip_guided_stable_diffusion_img2img.py

Lines changed: 4 additions & 4 deletions
@@ -7,7 +7,7 @@
 from torch import nn
 from torch.nn import functional as F
 from torchvision import transforms
-from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTextModel, CLIPTokenizer
+from transformers import CLIPImageProcessor, CLIPModel, CLIPTextModel, CLIPTokenizer

 from diffusers import (
     AutoencoderKL,

@@ -32,9 +32,9 @@
 import torch
 from diffusers import DiffusionPipeline
 from PIL import Image
-from transformers import CLIPFeatureExtractor, CLIPModel
+from transformers import CLIPImageProcessor, CLIPModel

-feature_extractor = CLIPFeatureExtractor.from_pretrained(
+feature_extractor = CLIPImageProcessor.from_pretrained(
     "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
 )
 clip_model = CLIPModel.from_pretrained(

@@ -139,7 +139,7 @@ def __init__(
         tokenizer: CLIPTokenizer,
         unet: UNet2DConditionModel,
         scheduler: Union[PNDMScheduler, LMSDiscreteScheduler, DDIMScheduler, DPMSolverMultistepScheduler],
-        feature_extractor: CLIPFeatureExtractor,
+        feature_extractor: CLIPImageProcessor,
     ):
         super().__init__()
         self.register_modules(

examples/community/mixture_canvas.py

Lines changed: 2 additions & 2 deletions
@@ -9,7 +9,7 @@
 from numpy import exp, pi, sqrt
 from torchvision.transforms.functional import resize
 from tqdm.auto import tqdm
-from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
+from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer

 from diffusers.models import AutoencoderKL, UNet2DConditionModel
 from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin

@@ -275,7 +275,7 @@ def __init__(
         unet: UNet2DConditionModel,
         scheduler: Union[DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler],
         safety_checker: StableDiffusionSafetyChecker,
-        feature_extractor: CLIPFeatureExtractor,
+        feature_extractor: CLIPImageProcessor,
     ):
         super().__init__()
         self.register_modules(

examples/community/mixture_tiling.py

Lines changed: 2 additions & 2 deletions
@@ -15,7 +15,7 @@

 try:
     from ligo.segments import segment
-    from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
+    from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 except ImportError:
     raise ImportError("Please install transformers and ligo-segments to use the mixture pipeline")

@@ -144,7 +144,7 @@ def __init__(
         unet: UNet2DConditionModel,
         scheduler: Union[DDIMScheduler, PNDMScheduler],
         safety_checker: StableDiffusionSafetyChecker,
-        feature_extractor: CLIPFeatureExtractor,
+        feature_extractor: CLIPImageProcessor,
     ):
         super().__init__()
         self.register_modules(

examples/community/pipeline_stable_diffusion_xl_controlnet_adapter.py

Lines changed: 1 addition & 1 deletion
@@ -189,7 +189,7 @@ class StableDiffusionXLControlNetAdapterPipeline(
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
             Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details.
-        feature_extractor ([`CLIPFeatureExtractor`]):
+        feature_extractor ([`CLIPImageProcessor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """
