Skip to content

Commit 51b3ffd

Browse files
authored
Merge pull request #211 from huggingface/main
Merge changes
2 parents 51172ea + c372615 commit 51b3ffd

File tree

241 files changed

+14482
-2960
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

241 files changed

+14482
-2960
lines changed

.github/workflows/nightly_tests.yml

+104
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ jobs:
142142
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
143143
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
144144
CUBLAS_WORKSPACE_CONFIG: :16:8
145+
RUN_COMPILE: yes
145146
run: |
146147
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
147148
-s -v -k "not Flax and not Onnx" \
@@ -180,6 +181,55 @@ jobs:
180181
pip install slack_sdk tabulate
181182
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
182183
184+
run_torch_compile_tests:
185+
name: PyTorch Compile CUDA tests
186+
187+
runs-on:
188+
group: aws-g4dn-2xlarge
189+
190+
container:
191+
image: diffusers/diffusers-pytorch-compile-cuda
192+
options: --gpus 0 --shm-size "16gb" --ipc host
193+
194+
steps:
195+
- name: Checkout diffusers
196+
uses: actions/checkout@v3
197+
with:
198+
fetch-depth: 2
199+
200+
- name: NVIDIA-SMI
201+
run: |
202+
nvidia-smi
203+
- name: Install dependencies
204+
run: |
205+
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
206+
python -m uv pip install -e [quality,test,training]
207+
- name: Environment
208+
run: |
209+
python utils/print_env.py
210+
- name: Run torch compile tests on GPU
211+
env:
212+
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
213+
RUN_COMPILE: yes
214+
run: |
215+
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "compile" --make-reports=tests_torch_compile_cuda tests/
216+
- name: Failure short reports
217+
if: ${{ failure() }}
218+
run: cat reports/tests_torch_compile_cuda_failures_short.txt
219+
220+
- name: Test suite reports artifacts
221+
if: ${{ always() }}
222+
uses: actions/upload-artifact@v4
223+
with:
224+
name: torch_compile_test_reports
225+
path: reports
226+
227+
- name: Generate Report and Notify Channel
228+
if: always()
229+
run: |
230+
pip install slack_sdk tabulate
231+
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
232+
183233
run_big_gpu_torch_tests:
184234
name: Torch tests on big GPU
185235
strategy:
@@ -476,6 +526,60 @@ jobs:
476526
pip install slack_sdk tabulate
477527
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
478528
529+
run_nightly_pipeline_level_quantization_tests:
530+
name: Torch quantization nightly tests
531+
strategy:
532+
fail-fast: false
533+
max-parallel: 2
534+
runs-on:
535+
group: aws-g6e-xlarge-plus
536+
container:
537+
image: diffusers/diffusers-pytorch-cuda
538+
options: --shm-size "20gb" --ipc host --gpus 0
539+
steps:
540+
- name: Checkout diffusers
541+
uses: actions/checkout@v3
542+
with:
543+
fetch-depth: 2
544+
- name: NVIDIA-SMI
545+
run: nvidia-smi
546+
- name: Install dependencies
547+
run: |
548+
python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
549+
python -m uv pip install -e [quality,test]
550+
python -m uv pip install -U bitsandbytes optimum_quanto
551+
python -m uv pip install pytest-reportlog
552+
- name: Environment
553+
run: |
554+
python utils/print_env.py
555+
- name: Pipeline-level quantization tests on GPU
556+
env:
557+
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
558+
# https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
559+
CUBLAS_WORKSPACE_CONFIG: :16:8
560+
BIG_GPU_MEMORY: 40
561+
run: |
562+
python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
563+
--make-reports=tests_pipeline_level_quant_torch_cuda \
564+
--report-log=tests_pipeline_level_quant_torch_cuda.log \
565+
tests/quantization/test_pipeline_level_quantization.py
566+
- name: Failure short reports
567+
if: ${{ failure() }}
568+
run: |
569+
cat reports/tests_pipeline_level_quant_torch_cuda_stats.txt
570+
cat reports/tests_pipeline_level_quant_torch_cuda_failures_short.txt
571+
- name: Test suite reports artifacts
572+
if: ${{ always() }}
573+
uses: actions/upload-artifact@v4
574+
with:
575+
name: torch_cuda_pipeline_level_quant_reports
576+
path: reports
577+
- name: Generate Report and Notify Channel
578+
if: always()
579+
run: |
580+
pip install slack_sdk tabulate
581+
python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
582+
479583
# M1 runner currently not well supported
480584
# TODO: (Dhruv) add these back when we setup better testing for Apple Silicon
481585
# run_nightly_tests_apple_m1:

.github/workflows/pr_tests.yml

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ on:
1111
- "tests/**.py"
1212
- ".github/**.yml"
1313
- "utils/**.py"
14+
- "setup.py"
1415
push:
1516
branches:
1617
- ci-*

.github/workflows/release_tests_fast.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ jobs:
335335
- name: Environment
336336
run: |
337337
python utils/print_env.py
338-
- name: Run example tests on GPU
338+
- name: Run torch compile tests on GPU
339339
env:
340340
HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
341341
RUN_COMPILE: yes

docs/source/en/_toctree.yml

+24-17
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,8 @@
1717
title: AutoPipeline
1818
- local: tutorials/basic_training
1919
title: Train a diffusion model
20-
- local: tutorials/using_peft_for_inference
21-
title: Load LoRAs for inference
2220
- local: tutorials/fast_diffusion
2321
title: Accelerate inference of text-to-image diffusion models
24-
- local: tutorials/inference_with_big_models
25-
title: Working with big models
2622
title: Tutorials
2723
- sections:
2824
- local: using-diffusers/loading
@@ -33,11 +29,24 @@
3329
title: Load schedulers and models
3430
- local: using-diffusers/other-formats
3531
title: Model files and layouts
36-
- local: using-diffusers/loading_adapters
37-
title: Load adapters
3832
- local: using-diffusers/push_to_hub
3933
title: Push files to the Hub
4034
title: Load pipelines and adapters
35+
- sections:
36+
- local: tutorials/using_peft_for_inference
37+
title: LoRA
38+
- local: using-diffusers/ip_adapter
39+
title: IP-Adapter
40+
- local: using-diffusers/controlnet
41+
title: ControlNet
42+
- local: using-diffusers/t2i_adapter
43+
title: T2I-Adapter
44+
- local: using-diffusers/dreambooth
45+
title: DreamBooth
46+
- local: using-diffusers/textual_inversion_inference
47+
title: Textual inversion
48+
title: Adapters
49+
isExpanded: false
4150
- sections:
4251
- local: using-diffusers/unconditional_image_generation
4352
title: Unconditional image generation
@@ -59,8 +68,6 @@
5968
title: Create a server
6069
- local: training/distributed_inference
6170
title: Distributed inference
62-
- local: using-diffusers/merge_loras
63-
title: Merge LoRAs
6471
- local: using-diffusers/scheduler_features
6572
title: Scheduler features
6673
- local: using-diffusers/callback
@@ -97,20 +104,12 @@
97104
title: SDXL Turbo
98105
- local: using-diffusers/kandinsky
99106
title: Kandinsky
100-
- local: using-diffusers/ip_adapter
101-
title: IP-Adapter
102107
- local: using-diffusers/omnigen
103108
title: OmniGen
104109
- local: using-diffusers/pag
105110
title: PAG
106-
- local: using-diffusers/controlnet
107-
title: ControlNet
108-
- local: using-diffusers/t2i_adapter
109-
title: T2I-Adapter
110111
- local: using-diffusers/inference_with_lcm
111112
title: Latent Consistency Model
112-
- local: using-diffusers/textual_inversion_inference
113-
title: Textual inversion
114113
- local: using-diffusers/shap-e
115114
title: Shap-E
116115
- local: using-diffusers/diffedit
@@ -180,7 +179,7 @@
180179
title: Quantization Methods
181180
- sections:
182181
- local: optimization/fp16
183-
title: Speed up inference
182+
title: Accelerate inference
184183
- local: optimization/memory
185184
title: Reduce memory usage
186185
- local: optimization/torch2.0
@@ -296,6 +295,8 @@
296295
title: CogView4Transformer2DModel
297296
- local: api/models/consisid_transformer3d
298297
title: ConsisIDTransformer3DModel
298+
- local: api/models/cosmos_transformer3d
299+
title: CosmosTransformer3DModel
299300
- local: api/models/dit_transformer2d
300301
title: DiTTransformer2DModel
301302
- local: api/models/easyanimate_transformer3d
@@ -364,6 +365,8 @@
364365
title: AutoencoderKLAllegro
365366
- local: api/models/autoencoderkl_cogvideox
366367
title: AutoencoderKLCogVideoX
368+
- local: api/models/autoencoderkl_cosmos
369+
title: AutoencoderKLCosmos
367370
- local: api/models/autoencoder_kl_hunyuan_video
368371
title: AutoencoderKLHunyuanVideo
369372
- local: api/models/autoencoderkl_ltx_video
@@ -434,6 +437,8 @@
434437
title: ControlNet-XS with Stable Diffusion XL
435438
- local: api/pipelines/controlnet_union
436439
title: ControlNetUnion
440+
- local: api/pipelines/cosmos
441+
title: Cosmos
437442
- local: api/pipelines/dance_diffusion
438443
title: Dance Diffusion
439444
- local: api/pipelines/ddim
@@ -452,6 +457,8 @@
452457
title: Flux
453458
- local: api/pipelines/control_flux_inpaint
454459
title: FluxControlInpaint
460+
- local: api/pipelines/framepack
461+
title: Framepack
455462
- local: api/pipelines/hidream
456463
title: HiDream-I1
457464
- local: api/pipelines/hunyuandit
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
9+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
specific language governing permissions and limitations under the License. -->
11+
12+
# AutoencoderKLCosmos
13+
14+
`AutoencoderKLCosmos` is the variational autoencoder (VAE) from NVIDIA's [Cosmos Tokenizers](https://github.com/NVIDIA/Cosmos-Tokenizer).
15+
16+
Supported models:
17+
- [nvidia/Cosmos-1.0-Tokenizer-CV8x8x8](https://huggingface.co/nvidia/Cosmos-1.0-Tokenizer-CV8x8x8)
18+
19+
The model can be loaded with the following code snippet.
20+
21+
```python
22+
from diffusers import AutoencoderKLCosmos
23+
24+
vae = AutoencoderKLCosmos.from_pretrained("nvidia/Cosmos-1.0-Tokenizer-CV8x8x8", subfolder="vae")
25+
```
26+
27+
## AutoencoderKLCosmos
28+
29+
[[autodoc]] AutoencoderKLCosmos
30+
- decode
31+
- encode
32+
- all
33+
34+
## AutoencoderKLOutput
35+
36+
[[autodoc]] models.autoencoders.autoencoder_kl.AutoencoderKLOutput
37+
38+
## DecoderOutput
39+
40+
[[autodoc]] models.autoencoders.vae.DecoderOutput
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
9+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
specific language governing permissions and limitations under the License. -->
11+
12+
# CosmosTransformer3DModel
13+
14+
A Diffusion Transformer model for 3D video-like data was introduced in [Cosmos World Foundation Model Platform for Physical AI](https://huggingface.co/papers/2501.03575) by NVIDIA.
15+
16+
The model can be loaded with the following code snippet.
17+
18+
```python
19+
import torch
from diffusers import CosmosTransformer3DModel
20+
21+
transformer = CosmosTransformer3DModel.from_pretrained("nvidia/Cosmos-1.0-Diffusion-7B-Text2World", subfolder="transformer", torch_dtype=torch.bfloat16)
22+
```
23+
24+
## CosmosTransformer3DModel
25+
26+
[[autodoc]] CosmosTransformer3DModel
27+
28+
## Transformer2DModelOutput
29+
30+
[[autodoc]] models.modeling_outputs.Transformer2DModelOutput

docs/source/en/api/pipelines/animatediff.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -966,7 +966,7 @@ pipe.to("cuda")
966966
prompt = {
967967
0: "A caterpillar on a leaf, high quality, photorealistic",
968968
40: "A caterpillar transforming into a cocoon, on a leaf, near flowers, photorealistic",
969-
80: "A cocoon on a leaf, flowers in the backgrond, photorealistic",
969+
80: "A cocoon on a leaf, flowers in the background, photorealistic",
970970
120: "A cocoon maturing and a butterfly being born, flowers and leaves visible in the background, photorealistic",
971971
160: "A beautiful butterfly, vibrant colors, sitting on a leaf, flowers in the background, photorealistic",
972972
200: "A beautiful butterfly, flying away in a forest, photorealistic",
+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
<!-- Copyright 2024 The HuggingFace Team. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License. -->
14+
15+
# Cosmos
16+
17+
Cosmos was introduced in [Cosmos World Foundation Model Platform for Physical AI](https://huggingface.co/papers/2501.03575) by NVIDIA. The abstract from the paper is:
18+
19+
*Physical AI needs to be trained digitally first. It needs a digital twin of itself, the policy model, and a digital twin of the world, the world model. In this paper, we present the Cosmos World Foundation Model Platform to help developers build customized world models for their Physical AI setups. We position a world foundation model as a general-purpose world model that can be fine-tuned into customized world models for downstream applications. Our platform covers a video curation pipeline, pre-trained world foundation models, examples of post-training of pre-trained world foundation models, and video tokenizers. To help Physical AI builders solve the most critical problems of our society, we make our platform open-source and our models open-weight with permissive licenses available via https://github.com/NVIDIA/Cosmos.*
20+
21+
<Tip>
22+
23+
Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) to learn how to explore the tradeoff between scheduler speed and quality, and see the [reuse components across pipelines](../../using-diffusers/loading#reuse-a-pipeline) section to learn how to efficiently load the same components into multiple pipelines.
24+
25+
</Tip>
26+
27+
## CosmosTextToWorldPipeline
28+
29+
[[autodoc]] CosmosTextToWorldPipeline
30+
- all
31+
- __call__
32+
33+
## CosmosVideoToWorldPipeline
34+
35+
[[autodoc]] CosmosVideoToWorldPipeline
36+
- all
37+
- __call__
38+
39+
## CosmosPipelineOutput
40+
41+
[[autodoc]] pipelines.cosmos.pipeline_output.CosmosPipelineOutput

0 commit comments

Comments
 (0)