@@ -532,7 +532,7 @@ def __call__(
532532 width : Optional [int ] = None ,
533533 num_inference_steps : int = 50 ,
534534 sigmas : Optional [List [float ]] = None ,
535- guidance_scale : float = 1.0 ,
535+ guidance_scale : Optional [ float ] = None ,
536536 num_images_per_prompt : int = 1 ,
537537 generator : Optional [Union [torch .Generator , List [torch .Generator ]]] = None ,
538538 latents : Optional [torch .Tensor ] = None ,
@@ -559,7 +559,12 @@ def __call__(
559559 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is
560560 not greater than `1`).
561561 true_cfg_scale (`float`, *optional*, defaults to 1.0):
562- When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
562+ Guidance scale as defined in [Classifier-Free Diffusion
563+ Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
564+ of the [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
565+ setting `true_cfg_scale > 1` and providing a `negative_prompt`. A higher guidance scale encourages the
566+ model to generate images that are closely linked to the text `prompt`, usually at the expense of lower
567+ image quality.
563568 height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
564569 The height in pixels of the generated image. This is set to 1024 by default for the best results.
565570 width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
@@ -571,17 +576,16 @@ def __call__(
571576 Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
572577 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
573578 will be used.
574- guidance_scale (`float`, *optional*, defaults to 3.5):
575- Guidance scale as defined in [Classifier-Free Diffusion
576- Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
577- of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
578- `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
579- the text `prompt`, usually at the expense of lower image quality.
580-
581- This parameter in the pipeline is there to support future guidance-distilled models when they come up.
582- Note that passing `guidance_scale` to the pipeline is ineffective. To enable classifier-free guidance,
583- please pass `true_cfg_scale` and `negative_prompt` (even an empty negative prompt like " ") should
584- enable classifier-free guidance computations.
579+ guidance_scale (`float`, *optional*, defaults to None):
580+ A guidance scale value for guidance-distilled models. Unlike traditional classifier-free guidance,
581+ where the guidance scale is applied during inference through noise prediction rescaling,
582+ guidance-distilled models take the guidance scale directly as an input parameter during the forward
583+ pass. Guidance scale is enabled by setting `guidance_scale > 1`. A higher guidance scale encourages
584+ the model to generate images that are closely linked to the text `prompt`, usually at the expense of
585+ lower image quality. This parameter is there to support future guidance-distilled models when they
586+ come up: it is required for such models and ignored (with a warning) otherwise. To enable traditional
587+ classifier-free guidance, please pass `true_cfg_scale > 1.0` and a `negative_prompt` (even an empty
588+ negative prompt like " " should enable classifier-free guidance computations).
585589 num_images_per_prompt (`int`, *optional*, defaults to 1):
586590 The number of images to generate per prompt.
587591 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -672,6 +676,16 @@ def __call__(
672676 has_neg_prompt = negative_prompt is not None or (
673677 negative_prompt_embeds is not None and negative_prompt_embeds_mask is not None
674678 )
679+
680+ if true_cfg_scale > 1 and not has_neg_prompt :
681+ logger .warning (
682+ f"true_cfg_scale is passed as { true_cfg_scale } , but classifier-free guidance is not enabled since no negative_prompt is provided."
683+ )
684+ elif true_cfg_scale <= 1 and has_neg_prompt :
685+ logger .warning (
686+ " negative_prompt is passed but classifier-free guidance is not enabled since true_cfg_scale <= 1"
687+ )
688+
675689 do_true_cfg = true_cfg_scale > 1 and has_neg_prompt
676690 prompt_embeds , prompt_embeds_mask = self .encode_prompt (
677691 image = prompt_image ,
@@ -734,10 +748,17 @@ def __call__(
734748 self ._num_timesteps = len (timesteps )
735749
736750 # handle guidance
737- if self .transformer .config .guidance_embeds :
751+ if self .transformer .config .guidance_embeds and guidance_scale is None :
752+ raise ValueError ("guidance_scale is required for guidance-distilled model." )
753+ elif self .transformer .config .guidance_embeds :
738754 guidance = torch .full ([1 ], guidance_scale , device = device , dtype = torch .float32 )
739755 guidance = guidance .expand (latents .shape [0 ])
740- else :
756+ elif not self .transformer .config .guidance_embeds and guidance_scale is not None :
757+ logger .warning (
758+ f"guidance_scale is passed as { guidance_scale } , but ignored since the model is not guidance-distilled."
759+ )
760+ guidance = None
761+ elif not self .transformer .config .guidance_embeds and guidance_scale is None :
741762 guidance = None
742763
743764 if self .attention_kwargs is None :
0 commit comments