@@ -808,6 +808,10 @@ class StableDiffusionGGML {
808
808
// TODO (Pix2Pix): add a dedicated image-guidance parameter (for now img_cfg_scale just reuses the distilled `guidance` value)
809
809
810
810
float img_cfg_scale = guidance;
811
+ if (img_cfg_scale != cfg_scale && !sd_version_use_concat (version)) {
812
+ LOG_WARN (" 2-conditioning CFG is not supported with this model, disabling it..." );
813
+ img_cfg_scale = cfg_scale;
814
+ }
811
815
812
816
LOG_DEBUG (" Sample" );
813
817
struct ggml_init_params params;
@@ -830,9 +834,8 @@ class StableDiffusionGGML {
830
834
831
835
struct ggml_tensor * noised_input = ggml_dup_tensor (work_ctx, noise);
832
836
833
- bool has_unconditioned = cfg_scale != 1.0 && uncond.c_crossattn != NULL ;
834
- bool has_img_guidance = version == VERSION_INSTRUCT_PIX2PIX && cfg_scale != img_cfg_scale;
835
- has_unconditioned = has_unconditioned || has_img_guidance;
837
+ bool has_unconditioned = img_cfg_scale != 1.0 && uncond.c_crossattn != NULL ;
838
+ bool has_img_guidance = cfg_scale != img_cfg_scale && uncond.c_crossattn != NULL ;
836
839
bool has_skiplayer = slg_scale != 0.0 && skip_layers.size () > 0 ;
837
840
838
841
// denoise wrapper
@@ -989,9 +992,13 @@ class StableDiffusionGGML {
989
992
if (has_img_guidance) {
990
993
latent_result = negative_data[i] + img_cfg_scale * (img_cond_data[i] - negative_data[i]) + cfg_scale * (positive_data[i] - img_cond_data[i]);
991
994
} else {
995
+ // img_cfg_scale == cfg_scale: the img_cond terms of the two-conditioning formula cancel, leaving standard CFG
992
996
latent_result = negative_data[i] + cfg_scale * (positive_data[i] - negative_data[i]);
993
997
}
994
998
}
999
+ } else if (has_img_guidance){
1000
+ // img_cfg_scale == 1: the negative terms of the two-conditioning formula cancel, so img_cond serves as the baseline
1001
+ latent_result = img_cond_data[i] + cfg_scale * (positive_data[i] - img_cond_data[i]);
995
1002
}
996
1003
if (is_skiplayer_step) {
997
1004
latent_result = latent_result + (positive_data[i] - skip_layer_data[i]) * slg_scale;
0 commit comments