Skip to content

Commit bb99623

Browse files
authored
Update IP Adapter tests to use cosine similarity distance (huggingface#6806)
* update
* update
1 parent fdf55b1 commit bb99623

File tree

1 file changed

+27
-19
lines changed

1 file changed

+27
-19
lines changed

tests/pipelines/ip_adapters/test_ip_adapter_stable_diffusion.py

Lines changed: 27 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
from diffusers.utils import load_image
3636
from diffusers.utils.testing_utils import (
3737
enable_full_determinism,
38+
numpy_cosine_similarity_distance,
3839
require_torch_gpu,
3940
slow,
4041
torch_device,
@@ -119,7 +120,8 @@ def test_text_to_image(self):
119120

120121
expected_slice = np.array([0.80810547, 0.88183594, 0.9296875, 0.9189453, 0.9848633, 1.0, 0.97021484, 1.0, 1.0])
121122

122-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
123+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
124+
assert max_diff < 5e-4
123125

124126
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-plus_sd15.bin")
125127

@@ -131,7 +133,8 @@ def test_text_to_image(self):
131133
[0.30444336, 0.26513672, 0.22436523, 0.2758789, 0.25585938, 0.20751953, 0.25390625, 0.24633789, 0.21923828]
132134
)
133135

134-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
136+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
137+
assert max_diff < 5e-4
135138

136139
def test_image_to_image(self):
137140
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
@@ -149,7 +152,8 @@ def test_image_to_image(self):
149152
[0.22167969, 0.21875, 0.21728516, 0.22607422, 0.21948242, 0.23925781, 0.22387695, 0.25268555, 0.2722168]
150153
)
151154

152-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
155+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
156+
assert max_diff < 5e-4
153157

154158
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-plus_sd15.bin")
155159

@@ -161,7 +165,8 @@ def test_image_to_image(self):
161165
[0.35913086, 0.265625, 0.26367188, 0.24658203, 0.19750977, 0.39990234, 0.15258789, 0.20336914, 0.5517578]
162166
)
163167

164-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
168+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
169+
assert max_diff < 5e-4
165170

166171
def test_inpainting(self):
167172
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
@@ -179,19 +184,17 @@ def test_inpainting(self):
179184
[0.27148438, 0.24047852, 0.22167969, 0.23217773, 0.21118164, 0.21142578, 0.21875, 0.20751953, 0.20019531]
180185
)
181186

182-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
187+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
188+
assert max_diff < 5e-4
183189

184190
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter-plus_sd15.bin")
185191

186192
inputs = self.get_dummy_inputs(for_inpainting=True)
187193
images = pipeline(**inputs).images
188194
image_slice = images[0, :3, :3, -1].flatten()
189195

190-
expected_slice = np.array(
191-
[0.27294922, 0.24023438, 0.21948242, 0.23242188, 0.20825195, 0.2055664, 0.21679688, 0.20336914, 0.19360352]
192-
)
193-
194-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
196+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
197+
assert max_diff < 5e-4
195198

196199
def test_text_to_image_model_cpu_offload(self):
197200
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
@@ -233,11 +236,10 @@ def test_text_to_image_full_face(self):
233236
images = pipeline(**inputs).images
234237
image_slice = images[0, :3, :3, -1].flatten()
235238

236-
expected_slice = np.array(
237-
[0.18115234, 0.13500977, 0.13427734, 0.24194336, 0.17138672, 0.16625977, 0.4260254, 0.43359375, 0.4416504]
238-
)
239+
expected_slice = np.array([0.1958, 0.1475, 0.1396, 0.2412, 0.1658, 0.1533, 0.3997, 0.4055, 0.4128])
239240

240-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
241+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
242+
assert max_diff < 5e-4
241243

242244
def test_unload(self):
243245
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
@@ -277,7 +279,9 @@ def test_multi(self):
277279
expected_slice = np.array(
278280
[0.5234375, 0.53515625, 0.5629883, 0.57128906, 0.59521484, 0.62109375, 0.57910156, 0.6201172, 0.6508789]
279281
)
280-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
282+
283+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
284+
assert max_diff < 5e-4
281285

282286

283287
@slow
@@ -314,7 +318,8 @@ def test_text_to_image_sdxl(self):
314318
]
315319
)
316320

317-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
321+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
322+
assert max_diff < 5e-4
318323

319324
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
320325

@@ -339,7 +344,8 @@ def test_text_to_image_sdxl(self):
339344
[0.0576596, 0.05600825, 0.04479006, 0.05288461, 0.05461192, 0.05137569, 0.04867965, 0.05301541, 0.04939842]
340345
)
341346

342-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
347+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
348+
assert max_diff < 5e-4
343349

344350
def test_image_to_image_sdxl(self):
345351
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="sdxl_models/image_encoder")
@@ -432,7 +438,8 @@ def test_inpainting_sdxl(self):
432438
[0.14181179, 0.1493012, 0.14283323, 0.14602411, 0.14915377, 0.15015268, 0.14725655, 0.15009224, 0.15164584]
433439
)
434440

435-
assert np.allclose(image_slice, expected_slice, atol=1e-3)
441+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
442+
assert max_diff < 5e-4
436443

437444
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
438445
feature_extractor = self.get_image_processor("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k")
@@ -457,4 +464,5 @@ def test_inpainting_sdxl(self):
457464

458465
expected_slice = np.array([0.1398, 0.1476, 0.1407, 0.1442, 0.1470, 0.1480, 0.1449, 0.1481, 0.1494])
459466

460-
assert np.allclose(image_slice, expected_slice, atol=1e-4, rtol=1e-4)
467+
max_diff = numpy_cosine_similarity_distance(image_slice, expected_slice)
468+
assert max_diff < 5e-4

0 commit comments

Comments (0)