@@ -584,41 +584,115 @@ def test_pairwise_distances_chunked():
584584 assert_raises (StopIteration , next , gen )
585585
586586
@pytest.mark.parametrize("x_array_constr", [np.array, csr_matrix],
                         ids=["dense", "sparse"])
@pytest.mark.parametrize("y_array_constr", [np.array, csr_matrix],
                         ids=["dense", "sparse"])
def test_euclidean_distances_known_result(x_array_constr, y_array_constr):
    # Hand-checkable case: the single 1-d point 0 lies at distance 1 from the
    # point 1 and at distance 2 from the point 2. Both inputs are built with
    # the parametrized constructors so every dense/sparse pairing is covered.
    left = x_array_constr([[0]])
    right = y_array_constr([[1], [2]])
    expected = [[1., 2.]]

    assert_allclose(euclidean_distances(left, right), expected)
598598
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("y_array_constr", [np.array, csr_matrix],
                         ids=["dense", "sparse"])
def test_euclidean_distances_with_norms(dtype, y_array_constr):
    # Passing correct precomputed squared norms ({X,Y}_norm_squared) must
    # leave the distances unchanged, while passing wrong ones must alter them.
    rng = np.random.RandomState(0)
    X = rng.random_sample((10, 10)).astype(dtype, copy=False)
    Y = rng.random_sample((20, 10)).astype(dtype, copy=False)

    # The squared row norms are always computed in float64, whatever ``dtype``
    # is: norms will only be used if their dtype is float64.
    X_as_f64 = X.astype(np.float64)
    Y_as_f64 = Y.astype(np.float64)
    X_norm_sq = (X_as_f64 ** 2).sum(axis=1).reshape(1, -1)
    Y_norm_sq = (Y_as_f64 ** 2).sum(axis=1).reshape(1, -1)

    # Only Y goes through the dense/sparse constructor; X stays dense.
    Y = y_array_constr(Y)

    reference = euclidean_distances(X, Y)
    with_correct_norms = [
        euclidean_distances(X, Y, X_norm_squared=X_norm_sq),
        euclidean_distances(X, Y, Y_norm_squared=Y_norm_sq),
        euclidean_distances(X, Y, X_norm_squared=X_norm_sq,
                            Y_norm_squared=Y_norm_sq),
    ]
    for candidate in with_correct_norms:
        assert_allclose(candidate, reference)

    # All-zero norms are deliberately wrong: the result must then differ from
    # the reference, i.e. the closeness assertion itself has to fail.
    zero_X_norms = np.zeros_like(X_norm_sq)
    zero_Y_norms = np.zeros_like(Y_norm_sq)
    wrong_D = euclidean_distances(X, Y,
                                  X_norm_squared=zero_X_norms,
                                  Y_norm_squared=zero_Y_norms)
    with pytest.raises(AssertionError):
        assert_allclose(wrong_D, reference)
630+
631+
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("x_array_constr", [np.array, csr_matrix],
                         ids=["dense", "sparse"])
@pytest.mark.parametrize("y_array_constr", [np.array, csr_matrix],
                         ids=["dense", "sparse"])
def test_euclidean_distances(dtype, x_array_constr, y_array_constr):
    # With two distinct inputs X and Y, euclidean_distances must agree with
    # scipy's cdist and return an array of the input dtype.
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10)).astype(dtype, copy=False)
    Y = rng.random_sample((10, 10)).astype(dtype, copy=False)

    # Zero out every entry below 0.8 so that the data is mostly zeros before
    # it is handed to the dense/sparse constructors.
    for data in (X, Y):
        data[data < 0.8] = 0

    # The reference is computed on the dense arrays, before any conversion.
    expected = cdist(X, Y)

    distances = euclidean_distances(x_array_constr(X), y_array_constr(Y))

    # The default rtol=1e-7 is too close to the float32 precision and fails
    # due to rounding errors, hence the looser tolerance.
    assert_allclose(distances, expected, rtol=1e-6)
    assert distances.dtype == dtype
656+
657+
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("x_array_constr", [np.array, csr_matrix],
                         ids=["dense", "sparse"])
def test_euclidean_distances_sym(dtype, x_array_constr):
    # With X as the only input, euclidean_distances must agree with scipy's
    # pdist (expanded to a square matrix) and return the input dtype.
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10)).astype(dtype, copy=False)

    # Zero out every entry below 0.8 so that the data is mostly zeros.
    below_threshold = X < 0.8
    X[below_threshold] = 0

    # The reference is computed on the dense array, before any conversion.
    condensed = pdist(X)
    expected = squareform(condensed)

    distances = euclidean_distances(x_array_constr(X))

    # The default rtol=1e-7 is too close to the float32 precision and fails
    # due to rounding errors, hence the looser tolerance.
    assert_allclose(distances, expected, rtol=1e-6)
    assert distances.dtype == dtype
677+
678+
@pytest.mark.parametrize(
    "dtype, eps, rtol",
    [(np.float32, 1e-4, 1e-5),
     pytest.param(
         np.float64, 1e-8, 0.99,
         marks=pytest.mark.xfail(reason='failing due to lack of precision'))])
@pytest.mark.parametrize("dim", [1, 1000000])
def test_euclidean_distances_extreme_values(dtype, eps, rtol, dim):
    # Check that euclidean distances is correct with float32 input thanks to
    # upcasting. On float64 there are still precision issues, which is why
    # that case is marked xfail in the parameter table.
    #
    # Parameters (all supplied by the parametrize decorators):
    #   dtype -- floating point type of both inputs
    #   eps   -- per-coordinate offset between the two points
    #   rtol  -- relative tolerance applied to the comparison with cdist
    #   dim   -- number of features of the two points
    #
    # X is a single point whose coordinates are all 1; Y is a single point
    # whose coordinates are all 1 + eps. np.full builds them directly instead
    # of going through a ``dim``-element Python list.
    X = np.full((1, dim), 1., dtype=dtype)
    Y = np.full((1, dim), 1. + eps, dtype=dtype)

    distances = euclidean_distances(X, Y)
    expected = cdist(X, Y)

    # Use the parametrized tolerance: it was previously ignored in favour of
    # a hard-coded 1e-5, which left the per-dtype ``rtol`` column of the
    # parameter table without any effect.
    assert_allclose(distances, expected, rtol=rtol)
622696
623697
624698def test_cosine_distances ():
0 commit comments