@@ -112,30 +112,45 @@ def test_hasher_zeros():
112112
113113@ignore_warnings (category = DeprecationWarning )
114114def test_hasher_alternate_sign ():
115- # the last two tokens produce a hash collision that sums as 0
116- X = [["foo" , "bar" , "baz" , "investigation need" , "records" ]]
115+ X = [list ("Thequickbrownfoxjumped" )]
117116
118117 Xt = FeatureHasher (alternate_sign = True , non_negative = False ,
119118 input_type = 'string' ).fit_transform (X )
120- assert_true (Xt .data .min () < 0 and Xt .data .max () > 0 )
121- # check that we have a collision that produces a 0 count
122- assert_true (len (Xt .data ) < len (X [0 ]))
123- assert_true ((Xt .data == 0. ).any ())
119+ assert Xt .data .min () < 0 and Xt .data .max () > 0
124120
125121 Xt = FeatureHasher (alternate_sign = True , non_negative = True ,
126122 input_type = 'string' ).fit_transform (X )
127- assert_true (( Xt .data >= 0 ). all ()) # all counts are positive
128- assert_true (( Xt . data == 0. ). any ()) # we still have a collision
123+ assert Xt .data . min () > 0
124+
129125 Xt = FeatureHasher (alternate_sign = False , non_negative = True ,
130126 input_type = 'string' ).fit_transform (X )
131- assert_true (( Xt .data > 0 ). all ()) # strictly positive counts
127+ assert Xt .data . min () > 0
132128 Xt_2 = FeatureHasher (alternate_sign = False , non_negative = False ,
133129 input_type = 'string' ).fit_transform (X )
134130 # With initially positive features, the non_negative option should
135131 # have no impact when alternate_sign=False
136132 assert_array_equal (Xt .data , Xt_2 .data )
137133
138134
135+ @ignore_warnings (category = DeprecationWarning )
136+ def test_hash_collisions ():
137+ X = [list ("Thequickbrownfoxjumped" )]
138+
139+ Xt = FeatureHasher (alternate_sign = True , non_negative = False ,
140+ n_features = 1 , input_type = 'string' ).fit_transform (X )
141+ # check that some of the hashed tokens are added
142+ # with an opposite sign and cancel out
143+ assert abs (Xt .data [0 ]) < len (X [0 ])
144+
145+ Xt = FeatureHasher (alternate_sign = True , non_negative = True ,
146+ n_features = 1 , input_type = 'string' ).fit_transform (X )
147+ assert abs (Xt .data [0 ]) < len (X [0 ])
148+
149+ Xt = FeatureHasher (alternate_sign = False , non_negative = True ,
150+ n_features = 1 , input_type = 'string' ).fit_transform (X )
151+ assert Xt .data [0 ] == len (X [0 ])
152+
153+
139154@ignore_warnings (category = DeprecationWarning )
140155def test_hasher_negative ():
141156 X = [{"foo" : 2 , "bar" : - 4 , "baz" : - 1 }.items ()]
0 commit comments