@@ -319,9 +319,11 @@ def test_sgd_proba(self):
319319 clf = self .factory (loss = "hinge" , alpha = 0.01 , n_iter = 10 ).fit (X , Y )
320320 assert_raises (NotImplementedError , clf .predict_proba , [3 , 2 ])
321321
322- # the log and modified_huber losses can output "probability" estimates
323- for loss in ("log" , "modified_huber" ):
324- clf = self .factory (loss = loss , alpha = 0.01 , n_iter = 10 ).fit (X , Y )
322+ # log and modified_huber losses can output probability estimates
323+ # binary case
324+ for loss in ["log" , "modified_huber" ]:
325+ clf = self .factory (loss = "modified_huber" , alpha = 0.01 , n_iter = 10 )
326+ clf .fit (X , Y )
325327 p = clf .predict_proba ([3 , 2 ])
326328 assert_true (p [0 , 1 ] > 0.5 )
327329 p = clf .predict_proba ([- 1 , - 1 ])
@@ -332,6 +334,49 @@ def test_sgd_proba(self):
332334 p = clf .predict_log_proba ([- 1 , - 1 ])
333335 assert_true (p [0 , 1 ] < p [0 , 0 ])
334336
337+ # log loss multiclass probability estimates
338+ clf = self .factory (loss = "log" , alpha = 0.01 , n_iter = 10 ).fit (X2 , Y2 )
339+
340+ d = clf .decision_function ([[.1 , - .1 ], [.3 , .2 ]])
341+ p = clf .predict_proba ([[.1 , - .1 ], [.3 , .2 ]])
342+ assert_array_equal (np .argmax (p , axis = 1 ), np .argmax (d , axis = 1 ))
343+ assert_almost_equal (p [0 ].sum (), 1 )
344+ assert_true (np .all (p [0 ] >= 0 ))
345+
346+ p = clf .predict_proba ([- 1 , - 1 ])
347+ d = clf .decision_function ([- 1 , - 1 ])
348+ assert_array_equal (np .argsort (p [0 ]), np .argsort (d [0 ]))
349+
350+ l = clf .predict_log_proba ([3 , 2 ])
351+ p = clf .predict_proba ([3 , 2 ])
352+ assert_array_almost_equal (np .log (p ), l )
353+
354+ l = clf .predict_log_proba ([- 1 , - 1 ])
355+ p = clf .predict_proba ([- 1 , - 1 ])
356+ assert_array_almost_equal (np .log (p ), l )
357+
358+ # Modified Huber multiclass probability estimates; requires a separate
359+ # test because the hard zero/one probabilities may destroy the
360+ # ordering present in decision_function output.
361+ clf = self .factory (loss = "modified_huber" , alpha = 0.01 , n_iter = 10 )
362+ clf .fit (X2 , Y2 )
363+ d = clf .decision_function ([3 , 2 ])
364+ p = clf .predict_proba ([3 , 2 ])
365+ if not isinstance (self , SparseSGDClassifierTestCase ):
366+ assert_equal (np .argmax (d , axis = 1 ), np .argmax (p , axis = 1 ))
367+ else : # XXX the sparse test gets a different X2 (?)
368+ assert_equal (np .argmin (d , axis = 1 ), np .argmin (p , axis = 1 ))
369+
370+ # the following sample produces decision_function values < -1,
371+ # which would cause naive normalization to fail (see comment
372+ # in SGDClassifier.predict_proba)
373+ x = X .mean (axis = 0 )
374+ d = clf .decision_function (x )
375+ if np .all (d < - 1 ): # XXX not true in sparse test case (why?)
376+ p = clf .predict_proba (x )
377+ assert_array_almost_equal (p [0 ], [1 / 3. ] * 3 )
378+
379+
335380 def test_sgd_l1 (self ):
336381 """Test L1 regularization"""
337382 n = len (X4 )
0 commit comments