44from scipy import linalg
55
66from sklearn .cross_validation import train_test_split
7- from sklearn .externals import joblib
87from sklearn .utils .testing import assert_array_almost_equal
98from sklearn .utils .testing import assert_true
109from sklearn .utils .testing import assert_less
@@ -154,7 +153,8 @@ def test_no_path_precomputed():
154153
155154
156155def test_no_path_all_precomputed ():
157- # Test that the ``return_path=False`` option with Gram and Xy remains correct
156+ # Test that the ``return_path=False`` option with Gram and Xy remains
157+ # correct
158158 X , y = 3 * diabetes .data , diabetes .target
159159 G = np .dot (X .T , X )
160160 Xy = np .dot (X .T , y )
@@ -182,13 +182,13 @@ def test_rank_deficient_design():
182182 # deficient input data (with n_features < rank) in the same way
183183 # as coordinate descent Lasso
184184 y = [5 , 0 , 5 ]
185- for X in ([[5 , 0 ],
186- [0 , 5 ],
185+ for X in ([[5 , 0 ],
186+ [0 , 5 ],
187187 [10 , 10 ]],
188188
189- [[10 , 10 , 0 ],
190- [1e-32 , 0 , 0 ],
191- [0 , 0 , 1 ]],
189+ [[10 , 10 , 0 ],
190+ [1e-32 , 0 , 0 ],
191+ [0 , 0 , 1 ]],
192192 ):
193193 # To be able to use the coefs to compute the objective function,
194194 # we need to turn off normalization
@@ -318,9 +318,9 @@ def test_lasso_lars_vs_lasso_cd_ill_conditioned2():
318318 # Note it used to be the case that Lars had to use the drop for good
319319 # strategy for this but this is no longer the case with the
320320 # equality_tolerance checks
321- X = [[1e20 , 1e20 , 0 ],
322- [- 1e-32 , 0 , 0 ],
323- [1 , 1 , 1 ]]
321+ X = [[1e20 , 1e20 , 0 ],
322+ [- 1e-32 , 0 , 0 ],
323+ [1 , 1 , 1 ]]
324324 y = [10 , 10 , 1 ]
325325 alpha = .0001
326326
def test_lars_path_positive_constraint():
    # Check the ``positive`` option of ``lars_path``.
    # We do the test on the diabetes dataset.

    # Ensure that we get negative coefficients when positive=False
    # and all non-negative coefficients when positive=True,
    # for both method='lar' (the default) and method='lasso'.
    for method in ['lar', 'lasso']:
        # unconstrained path: at least one coefficient must go negative
        alpha, active, coefs = \
            linear_model.lars_path(diabetes['data'], diabetes['target'],
                                   return_path=True, method=method,
                                   positive=False)
        assert_true(coefs.min() < 0)

        # positivity-constrained path: every coefficient stays >= 0
        alpha, active, coefs = \
            linear_model.lars_path(diabetes['data'], diabetes['target'],
                                   return_path=True, method=method,
                                   positive=True)
        assert_true(coefs.min() >= 0)
483467
# now we gonna test the positive option for all estimator classes

# keyword arguments shared by every estimator under test
default_parameter = {'fit_intercept': False}

# per-estimator extra constructor kwargs used by
# test_estimatorclasses_positive_constraint below
estimator_parameter_map = {'Lars': {'n_nonzero_coefs': 5},
                           'LassoLars': {'alpha': 0.1},
                           'LarsCV': {},
                           'LassoLarsCV': {},
                           'LassoLarsIC': {}}
495478
496479def test_estimatorclasses_positive_constraint ():
497480 # testing the transmissibility for the positive option of all estimator
498- # classes in this same function here
481+ # classes in this same function here
499482
500483 for estname in estimator_parameter_map :
501484 params = default_parameter .copy ()
@@ -516,12 +499,11 @@ def test_lasso_lars_vs_lasso_cd_positive(verbose=False):
516499 # option. However for the middle part, the comparison of coefficient values
517500 # for a range of alphas, we had to make an adaptations. See below.
518501
519-
520502 # not normalized data
521503 X = 3 * diabetes .data
522504
523505 alphas , _ , lasso_path = linear_model .lars_path (X , y , method = 'lasso' ,
524- positive = True )
506+ positive = True )
525507 lasso_cd = linear_model .Lasso (fit_intercept = False , tol = 1e-8 , positive = True )
526508 for c , a in zip (lasso_path .T , alphas ):
527509 if a == 0 :
@@ -531,7 +513,6 @@ def test_lasso_lars_vs_lasso_cd_positive(verbose=False):
531513 error = linalg .norm (c - lasso_cd .coef_ )
532514 assert_less (error , 0.01 )
533515
534-
535516 # The range of alphas chosen for coefficient comparison here is restricted
536517 # as compared with the above test without the positive option. This is due
537518 # to the circumstance that the Lars-Lasso algorithm does not converge to
@@ -542,25 +523,20 @@ def test_lasso_lars_vs_lasso_cd_positive(verbose=False):
542523 # https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff
543524
544525 for alpha in np .linspace (6e-1 , 1 - 1e-2 , 20 ):
545- clf1 = linear_model .LassoLars (
546- fit_intercept = False , alpha = alpha , normalize = False ,
547- positive = True ).fit (X , y )
548- clf2 = linear_model .Lasso (
549- fit_intercept = False , alpha = alpha , tol = 1e-8 ,
550- normalize = False , positive = True ).fit (X , y )
526+ clf1 = linear_model .LassoLars (fit_intercept = False , alpha = alpha ,
527+ normalize = False , positive = True ).fit (X , y )
528+ clf2 = linear_model .Lasso (fit_intercept = False , alpha = alpha , tol = 1e-8 ,
529+ normalize = False , positive = True ).fit (X , y )
551530 err = linalg .norm (clf1 .coef_ - clf2 .coef_ )
552531 assert_less (err , 1e-3 )
553532
554-
555533 # normalized data
556534 X = diabetes .data
557535 alphas , _ , lasso_path = linear_model .lars_path (X , y , method = 'lasso' ,
558- positive = True )
536+ positive = True )
559537 lasso_cd = linear_model .Lasso (fit_intercept = False , normalize = True ,
560538 tol = 1e-8 , positive = True )
561- for c , a in zip (lasso_path .T , alphas ):
562- if a == 0 :
563- continue
539+ for c , a in zip (lasso_path .T [:- 1 ], alphas [:- 1 ]): # don't include alpha=0
564540 lasso_cd .alpha = a
565541 lasso_cd .fit (X , y )
566542 error = linalg .norm (c - lasso_cd .coef_ )
0 commit comments