Skip to content

Commit 98fefd1

Browse files
committed
tests for comparison of results for Lasso and LarsLasso added.
1 parent db1f67b commit 98fefd1

File tree

1 file changed

+97
-0
lines changed

1 file changed

+97
-0
lines changed

sklearn/linear_model/tests/test_least_angle.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,3 +506,100 @@ def test_estimatorclasses_positive_constraint():
506506
estimator = getattr(linear_model, estname)(positive=True, **params)
507507
estimator.fit(diabetes['data'], diabetes['target'])
508508
assert_true(min(estimator.coef_) >= 0)
509+
510+
511+
def test_lasso_lars_vs_lasso_cd_positive(verbose=False):
    """Compare the positive LARS lasso path with positive Lasso (CD).

    For each non-zero alpha on the path returned by ``lars_path`` with
    ``method='lasso'`` and ``positive=True``, a ``Lasso`` estimator
    (coordinate descent) with ``positive=True`` is fitted at that alpha.
    The norm of the difference between its coefficients and the path
    coefficients must be below 0.01.

    The comparison runs twice: on ``3 * diabetes.data``, and on
    ``diabetes.data`` with ``normalize=True`` passed to ``Lasso``.

    ``verbose`` is accepted but not used by this test.
    """
    # Per the original author, this mirrors the preceding comparison test
    # with the positive option added.
    # NOTE(review): ``y`` is not defined in this function; presumably it is
    # the module-level target vector -- confirm against the file header.

    # First pass: rescaled data, no normalization inside Lasso.
    scaled_X = 3 * diabetes.data
    path_alphas, _, coef_path = linear_model.lars_path(
        scaled_X, y, method='lasso', positive=True)
    cd_model = linear_model.Lasso(fit_intercept=False, tol=1e-8,
                                  positive=True)
    for path_coef, alpha in zip(coef_path.T, path_alphas):
        # alpha == 0 entries on the path are skipped, not compared.
        if alpha != 0:
            cd_model.alpha = alpha
            cd_model.fit(scaled_X, y)
            deviation = linalg.norm(path_coef - cd_model.coef_)
            assert_less(deviation, 0.01)

    # Second pass: same check, with normalized data.
    plain_X = diabetes.data
    path_alphas, _, coef_path = linear_model.lars_path(
        plain_X, y, method='lasso', positive=True)
    cd_model = linear_model.Lasso(fit_intercept=False, normalize=True,
                                  tol=1e-8, positive=True)
    for path_coef, alpha in zip(coef_path.T, path_alphas):
        if alpha != 0:
            cd_model.alpha = alpha
            cd_model.fit(plain_X, y)
            deviation = linalg.norm(path_coef - cd_model.coef_)
            assert_less(deviation, 0.01)
543+
544+
545+
546+
def evaluate_lasso_lars_vs_lasso_cd_positive_middle_part():
    """Print the disagreement between positive LassoLars and positive Lasso.

    For 20 evenly spaced alphas between 1e-2 and 1 - 1e-2, both estimators
    are fitted with ``fit_intercept=False``, ``normalize=False`` and
    ``positive=True``. One line is printed per alpha, giving the norm of
    the coefficient difference and the number of strictly positive
    coefficients found by each estimator.

    This is a diagnostic helper, not a test: nothing is asserted. The
    original author noted that this part of the comparison does not
    confirm equality of results, and was unsure whether equality is even
    to be expected here.
    """
    # NOTE(review): ``X`` and ``y`` are not defined in this function;
    # presumably they are module-level fixtures -- confirm against the
    # file header.

    # Similar comparison to the path-based tests, using the estimators.
    report = 'alpha={} \t err={} \t num_coeffs=({}/{}) (LassoLars / Lasso)'
    shared = dict(fit_intercept=False, normalize=False, positive=True)

    for alpha in np.linspace(1e-2, 1 - 1e-2, 20):
        lars_model = linear_model.LassoLars(alpha=alpha, **shared)
        lars_model.fit(X, y)
        cd_model = linear_model.Lasso(alpha=alpha, tol=1e-8, **shared)
        cd_model.fit(X, y)

        gap = linalg.norm(lars_model.coef_ - cd_model.coef_)
        n_positive_lars = sum(lars_model.coef_ > 0)
        n_positive_cd = sum(cd_model.coef_ > 0)
        print(report.format(alpha, gap, n_positive_lars, n_positive_cd))
        # The strict check (err below 1e-3) is intentionally left disabled,
        # since the two estimators are not confirmed to agree here.
569+
570+
571+
'''
572+
Results produced:
573+
574+
In [19]: test_least_angle.evaluate_lasso_lars_vs_lasso_cd_positive_middle_part()
575+
alpha=0.01 err=25.7243223179 num_coeffs=(5/5) (LassoLars / Lasso)
576+
alpha=0.0615789473684 err=18.2545548874 num_coeffs=(5/5) (LassoLars / Lasso)
577+
alpha=0.113157894737 err=10.7847873501 num_coeffs=(5/5) (LassoLars / Lasso)
578+
alpha=0.164736842105 err=3.31501982266 num_coeffs=(5/5) (LassoLars / Lasso)
579+
alpha=0.216315789474 err=3.69727628935e-06 num_coeffs=(4/4) (LassoLars / Lasso)
580+
alpha=0.267894736842 err=3.56502617927e-06 num_coeffs=(4/4) (LassoLars / Lasso)
581+
alpha=0.319473684211 err=3.43302342569e-06 num_coeffs=(4/4) (LassoLars / Lasso)
582+
alpha=0.371052631579 err=1.05509699804e-06 num_coeffs=(3/3) (LassoLars / Lasso)
583+
alpha=0.422631578947 err=5.9269674114e-07 num_coeffs=(3/3) (LassoLars / Lasso)
584+
alpha=0.474210526316 err=1.56511823248e-06 num_coeffs=(3/3) (LassoLars / Lasso)
585+
alpha=0.525789473684 err=1.28253471214e-06 num_coeffs=(3/3) (LassoLars / Lasso)
586+
alpha=0.577368421053 err=1.00002954531e-06 num_coeffs=(3/3) (LassoLars / Lasso)
587+
alpha=0.628947368421 err=7.38618104242e-07 num_coeffs=(3/3) (LassoLars / Lasso)
588+
alpha=0.680526315789 err=6.78234533087e-07 num_coeffs=(3/3) (LassoLars / Lasso)
589+
alpha=0.732105263158 err=2.25315273789e-06 num_coeffs=(3/3) (LassoLars / Lasso)
590+
alpha=0.783684210526 err=1.98825668767e-06 num_coeffs=(3/3) (LassoLars / Lasso)
591+
alpha=0.835263157895 err=1.72877180183e-06 num_coeffs=(3/3) (LassoLars / Lasso)
592+
alpha=0.886842105263 err=1.58242049123e-06 num_coeffs=(3/3) (LassoLars / Lasso)
593+
alpha=0.938421052632 err=1.50923193078e-06 num_coeffs=(3/3) (LassoLars / Lasso)
594+
alpha=0.99 err=1.43604831986e-06 num_coeffs=(3/3) (LassoLars / Lasso)
595+
596+
597+
We see that the 'equality of results' is violated for 'small' alphas.
598+
This is mentioned in the original paper by Efron et al. 2004:
599+
600+
`The positive Lasso usually does not converge to the full OLS solution
601+
beta_{m}, even for very large choices of t.`
602+
603+
604+
'''
605+

0 commit comments

Comments
 (0)