2121"""
2222
2323
24- def _cholesky_omp (X , y , n_nonzero_coefs , tol = None , copy_X = True ):
24+ def _cholesky_omp (X , y , n_nonzero_coefs , tol = None , copy_X = True ,
25+ return_path = False ):
2526 """Orthogonal Matching Pursuit step using the Cholesky decomposition.
2627
2728 Parameters
@@ -45,13 +46,22 @@ def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True):
4546
4647 Returns
4748 -------
49+ return_path: bool, optional. Default: False
50+ Whether to return every value of the nonzero coefficients along the
51+ forward path. Useful for cross-validation.
52+
4853 gamma: array, shape = (n_nonzero_coefs,)
4954 Non-zero elements of the solution
5055
5156 idx: array, shape = (n_nonzero_coefs,)
5257 Indices of the positions of the elements in gamma within the solution
5358 vector
5459
60+ coefs, array, shape = (n_features, n_nonzero_coefs)
61+ The first k values of column k correspond to the coefficient value
62+ for the active features at that step. The lower left triangle contains
63+ garbage. Only returned if ``return_path=True``.
64+
5565 """
5666 if copy_X :
5767 X = X .copy ('F' )
@@ -71,6 +81,8 @@ def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True):
7181 max_features = X .shape [1 ] if tol is not None else n_nonzero_coefs
7282 L = np .empty ((max_features , max_features ), dtype = X .dtype )
7383 L [0 , 0 ] = 1.
84+ if return_path :
85+ coefs = np .empty_like (L )
7486
7587 while True :
7688 lam = np .argmax (np .abs (np .dot (X .T , residual )))
@@ -94,18 +106,22 @@ def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True):
94106 # solves LL'x = y as a composition of two triangular systems
95107 gamma , _ = potrs (L [:n_active , :n_active ], alpha [:n_active ], lower = True ,
96108 overwrite_b = False )
97-
109+ if return_path :
110+ coefs [:n_active , n_active - 1 ] = gamma
98111 residual = y - np .dot (X [:, :n_active ], gamma )
99112 if tol is not None and nrm2 (residual ) ** 2 <= tol :
100113 break
101114 elif n_active == max_features :
102115 break
103116
104- return gamma , indices [:n_active ]
117+ if return_path :
118+ return gamma , indices [:n_active ], coefs [:, :n_active ]
119+ else :
120+ return gamma , indices [:n_active ]
105121
106122
107123def _gram_omp (Gram , Xy , n_nonzero_coefs , tol_0 = None , tol = None ,
108- copy_Gram = True , copy_Xy = True ):
124+ copy_Gram = True , copy_Xy = True , return_path = False ):
109125 """Orthogonal Matching Pursuit step on a precomputed Gram matrix.
110126
111127 This function uses the the Cholesky decomposition method.
@@ -138,13 +154,22 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
138154
139155 Returns
140156 -------
157+ return_path: bool, optional. Default: False
158+ Whether to return every value of the nonzero coefficients along the
159+ forward path. Useful for cross-validation.
160+
141161 gamma: array, shape = (n_nonzero_coefs,)
142162 Non-zero elements of the solution
143163
144164 idx: array, shape = (n_nonzero_coefs,)
145165 Indices of the positions of the elements in gamma within the solution
146166 vector
147167
168+ coefs, array, shape = (n_features, n_nonzero_coefs)
169+ The first k values of column k correspond to the coefficient value
170+ for the active features at that step. The lower left triangle contains
171+ garbage. Only returned if ``return_path=True``.
172+
148173 """
149174 Gram = Gram .copy ('F' ) if copy_Gram else np .asfortranarray (Gram )
150175
@@ -165,6 +190,8 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
165190 max_features = len (Gram ) if tol is not None else n_nonzero_coefs
166191 L = np .empty ((max_features , max_features ), dtype = Gram .dtype )
167192 L [0 , 0 ] = 1.
193+ if return_path :
194+ coefs = np .empty_like (L )
168195
169196 while True :
170197 lam = np .argmax (np .abs (alpha ))
@@ -188,7 +215,8 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
188215 # solves LL'x = y as a composition of two triangular systems
189216 gamma , _ = potrs (L [:n_active , :n_active ], Xy [:n_active ], lower = True ,
190217 overwrite_b = False )
191-
218+ if return_path :
219+ coefs [:n_active , n_active - 1 ] = gamma
192220 beta = np .dot (Gram [:, :n_active ], gamma )
193221 alpha = Xy - beta
194222 if tol is not None :
@@ -200,11 +228,14 @@ def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
200228 elif n_active == max_features :
201229 break
202230
203- return gamma , indices [:n_active ]
231+ if return_path :
232+ return gamma , indices [:n_active ], coefs [:, :n_active ]
233+ else :
234+ return gamma , indices [:n_active ]
204235
205236
206237def orthogonal_mp (X , y , n_nonzero_coefs = None , tol = None , precompute_gram = False ,
207- copy_X = True ):
238+ copy_X = True , return_path = False ):
208239 """Orthogonal Matching Pursuit (OMP)
209240
210241 Solves n_targets Orthogonal Matching Pursuit problems.
@@ -241,10 +272,18 @@ def orthogonal_mp(X, y, n_nonzero_coefs=None, tol=None, precompute_gram=False,
241272 value is only helpful if X is already Fortran-ordered, otherwise a
242273 copy is made anyway.
243274
275+ return_path: bool, optional. Default: False
276+ Whether to return every value of the nonzero coefficients along the
277+ forward path. Useful for cross-validation.
278+
244279 Returns
245280 -------
246281 coef: array, shape = (n_features,) or (n_features, n_targets)
247- Coefficients of the OMP solution
282+ Coefficients of the OMP solution. If `return_path=True`, this contains
283+ the whole coefficient path. In this case its shape is
284+ (n_features, n_features) or (n_features, n_targets, n_features) and
285+ iterating over the last axis yields coefficients in increasing order
286+ of active features.
248287
249288 See also
250289 --------
@@ -297,17 +336,28 @@ def orthogonal_mp(X, y, n_nonzero_coefs=None, tol=None, precompute_gram=False,
297336 return orthogonal_mp_gram (G , Xy , n_nonzero_coefs , tol , norms_squared ,
298337 copy_Gram = copy_X , copy_Xy = False )
299338
300- coef = np .zeros ((X .shape [1 ], y .shape [1 ]))
301- for k in range (y .shape [1 ]):
302- x , idx = _cholesky_omp (X , y [:, k ], n_nonzero_coefs , tol ,
303- copy_X = copy_X )
304- coef [idx , k ] = x
339+ if return_path :
340+ coef = np .zeros ((X .shape [1 ], y .shape [1 ], X .shape [1 ]))
341+ else :
342+ coef = np .zeros ((X .shape [1 ], y .shape [1 ]))
343+
344+ for k in xrange (y .shape [1 ]):
345+ out = _cholesky_omp (X , y [:, k ], n_nonzero_coefs , tol ,
346+ copy_X = copy_X , return_path = return_path )
347+ if return_path :
348+ _ , idx , coefs = out
349+ coef = coef [:, :, :len (idx )]
350+ for n_active , x in enumerate (coefs .T ):
351+ coef [idx [:n_active + 1 ], k , n_active ] = x [:n_active + 1 ]
352+ else :
353+ x , idx = out
354+ coef [idx , k ] = x
305355 return np .squeeze (coef )
306356
307357
308358def orthogonal_mp_gram (Gram , Xy , n_nonzero_coefs = None , tol = None ,
309359 norms_squared = None , copy_Gram = True ,
310- copy_Xy = True ):
360+ copy_Xy = True , return_path = False ):
311361 """Gram Orthogonal Matching Pursuit (OMP)
312362
313363 Solves n_targets Orthogonal Matching Pursuit problems using only
@@ -340,10 +390,18 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None,
340390 Whether the covariance vector Xy must be copied by the algorithm.
341391 If False, it may be overwritten.
342392
393+ return_path: bool, optional. Default: False
394+ Whether to return every value of the nonzero coefficients along the
395+ forward path. Useful for cross-validation.
396+
343397 Returns
344398 -------
345399 coef: array, shape = (n_features,) or (n_features, n_targets)
346- Coefficients of the OMP solution
400+ Coefficients of the OMP solution. If `return_path=True`, this contains
401+ the whole coefficient path. In this case its shape is
402+ (n_features, n_features) or (n_features, n_targets, n_features) and
403+ iterating over the last axis yields coefficients in increasing order
404+ of active features.
347405
348406 See also
349407 --------
@@ -387,12 +445,26 @@ def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None,
387445 if tol is None and n_nonzero_coefs > len (Gram ):
388446 raise ValueError ("The number of atoms cannot be more than the number "
389447 "of features" )
390- coef = np .zeros ((len (Gram ), Xy .shape [1 ]))
448+
449+ if return_path :
450+ coef = np .zeros ((len (Gram ), Xy .shape [1 ], len (Gram )))
451+ else :
452+ coef = np .zeros ((len (Gram ), Xy .shape [1 ]))
453+
391454 for k in range (Xy .shape [1 ]):
392- x , idx = _gram_omp (Gram , Xy [:, k ], n_nonzero_coefs ,
455+ out = _gram_omp (Gram , Xy [:, k ], n_nonzero_coefs ,
393456 norms_squared [k ] if tol is not None else None , tol ,
394- copy_Gram = copy_Gram , copy_Xy = copy_Xy )
395- coef [idx , k ] = x
457+ copy_Gram = copy_Gram , copy_Xy = copy_Xy ,
458+ return_path = return_path )
459+ if return_path :
460+ _ , idx , coefs = out
461+ coef = coef [:, :, :len (idx )]
462+ for n_active , x in enumerate (coefs .T ):
463+ coef [idx [:n_active + 1 ], k , n_active ] = x [:n_active + 1 ]
464+ else :
465+ x , idx = out
466+ coef [idx , k ] = x
467+
396468 return np .squeeze (coef )
397469
398470
0 commit comments