99# License: BSD 3 clause
1010
1111from math import log , sqrt
12- import warnings
1312
1413import numpy as np
1514from scipy import linalg
16- from scipy import sparse
1715from scipy .special import gammaln
1816
1917from ..base import BaseEstimator , TransformerMixin
2018from ..utils import check_random_state , as_float_array
2119from ..utils import check_array
22- from ..utils import deprecated
23- from ..utils .sparsefuncs import mean_variance_axis0
24- from ..utils .extmath import (fast_logdet , safe_sparse_dot , randomized_svd ,
25- fast_dot )
20+ from ..utils .extmath import fast_dot , fast_logdet , randomized_svd
2621
2722
2823def _assess_dimension_ (spectrum , rank , n_samples , n_features ):
@@ -197,7 +192,6 @@ class PCA(BaseEstimator, TransformerMixin):
197192
198193 See also
199194 --------
200- ProbabilisticPCA
201195 RandomizedPCA
202196 KernelPCA
203197 SparsePCA
@@ -464,80 +458,6 @@ def score(self, X, y=None):
464458 return np .mean (self .score_samples (X ))
465459
466460
@deprecated("ProbabilisticPCA will be removed in 0.16. WARNING: the "
            "covariance estimation was previously incorrect, your "
            "output might be different than under the previous versions. "
            "Use PCA that implements score and score_samples. To work with "
            "homoscedastic=False, you should use FactorAnalysis.")
class ProbabilisticPCA(PCA):
    """Additional layer on top of PCA that adds a probabilistic evaluation"""
    __doc__ += PCA.__doc__

    def fit(self, X, y=None, homoscedastic=True):
        """Additionally to PCA.fit, learns a covariance model

        Parameters
        ----------
        X : array of shape(n_samples, n_features)
            The data to fit

        y : ignored
            Not used by this method.

        homoscedastic : bool, optional,
            If True, average variance across remaining dimensions

        Returns
        -------
        self : object
            The fitted estimator; ``covariance_`` is set as a side effect.
        """
        X = check_array(X)
        PCA.fit(self, X)

        n_features = X.shape[1]
        # Use the number of components actually kept by PCA.fit rather than
        # the constructor argument: ``self.n_components`` may be None, 'mle'
        # or a fraction, none of which is usable as a slice bound or in the
        # ``n_features - n_components`` arithmetic below.
        components = self.components_
        n_components = components.shape[0]

        explained_variance = self.explained_variance_.copy()
        if homoscedastic:
            explained_variance -= self.noise_variance_

        # Make the low rank part of the estimated covariance
        self.covariance_ = np.dot(components.T * explained_variance,
                                  components)

        if n_features == n_components:
            delta = 0.
        elif homoscedastic:
            delta = self.noise_variance_
        else:
            # Per-feature variance of what the kept components do not
            # reconstruct.
            Xr = X - self.mean_
            Xr -= np.dot(np.dot(Xr, components.T), components)
            delta = (Xr ** 2).mean(axis=0) / (n_features - n_components)

        # Add delta to the diagonal without extra allocation
        self.covariance_.flat[::n_features + 1] += delta

        return self

    def score(self, X, y=None):
        """Return a score associated to new data

        Parameters
        ----------
        X: array of shape(n_samples, n_features)
            The data to test

        y : ignored
            Not used by this method.

        Returns
        -------
        ll: array of shape (n_samples),
            log-likelihood of each row of X under the current model
        """
        Xr = X - self.mean_
        n_features = X.shape[1]
        # The precision matrix is recomputed on every call and kept on the
        # instance, as before.
        self.precision_ = linalg.inv(self.covariance_)
        log_like = -.5 * (Xr * (np.dot(Xr, self.precision_))).sum(axis=1)
        log_like -= .5 * (fast_logdet(self.covariance_)
                          + n_features * log(2. * np.pi))
        return log_like
539-
540-
541461class RandomizedPCA (BaseEstimator , TransformerMixin ):
542462 """Principal component analysis (PCA) using randomized SVD
543463
@@ -601,7 +521,6 @@ class RandomizedPCA(BaseEstimator, TransformerMixin):
601521 See also
602522 --------
603523 PCA
604- ProbabilisticPCA
605524 TruncatedSVD
606525
607526 References
@@ -614,13 +533,6 @@ class RandomizedPCA(BaseEstimator, TransformerMixin):
614533 .. [MRT] `A randomized algorithm for the decomposition of matrices
615534 Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert`
616535
617- Notes
618- -----
619- This class supports sparse matrix input for backward compatibility, but
620- actually computes a truncated SVD instead of a PCA in that case (i.e. no
621- centering is performed). This support is deprecated; use the class
622- TruncatedSVD for sparse matrix support.
623-
624536 """
625537
626538 def __init__ (self , n_components = None , copy = True , iterated_power = 3 ,
def fit(self, X, y=None):
    """Fit the model to X.

    Parameters
    ----------
    X : array-like
        Training data. It is validated with ``check_array`` before being
        handed to ``_fit``.

    y : ignored
        Not used by this method.

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    validated = check_array(X)
    self._fit(validated)
    return self
650562
651563 def _fit (self , X ):
@@ -663,24 +575,13 @@ def _fit(self, X):
663575 The input data, copied, centered and whitened when requested.
664576 """
665577 random_state = check_random_state (self .random_state )
666- if sparse .issparse (X ):
667- warnings .warn ("Sparse matrix support is deprecated in 0.15"
668- " and will be dropped in 0.17. In particular"
669- " computed explained variance is incorrect on"
670- " sparse data. Use TruncatedSVD instead." ,
671- DeprecationWarning )
672- else :
673- # not a sparse matrix, ensure this is a 2D array
674- X = np .atleast_2d (as_float_array (X , copy = self .copy ))
578+ X = np .atleast_2d (as_float_array (X , copy = self .copy ))
675579
676580 n_samples = X .shape [0 ]
677581
678- if sparse .issparse (X ):
679- self .mean_ = None
680- else :
681- # Center data
682- self .mean_ = np .mean (X , axis = 0 )
683- X -= self .mean_
582+ # Center data
583+ self .mean_ = np .mean (X , axis = 0 )
584+ X -= self .mean_
684585 if self .n_components is None :
685586 n_components = X .shape [1 ]
686587 else :
@@ -691,11 +592,7 @@ def _fit(self, X):
691592 random_state = random_state )
692593
693594 self .explained_variance_ = exp_var = (S ** 2 ) / n_samples
694- if sparse .issparse (X ):
695- _ , full_var = mean_variance_axis0 (X )
696- full_var = full_var .sum ()
697- else :
698- full_var = np .var (X , axis = 0 ).sum ()
595+ full_var = np .var (X , axis = 0 ).sum ()
699596 self .explained_variance_ratio_ = exp_var / full_var
700597
701598 if self .whiten :
def transform(self, X, y=None):
    """Project X onto the fitted components.

    Parameters
    ----------
    X : array-like
        Data to project. It is validated with ``check_array``.

    y : ignored
        Not used by this method.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)

    """
    data = check_array(X)
    # Subtract the mean learned during fit; a new array is built so the
    # validated input is not modified in place.
    if self.mean_ is not None:
        data = data - self.mean_
    return fast_dot(data, self.components_.T)
732628
def fit_transform(self, X, y=None):
    """Fit the model to X and return X projected onto the components.

    Parameters
    ----------
    X : array-like
        Training data. It is validated with ``check_array``.

    y : ignored
        Not used by this method.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)

    """
    # Project the array returned by ``_fit``, not the raw input.
    fitted = self._fit(check_array(X))
    return fast_dot(fitted, self.components_.T)
750646
def inverse_transform(self, X, y=None):
    """Transform data back to its original space.

    Parameters
    ----------
    X : array-like
        Data expressed in the component space. It is multiplied by
        ``components_`` without prior validation.

    y : ignored
        Not used by this method.

    Returns
    -------
    X_original : array-like
        ``X`` mapped back through ``components_``, with ``mean_`` added
        when it is not None.

    Notes
    -----
    If whitening is enabled, inverse_transform does not compute the
    exact inverse operation of transform.
    """
    restored = fast_dot(X, self.components_)
    if self.mean_ is None:
        return restored
    return restored + self.mean_
0 commit comments