Skip to content

Commit 88b05ff

Browse files
committed
MAINT remove deprecated PCA code
1 parent c508965 commit 88b05ff

File tree

4 files changed

+14
-253
lines changed

doc/modules/decomposition.rst

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,6 @@ The memory footprint of :class:`RandomizedPCA` is also proportional to
113113
:math:`2 \cdot n_{max} \cdot n_{components}` instead of :math:`n_{max}
114114
\cdot n_{min}` for the exact method.
115115

116-
Furthermore :class:`RandomizedPCA` is able to work with
117-
`scipy.sparse` matrices as input which make it suitable for reducing
118-
the dimensionality of features extracted from text documents for
119-
instance.
120-
121116
Note: the implementation of ``inverse_transform`` in :class:`RandomizedPCA`
122117
is not the exact inverse transform of ``transform`` even when
123118
``whiten=False`` (default).

sklearn/decomposition/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"""
66

77
from .nmf import NMF, ProjectedGradientNMF
8-
from .pca import PCA, RandomizedPCA, ProbabilisticPCA
8+
from .pca import PCA, RandomizedPCA
99
from .kernel_pca import KernelPCA
1010
from .sparse_pca import SparsePCA, MiniBatchSparsePCA
1111
from .truncated_svd import TruncatedSVD
@@ -23,7 +23,6 @@
2323
'MiniBatchSparsePCA',
2424
'NMF',
2525
'PCA',
26-
'ProbabilisticPCA',
2726
'ProjectedGradientNMF',
2827
'RandomizedPCA',
2928
'SparseCoder',

sklearn/decomposition/pca.py

Lines changed: 13 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,15 @@
99
# License: BSD 3 clause
1010

1111
from math import log, sqrt
12-
import warnings
1312

1413
import numpy as np
1514
from scipy import linalg
16-
from scipy import sparse
1715
from scipy.special import gammaln
1816

1917
from ..base import BaseEstimator, TransformerMixin
2018
from ..utils import check_random_state, as_float_array
2119
from ..utils import check_array
22-
from ..utils import deprecated
23-
from ..utils.sparsefuncs import mean_variance_axis0
24-
from ..utils.extmath import (fast_logdet, safe_sparse_dot, randomized_svd,
25-
fast_dot)
20+
from ..utils.extmath import fast_dot, fast_logdet, randomized_svd
2621

2722

2823
def _assess_dimension_(spectrum, rank, n_samples, n_features):
@@ -197,7 +192,6 @@ class PCA(BaseEstimator, TransformerMixin):
197192
198193
See also
199194
--------
200-
ProbabilisticPCA
201195
RandomizedPCA
202196
KernelPCA
203197
SparsePCA
@@ -464,80 +458,6 @@ def score(self, X, y=None):
464458
return np.mean(self.score_samples(X))
465459

466460

467-
@deprecated("ProbabilisticPCA will be removed in 0.16. WARNING: the "
468-
"covariance estimation was previously incorrect, your "
469-
"output might be different than under the previous versions. "
470-
"Use PCA that implements score and score_samples. To work with "
471-
"homoscedastic=False, you should use FactorAnalysis.")
472-
class ProbabilisticPCA(PCA):
473-
"""Additional layer on top of PCA that adds a probabilistic evaluation"""
474-
__doc__ += PCA.__doc__
475-
476-
def fit(self, X, y=None, homoscedastic=True):
477-
"""Additionally to PCA.fit, learns a covariance model
478-
479-
Parameters
480-
----------
481-
X : array of shape(n_samples, n_features)
482-
The data to fit
483-
484-
homoscedastic : bool, optional,
485-
If True, average variance across remaining dimensions
486-
"""
487-
X = check_array(X)
488-
PCA.fit(self, X)
489-
490-
n_samples, n_features = X.shape
491-
n_components = self.n_components
492-
if n_components is None:
493-
n_components = n_features
494-
495-
explained_variance = self.explained_variance_.copy()
496-
if homoscedastic:
497-
explained_variance -= self.noise_variance_
498-
499-
# Make the low rank part of the estimated covariance
500-
self.covariance_ = np.dot(self.components_[:n_components].T *
501-
explained_variance,
502-
self.components_[:n_components])
503-
504-
if n_features == n_components:
505-
delta = 0.
506-
elif homoscedastic:
507-
delta = self.noise_variance_
508-
else:
509-
Xr = X - self.mean_
510-
Xr -= np.dot(np.dot(Xr, self.components_.T), self.components_)
511-
delta = (Xr ** 2).mean(axis=0) / (n_features - n_components)
512-
513-
# Add delta to the diagonal without extra allocation
514-
self.covariance_.flat[::n_features + 1] += delta
515-
516-
return self
517-
518-
def score(self, X, y=None):
519-
"""Return a score associated to new data
520-
521-
Parameters
522-
----------
523-
X: array of shape(n_samples, n_features)
524-
The data to test
525-
526-
Returns
527-
-------
528-
ll: array of shape (n_samples),
529-
log-likelihood of each row of X under the current model
530-
"""
531-
Xr = X - self.mean_
532-
n_features = X.shape[1]
533-
log_like = np.zeros(X.shape[0])
534-
self.precision_ = linalg.inv(self.covariance_)
535-
log_like = -.5 * (Xr * (np.dot(Xr, self.precision_))).sum(axis=1)
536-
log_like -= .5 * (fast_logdet(self.covariance_)
537-
+ n_features * log(2. * np.pi))
538-
return log_like
539-
540-
541461
class RandomizedPCA(BaseEstimator, TransformerMixin):
542462
"""Principal component analysis (PCA) using randomized SVD
543463
@@ -601,7 +521,6 @@ class RandomizedPCA(BaseEstimator, TransformerMixin):
601521
See also
602522
--------
603523
PCA
604-
ProbabilisticPCA
605524
TruncatedSVD
606525
607526
References
@@ -614,13 +533,6 @@ class RandomizedPCA(BaseEstimator, TransformerMixin):
614533
.. [MRT] `A randomized algorithm for the decomposition of matrices
615534
Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert`
616535
617-
Notes
618-
-----
619-
This class supports sparse matrix input for backward compatibility, but
620-
actually computes a truncated SVD instead of a PCA in that case (i.e. no
621-
centering is performed). This support is deprecated; use the class
622-
TruncatedSVD for sparse matrix support.
623-
624536
"""
625537

626538
def __init__(self, n_components=None, copy=True, iterated_power=3,
@@ -645,7 +557,7 @@ def fit(self, X, y=None):
645557
self : object
646558
Returns the instance itself.
647559
"""
648-
self._fit(X)
560+
self._fit(check_array(X))
649561
return self
650562

651563
def _fit(self, X):
@@ -663,24 +575,13 @@ def _fit(self, X):
663575
The input data, copied, centered and whitened when requested.
664576
"""
665577
random_state = check_random_state(self.random_state)
666-
if sparse.issparse(X):
667-
warnings.warn("Sparse matrix support is deprecated in 0.15"
668-
" and will be dropped in 0.17. In particular"
669-
" computed explained variance is incorrect on"
670-
" sparse data. Use TruncatedSVD instead.",
671-
DeprecationWarning)
672-
else:
673-
# not a sparse matrix, ensure this is a 2D array
674-
X = np.atleast_2d(as_float_array(X, copy=self.copy))
578+
X = np.atleast_2d(as_float_array(X, copy=self.copy))
675579

676580
n_samples = X.shape[0]
677581

678-
if sparse.issparse(X):
679-
self.mean_ = None
680-
else:
681-
# Center data
682-
self.mean_ = np.mean(X, axis=0)
683-
X -= self.mean_
582+
# Center data
583+
self.mean_ = np.mean(X, axis=0)
584+
X -= self.mean_
684585
if self.n_components is None:
685586
n_components = X.shape[1]
686587
else:
@@ -691,11 +592,7 @@ def _fit(self, X):
691592
random_state=random_state)
692593

693594
self.explained_variance_ = exp_var = (S ** 2) / n_samples
694-
if sparse.issparse(X):
695-
_, full_var = mean_variance_axis0(X)
696-
full_var = full_var.sum()
697-
else:
698-
full_var = np.var(X, axis=0).sum()
595+
full_var = np.var(X, axis=0).sum()
699596
self.explained_variance_ratio_ = exp_var / full_var
700597

701598
if self.whiten:
@@ -722,12 +619,11 @@ def transform(self, X, y=None):
722619
X_new : array-like, shape (n_samples, n_components)
723620
724621
"""
725-
# XXX remove scipy.sparse support here in 0.16
726-
X = check_array(X, accept_sparse='csr')
622+
X = check_array(X)
727623
if self.mean_ is not None:
728624
X = X - self.mean_
729625

730-
X = safe_sparse_dot(X, self.components_.T)
626+
X = fast_dot(X, self.components_.T)
731627
return X
732628

733629
def fit_transform(self, X, y=None):
@@ -744,9 +640,9 @@ def fit_transform(self, X, y=None):
744640
X_new : array-like, shape (n_samples, n_components)
745641
746642
"""
747-
X = self._fit(check_array(X, accept_sparse='csr'))
748-
X = safe_sparse_dot(X, self.components_.T)
749-
return X
643+
X = check_array(X)
644+
X = self._fit(X)
645+
return fast_dot(X, self.components_.T)
750646

751647
def inverse_transform(self, X, y=None):
752648
"""Transform data back to its original space.
@@ -768,8 +664,7 @@ def inverse_transform(self, X, y=None):
768664
If whitening is enabled, inverse_transform does not compute the
769665
exact inverse operation of transform.
770666
"""
771-
# XXX remove scipy.sparse support here in 0.16
772-
X_original = safe_sparse_dot(X, self.components_)
667+
X_original = fast_dot(X, self.components_)
773668
if self.mean_ is not None:
774669
X_original = X_original + self.mean_
775670
return X_original

0 commit comments

Comments (0)