@@ -637,7 +637,7 @@ def transform(self, X, y=None, copy=None):
637637 return binarize (X , threshold = self .threshold , copy = copy )
638638
639639
640- def _transform_selected (X , transform , selected = "all" ):
640+ def _transform_selected (X , transform , selected = "all" , copy = True ):
641641 """Apply a transform function to portion of selected features
642642
643643 Parameters
@@ -648,40 +648,46 @@ def _transform_selected(X, transform, selected="all"):
648648 transform : callable
649649 A callable transform(X) -> X_transformed
650650
651+ copy : boolean, optional
652+ Copy X even if it could be avoided.
653+
651654 selected: "all" or array of indices or mask
652- Specify what features to apply the transform to.
655+ Specify which features to apply the transform to. May not be a mask
656+ for sparse X.
653657
654658 Returns
655659 -------
656660 X : array or sparse matrix, shape=(n_samples, n_features_new)
657661 """
658662 if selected == "all" :
659663 return transform (X )
660- elif len (selected ) == 0 :
664+
665+ X = atleast2d_or_csc (X , copy = copy )
666+
667+ if len (selected ) == 0 :
668+ return X
669+
670+ n_features = X .shape [1 ]
671+ ind = np .arange (n_features )
672+ sel = np .zeros (n_features , dtype = bool )
673+ sel [np .asarray (selected )] = True
674+ not_sel = np .logical_not (sel )
675+ n_selected = np .sum (sel )
676+
677+ if n_selected == 0 :
678+ # No features selected.
661679 return X
680+ elif n_selected == n_features :
681+ # All features selected.
682+ return transform (X )
662683 else :
663- X = atleast2d_or_csc (X )
664- n_features = X .shape [1 ]
665- ind = np .arange (n_features )
666- sel = np .zeros (n_features , dtype = bool )
667- sel [np .array (selected )] = True
668- not_sel = np .logical_not (sel )
669- n_selected = np .sum (sel )
670-
671- if n_selected == 0 :
672- # No features selected.
673- return X
674- elif n_selected == n_features :
675- # All features selected.
676- return transform (X )
677- else :
678- X_sel = transform (X [:, ind [sel ]])
679- X_not_sel = X [:, ind [not_sel ]]
684+ X_sel = transform (X [:, ind [sel ]])
685+ X_not_sel = X [:, ind [not_sel ]]
680686
681- if sp .issparse (X_sel ) or sp .issparse (X_not_sel ):
682- return sp .hstack ((X_sel , X_not_sel ))
683- else :
684- return np .hstack ((X_sel , X_not_sel ))
687+ if sp .issparse (X_sel ) or sp .issparse (X_not_sel ):
688+ return sp .hstack ((X_sel , X_not_sel ))
689+ else :
690+ return np .hstack ((X_sel , X_not_sel ))
685691
686692
687693class OneHotEncoder (BaseEstimator , TransformerMixin ):
@@ -693,8 +699,8 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
693699 feature. It is assumed that input features take on values in the range
694700 [0, n_values).
695701
696- This encoding is needed for feeding categorical data to scikit-learn
697- estimators.
702+ This encoding is needed for feeding categorical data to many scikit-learn
703+ estimators, notably linear models and SVMs with the standard kernels.
698704
699705 Parameters
700706 ----------
@@ -778,7 +784,7 @@ def fit(self, X, y=None):
778784 return self
779785
780786 def _fit_transform (self , X ):
781- """Asssumes X contains only categorical features."""
787+ """Assumes X contains only categorical features."""
782788 X = check_arrays (X , sparse_format = 'dense' , dtype = np .int )[0 ]
783789 if np .any (X < 0 ):
784790 raise ValueError ("X needs to contain only non-negative integers." )
@@ -826,7 +832,7 @@ def fit_transform(self, X, y=None):
826832 efficient. See fit for the parameters, transform for the return value.
827833 """
828834 return _transform_selected (X , self ._fit_transform ,
829- self .categorical_features )
835+ self .categorical_features , copy = True )
830836
831837 def _transform (self , X ):
832838 """Assumes X contains only categorical features."""
@@ -870,7 +876,7 @@ def transform(self, X):
870876 Transformed input.
871877 """
872878 return _transform_selected (X , self ._transform ,
873- self .categorical_features )
879+ self .categorical_features , copy = True )
874880
875881
876882class LabelEncoder (BaseEstimator , TransformerMixin ):
0 commit comments