@@ -1157,8 +1157,52 @@ def fit(self, X, y):
11571157 y : array-like of shape (n_samples,) or (n_samples, n_targets)
11581158 Target values
11591159 """
1160- y = check_array (y , copy = False , dtype = [np .float64 , np .float32 ],
1161- ensure_2d = False )
1160+ # This makes sure that there is no duplication in memory.
1161+ # Dealing right with copy_X is important in the following:
1162+ # Multiple functions touch X and subsamples of X and can induce a
1163+ # lot of duplication of memory
1164+ copy_X = self .copy_X and self .fit_intercept
1165+
1166+ check_y_params = dict (copy = False , dtype = [np .float64 , np .float32 ],
1167+ ensure_2d = False )
1168+ if isinstance (X , np .ndarray ) or sparse .isspmatrix (X ):
1169+ # Keep a reference to X
1170+ reference_to_old_X = X
1171+ # Let us not impose fortran ordering so far: it is
1172+ # not useful for the cross-validation loop and will be done
1173+ # by the model fitting itself
1174+
1175+ # Need to validate separately here.
1176+ # We can't pass multi_output=True because that would allow y to be
1177+ # csr. We also want to allow y to be float64 or float32, but check_X_y
1178+ # only supports converting to float64.
1179+ check_X_params = dict (accept_sparse = 'csc' ,
1180+ dtype = [np .float64 , np .float32 ], copy = False )
1181+ X , y = self ._validate_data (X , y ,
1182+ validate_separately = (check_X_params ,
1183+ check_y_params ))
1184+ if sparse .isspmatrix (X ):
1185+ if (hasattr (reference_to_old_X , "data" ) and
1186+ not np .may_share_memory (reference_to_old_X .data , X .data )):
1187+ # X is a sparse matrix and has been copied
1188+ copy_X = False
1189+ elif not np .may_share_memory (reference_to_old_X , X ):
1190+ # X has been copied
1191+ copy_X = False
1192+ del reference_to_old_X
1193+ else :
1194+ # Need to validate separately here.
1195+ # We can't pass multi_output=True because that would allow y to be
1196+ # csr. We also want to allow y to be float64 or float32, but check_X_y
1197+ # only supports converting to float64.
1198+ check_X_params = dict (accept_sparse = 'csc' ,
1199+ dtype = [np .float64 , np .float32 ], order = 'F' ,
1200+ copy = copy_X )
1201+ X , y = self ._validate_data (X , y ,
1202+ validate_separately = (check_X_params ,
1203+ check_y_params ))
1204+ copy_X = False
1205+
11621206 if y .shape [0 ] == 0 :
11631207 raise ValueError ("y has 0 samples: %r" % y )
11641208
@@ -1191,35 +1235,6 @@ def fit(self, X, y):
11911235 if self .selection not in ["random" , "cyclic" ]:
11921236 raise ValueError ("selection should be either random or cyclic." )
11931237
1194- # This makes sure that there is no duplication in memory.
1195- # Dealing right with copy_X is important in the following:
1196- # Multiple functions touch X and subsamples of X and can induce a
1197- # lot of duplication of memory
1198- copy_X = self .copy_X and self .fit_intercept
1199-
1200- if isinstance (X , np .ndarray ) or sparse .isspmatrix (X ):
1201- # Keep a reference to X
1202- reference_to_old_X = X
1203- # Let us not impose fortran ordering so far: it is
1204- # not useful for the cross-validation loop and will be done
1205- # by the model fitting itself
1206- X = self ._validate_data (X , accept_sparse = 'csc' ,
1207- dtype = [np .float64 , np .float32 ], copy = False )
1208- if sparse .isspmatrix (X ):
1209- if (hasattr (reference_to_old_X , "data" ) and
1210- not np .may_share_memory (reference_to_old_X .data , X .data )):
1211- # X is a sparse matrix and has been copied
1212- copy_X = False
1213- elif not np .may_share_memory (reference_to_old_X , X ):
1214- # X has been copied
1215- copy_X = False
1216- del reference_to_old_X
1217- else :
1218- X = self ._validate_data (X , accept_sparse = 'csc' ,
1219- dtype = [np .float64 , np .float32 ], order = 'F' ,
1220- copy = copy_X )
1221- copy_X = False
1222-
12231238 if X .shape [0 ] != y .shape [0 ]:
12241239 raise ValueError ("X and y have inconsistent dimensions (%d != %d)"
12251240 % (X .shape [0 ], y .shape [0 ]))
@@ -1842,9 +1857,15 @@ def fit(self, X, y):
18421857 To avoid memory re-allocation it is advised to allocate the
18431858 initial data in memory directly using that format.
18441859 """
1845- X = self ._validate_data (X , dtype = [np .float64 , np .float32 ], order = 'F' ,
1846- copy = self .copy_X and self .fit_intercept )
1847- y = check_array (y , dtype = X .dtype .type , ensure_2d = False )
1860+
1861+ # Need to validate separately here.
1862+ # We can't pass multi_output=True because that would allow y to be csr.
1863+ check_X_params = dict (dtype = [np .float64 , np .float32 ], order = 'F' ,
1864+ copy = self .copy_X and self .fit_intercept )
1865+ check_y_params = dict (ensure_2d = False )
1866+ X , y = self ._validate_data (X , y , validate_separately = (check_X_params ,
1867+ check_y_params ))
1868+ y = y .astype (X .dtype )
18481869
18491870 if hasattr (self , 'l1_ratio' ):
18501871 model_str = 'ElasticNet'
0 commit comments