@@ -134,7 +134,7 @@ def _loss_grad_lbfgs(self, packed_coef_inter, X, y, activations, deltas,
134134 with respect to the different parameters given in the initialization.
135135
136136 Returned gradients are packed in a single vector so it can be used
137- in lbgfs
137+ in lbfgs
138138
139139 Parameters
140140 ----------
@@ -345,8 +345,8 @@ def _fit(self, X, y, incremental=False):
345345 # First time training the model
346346 self._initialize(y, layer_units)
347347
348- # lbgfs does not support mini-batches
349- if self.solver == 'lbgfs':
348+ # lbfgs does not support mini-batches
349+ if self.solver == 'lbfgs':
350350 batch_size = n_samples
351351 elif self.batch_size == 'auto':
352352 batch_size = min(200, n_samples)
@@ -375,7 +375,7 @@ def _fit(self, X, y, incremental=False):
375375 intercept_grads, layer_units, incremental)
376376
377377 # Run the LBFGS solver
378- elif self.solver == 'lbgfs':
378+ elif self.solver == 'lbfgs':
379379 self._fit_lbfgs(X, y, activations, deltas, coef_grads,
380380 intercept_grads, layer_units)
381381 return self
@@ -422,7 +422,7 @@ def _validate_hyperparameters(self):
422422 if self.learning_rate not in ["constant", "invscaling", "adaptive"]:
423423 raise ValueError("learning rate %s is not supported. " %
424424 self.learning_rate)
425- supported_solvers = _STOCHASTIC_SOLVERS + ["lbgfs"]
425+ supported_solvers = _STOCHASTIC_SOLVERS + ["lbfgs"]
426426 if self.solver not in supported_solvers:
427427 raise ValueError("The solver %s is not supported. "
428428 " Expected one of: %s" %
@@ -704,10 +704,10 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
704704 - 'relu', the rectified linear unit function,
705705 returns f(x) = max(0, x)
706706
707- solver : {'lbgfs', 'sgd', 'adam'}, default 'adam'
707+ solver : {'lbfgs', 'sgd', 'adam'}, default 'adam'
708708 The solver for weight optimization.
709709
710- - 'lbgfs' is an optimizer in the family of quasi-Newton methods.
710+ - 'lbfgs' is an optimizer in the family of quasi-Newton methods.
711711
712712 - 'sgd' refers to stochastic gradient descent.
713713
@@ -717,15 +717,15 @@ class MLPClassifier(BaseMultilayerPerceptron, ClassifierMixin):
717717 Note: The default solver 'adam' works pretty well on relatively
718718 large datasets (with thousands of training samples or more) in terms of
719719 both training time and validation score.
720- For small datasets, however, 'lbgfs' can converge faster and perform
720+ For small datasets, however, 'lbfgs' can converge faster and perform
721721 better.
722722
723723 alpha : float, optional, default 0.0001
724724 L2 penalty (regularization term) parameter.
725725
726726 batch_size : int, optional, default 'auto'
727727 Size of minibatches for stochastic optimizers.
728- If the solver is 'lbgfs', the classifier will not use minibatch.
728+ If the solver is 'lbfgs', the classifier will not use minibatch.
729729 When set to "auto", `batch_size=min(200, n_samples)`
730730
731731 learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
@@ -1046,10 +1046,10 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
10461046 - 'relu', the rectified linear unit function,
10471047 returns f(x) = max(0, x)
10481048
1049- solver : {'lbgfs', 'sgd', 'adam'}, default 'adam'
1049+ solver : {'lbfgs', 'sgd', 'adam'}, default 'adam'
10501050 The solver for weight optimization.
10511051
1052- - 'lbgfs' is an optimizer in the family of quasi-Newton methods.
1052+ - 'lbfgs' is an optimizer in the family of quasi-Newton methods.
10531053
10541054 - 'sgd' refers to stochastic gradient descent.
10551055
@@ -1059,15 +1059,15 @@ class MLPRegressor(BaseMultilayerPerceptron, RegressorMixin):
10591059 Note: The default solver 'adam' works pretty well on relatively
10601060 large datasets (with thousands of training samples or more) in terms of
10611061 both training time and validation score.
1062- For small datasets, however, 'lbgfs' can converge faster and perform
1062+ For small datasets, however, 'lbfgs' can converge faster and perform
10631063 better.
10641064
10651065 alpha : float, optional, default 0.0001
10661066 L2 penalty (regularization term) parameter.
10671067
10681068 batch_size : int, optional, default 'auto'
10691069 Size of minibatches for stochastic optimizers.
1070- If the solver is 'lbgfs', the classifier will not use minibatch.
1070+ If the solver is 'lbfgs', the classifier will not use minibatch.
10711071 When set to "auto", `batch_size=min(200, n_samples)`
10721072
10731073 learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
0 commit comments