@@ -166,18 +166,30 @@ def __init__(self, penalty="l2", gamma=0, fit_intercept=True):
166
166
\left(
167
167
\sum_{i=0}^N y_i \log(\hat{y}_i) +
168
168
(1-y_i) \log(1-\hat{y}_i)
169
- \right) - \frac{\gamma}{2} ||\ mathbf{b}||_2
169
+ \right) - R(\mathbf{b}, \gamma)
170
170
\right]
171
-
172
- where :math:`\gamma` is a regularization weight, `N` is the number of
173
- examples in **y**, and **b** is the vector of model coefficients.
171
+
172
+ where
173
+
174
+ .. math::
175
+
176
+ R(\mathbf{b}, \gamma) = \left\{
177
+ \begin{array}{lr}
178
+ \frac{\gamma}{2} ||\mathbf{b}||_2^2 & :\texttt{ penalty = 'l2'}\\
179
+ \gamma ||\mathbf{b}||_1 & :\texttt{ penalty = 'l1'}
180
+ \end{array}
181
+ \right.
182
+
183
+ is a regularization penalty, :math:`\gamma` is a regularization weight,
184
+ `N` is the number of examples in **y**, and **b** is the vector of model
185
+ coefficients.
174
186
175
187
Parameters
176
188
----------
177
189
penalty : {'l1', 'l2'}
178
190
The type of regularization penalty to apply on the coefficients
179
191
`beta`. Default is 'l2'.
180
- gamma : float in [0, 1]
192
+ gamma : float
181
193
The regularization weight. Larger values correspond to larger
182
194
regularization penalties, and a value of 0 indicates no penalty.
183
195
Default is 0.
@@ -235,21 +247,24 @@ def _NLL(self, X, y, y_pred):
235
247
\text{NLL} = -\frac{1}{N} \left[
236
248
\left(
237
249
\sum_{i=0}^N y_i \log(\hat{y}_i) + (1-y_i) \log(1-\hat{y}_i)
238
- \right) - \frac{\gamma}{2} ||\ mathbf{b}||_2
250
+ \right) - R(\ mathbf{b}, \gamma)
239
251
\right]
240
252
"""
241
253
N , M = X .shape
254
+ beta , gamma = self .beta , self .gamma
242
255
order = 2 if self .penalty == "l2" else 1
256
+ norm_beta = np .linalg .norm (beta , ord = order )
257
+
243
258
nll = - np .log (y_pred [y == 1 ]).sum () - np .log (1 - y_pred [y == 0 ]).sum ()
244
- penalty = 0.5 * self . gamma * np . linalg . norm ( self . beta , ord = order ) ** 2
259
+ penalty = ( gamma / 2 ) * norm_beta ** 2 if order == 2 else gamma * norm_beta
245
260
return (penalty + nll ) / N
246
261
247
262
def _NLL_grad (self , X , y , y_pred ):
248
263
"""Gradient of the penalized negative log likelihood wrt beta"""
249
264
N , M = X .shape
250
265
l1norm = lambda x : np .linalg .norm (x , 1 ) # noqa: E731
251
266
p , beta , gamma = self .penalty , self .beta , self .gamma
252
- d_penalty = gamma * beta if p == "l2" else gamma * l1norm ( beta ) * np .sign (beta )
267
+ d_penalty = gamma * beta if p == "l2" else gamma * np .sign (beta )
253
268
return - (np .dot (y - y_pred , X ) + d_penalty ) / N
254
269
255
270
def predict (self , X ):
0 commit comments