Skip to content

Commit 24bd8f9

Browse files
committed
Merge pull request scikit-learn#3472 from arjoly/fix-metrics-division
MAINT move log_loss and hinge_loss to the classification metrics
2 parents c2b2ce4 + 85c9c84 commit 24bd8f9

File tree

6 files changed

+207
-206
lines changed

6 files changed

+207
-206
lines changed

sklearn/metrics/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,19 @@
66
from .ranking import auc
77
from .ranking import average_precision_score
88
from .ranking import label_ranking_average_precision_score
9-
from .ranking import log_loss
109
from .ranking import precision_recall_curve
1110
from .ranking import roc_auc_score
1211
from .ranking import roc_curve
13-
from .ranking import hinge_loss
1412

1513
from .classification import accuracy_score
1614
from .classification import classification_report
1715
from .classification import confusion_matrix
1816
from .classification import f1_score
1917
from .classification import fbeta_score
2018
from .classification import hamming_loss
19+
from .classification import hinge_loss
2120
from .classification import jaccard_similarity_score
21+
from .classification import log_loss
2222
from .classification import matthews_corrcoef
2323
from .classification import precision_recall_fscore_support
2424
from .classification import precision_score

sklearn/metrics/classification.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
from ..externals.six.moves import zip
2929
from ..preprocessing import label_binarize
30+
from ..preprocessing import LabelBinarizer
3031
from ..preprocessing import LabelEncoder
3132
from ..utils import check_array, check_consistent_length
3233
from ..utils import column_or_1d
@@ -1306,3 +1307,153 @@ def hamming_loss(y_true, y_pred, classes=None):
13061307
return sp_hamming(y_true, y_pred)
13071308
else:
13081309
raise ValueError("{0} is not supported".format(y_type))
1310+
1311+
1312+
def log_loss(y_true, y_pred, eps=1e-15, normalize=True):
    """Log loss, aka logistic loss or cross-entropy loss.

    This is the loss function used in (multinomial) logistic regression
    and extensions of it such as neural networks, defined as the negative
    log-likelihood of the true labels given a probabilistic classifier's
    predictions. For a single sample with true label yt in {0,1} and
    estimated probability yp that yt = 1, the log loss is

        -log P(yt|yp) = -(yt log(yp) + (1 - yt) log(1 - yp))

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels for n_samples samples.

    y_pred : array-like of float, shape = (n_samples, n_classes)
        Predicted probabilities, as returned by a classifier's
        predict_proba method. A 1-d array (or a single column) is
        interpreted as the probability of the second class of a binary
        problem.

    eps : float
        Log loss is undefined for p=0 or p=1, so probabilities are
        clipped to max(eps, min(1 - eps, p)).

    normalize : bool, optional (default=True)
        If true, return the mean loss per sample.
        Otherwise, return the sum of the per-sample losses.

    Returns
    -------
    loss : float

    Raises
    ------
    ValueError
        If ``y_pred`` does not hold numeric values, or if the number of
        classes derived from ``y_true`` differs from the number of columns
        of ``y_pred``.

    Examples
    --------
    >>> log_loss(["spam", "ham", "ham", "spam"],  # doctest: +ELLIPSIS
    ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])
    0.21616...

    References
    ----------
    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,
    p. 209.

    Notes
    -----
    The logarithm used is the natural logarithm (base-e).
    """
    # One indicator column per class; a single-column result is expanded
    # to the explicit two-column form [1 - t, t].
    lb = LabelBinarizer()
    T = lb.fit_transform(y_true)
    if T.shape[1] == 1:
        T = np.append(1 - T, T, axis=1)

    # Validate the dtype *before* clipping: depending on the NumPy version,
    # clipping non-numeric data (e.g. strings) either fails with an
    # unrelated error or returns a non-array, so checking the result of
    # np.clip is not a reliable way to report bad input.
    Y = np.asarray(y_pred)
    if Y.dtype.kind not in "buif":
        raise ValueError("y_pred should be an array of floats.")

    # Clipping keeps log() finite for probabilities of exactly 0 or 1.
    Y = np.clip(Y, eps, 1 - eps)

    # If y_pred is of single dimension, assume y_true to be binary
    # and then check.
    if Y.ndim == 1:
        Y = Y[:, np.newaxis]
    if Y.shape[1] == 1:
        Y = np.append(1 - Y, Y, axis=1)

    # Check if dimensions are consistent.
    check_consistent_length(T, Y)
    T = check_array(T)
    Y = check_array(Y)
    if T.shape[1] != Y.shape[1]:
        raise ValueError("y_true and y_pred have different number of classes "
                         "%d, %d" % (T.shape[1], Y.shape[1]))

    # Renormalize so that each row sums to one again after clipping.
    Y /= Y.sum(axis=1)[:, np.newaxis]
    loss = -(T * np.log(Y)).sum()
    return loss / T.shape[0] if normalize else loss
1391+
1392+
def hinge_loss(y_true, pred_decision, pos_label=None, neg_label=None):
    """Average hinge loss (non-regularized)

    Assuming labels in y_true are encoded with +1 and -1, when a prediction
    mistake is made, ``margin = y_true * pred_decision`` is always negative
    (since the signs disagree), implying ``1 - margin`` is always greater than
    1.  The cumulated hinge loss is therefore an upper bound of the number of
    mistakes made by the classifier.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True target, consisting of integers of two values. The positive label
        must be greater than the negative label.

    pred_decision : array, shape = [n_samples]
        Predicted decisions, as output by decision_function (floats).
        Only the binary case is handled; multi-class decision values are
        rejected.

    pos_label : optional (default=None)
        Not used by this implementation.

    neg_label : optional (default=None)
        Not used by this implementation.

    Returns
    -------
    loss : float

    Raises
    ------
    ValueError
        If ``y_true`` contains more than two distinct labels, or if
        ``pred_decision`` has more than one column.
    TypeError
        If ``pred_decision`` cannot be multiplied with the encoded labels
        (e.g. it does not hold numbers).

    References
    ----------
    .. [1] `Wikipedia entry on the Hinge loss
            <http://en.wikipedia.org/wiki/Hinge_loss>`_

    Examples
    --------
    >>> from sklearn import svm
    >>> from sklearn.metrics import hinge_loss
    >>> X = [[0], [1]]
    >>> y = [-1, 1]
    >>> est = svm.LinearSVC(random_state=0)
    >>> est.fit(X, y)
    LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
         intercept_scaling=1, loss='l2', multi_class='ovr', penalty='l2',
         random_state=0, tol=0.0001, verbose=0)
    >>> pred_decision = est.decision_function([[-2], [3], [0.5]])
    >>> pred_decision  # doctest: +ELLIPSIS
    array([-2.18...,  2.36...,  0.09...])
    >>> hinge_loss([-1, 1, 1], pred_decision)  # doctest: +ELLIPSIS
    0.30...

    """
    # TODO: multi-class hinge-loss
    check_consistent_length(y_true, pred_decision)
    y_true = column_or_1d(y_true)
    pred_decision = column_or_1d(pred_decision)

    # the rest of the code assumes that positive and negative labels
    # are encoded as +1 and -1 respectively
    lbin = LabelBinarizer(neg_label=-1)
    y_true = lbin.fit_transform(y_true)[:, 0]

    # NOTE(review): column_or_1d presumably already flattens or rejects 2-d
    # input, which would make the shape test below purely defensive --
    # confirm against its implementation before removing it.
    if len(lbin.classes_) > 2 or (pred_decision.ndim == 2
                                  and pred_decision.shape[1] != 1):
        raise ValueError("Multi-class hinge loss not supported")
    pred_decision = np.ravel(pred_decision)

    try:
        margin = y_true * pred_decision
    except TypeError:
        raise TypeError("pred_decision should be an array of floats.")

    # The hinge doesn't penalize good enough predictions: clamp the
    # per-sample loss at zero.
    losses = np.maximum(1 - margin, 0)
    return np.mean(losses)

sklearn/metrics/metrics.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,25 @@
11
import warnings
# Importing this module is deprecated: tell the user where the public
# names live now.
warnings.warn("sklearn.metrics.metrics is deprecated and will be removed in "
              "0.18. Please import from sklearn.metrics",
              DeprecationWarning)
55

66

77
from .ranking import auc
88
from .ranking import average_precision_score
99
from .ranking import label_ranking_average_precision_score
10-
from .ranking import log_loss
1110
from .ranking import precision_recall_curve
1211
from .ranking import roc_auc_score
1312
from .ranking import roc_curve
14-
from .ranking import hinge_loss
1513

1614
from .classification import accuracy_score
1715
from .classification import classification_report
1816
from .classification import confusion_matrix
1917
from .classification import f1_score
2018
from .classification import fbeta_score
2119
from .classification import hamming_loss
20+
from .classification import hinge_loss
2221
from .classification import jaccard_similarity_score
22+
from .classification import log_loss
2323
from .classification import matthews_corrcoef
2424
from .classification import precision_recall_fscore_support
2525
from .classification import precision_score

sklearn/metrics/ranking.py

Lines changed: 0 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -102,76 +102,6 @@ def auc(x, y, reorder=False):
102102
return area
103103

104104

105-
def hinge_loss(y_true, pred_decision, pos_label=None, neg_label=None):
    """Mean (non-regularized) hinge loss of binary decision values.

    With the labels of y_true encoded as +1 and -1, a wrong prediction
    gives a negative ``margin = y_true * pred_decision`` (the signs
    disagree), hence ``1 - margin`` exceeds 1.  Summed over the samples,
    the hinge loss therefore bounds the number of mistakes made by the
    classifier from above.

    Parameters
    ----------
    y_true : array, shape = [n_samples]
        True target, consisting of integers of two values. The positive label
        must be greater than the negative label.

    pred_decision : array, shape = [n_samples] or [n_samples, n_classes]
        Predicted decisions, as output by decision_function (floats).

    Returns
    -------
    loss : float

    References
    ----------
    .. [1] `Wikipedia entry on the Hinge loss
            <http://en.wikipedia.org/wiki/Hinge_loss>`_

    Examples
    --------
    >>> from sklearn import svm
    >>> from sklearn.metrics import hinge_loss
    >>> X = [[0], [1]]
    >>> y = [-1, 1]
    >>> est = svm.LinearSVC(random_state=0)
    >>> est.fit(X, y)
    LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
         intercept_scaling=1, loss='l2', multi_class='ovr', penalty='l2',
         random_state=0, tol=0.0001, verbose=0)
    >>> pred_decision = est.decision_function([[-2], [3], [0.5]])
    >>> pred_decision  # doctest: +ELLIPSIS
    array([-2.18...,  2.36...,  0.09...])
    >>> hinge_loss([-1, 1, 1], pred_decision)  # doctest: +ELLIPSIS
    0.30...

    """
    # TODO: multi-class hinge-loss
    check_consistent_length(y_true, pred_decision)
    y_true = column_or_1d(y_true)
    pred_decision = column_or_1d(pred_decision)

    # Everything below relies on the positive / negative labels being
    # encoded as +1 / -1.
    binarizer = LabelBinarizer(neg_label=-1)
    signed_targets = binarizer.fit_transform(y_true)[:, 0]

    is_multiclass = len(binarizer.classes_) > 2
    if not is_multiclass:
        is_multiclass = (pred_decision.ndim == 2
                         and pred_decision.shape[1] != 1)
    if is_multiclass:
        raise ValueError("Multi-class hinge loss not supported")
    scores = np.ravel(pred_decision)

    try:
        margin = signed_targets * scores
    except TypeError:
        raise TypeError("pred_decision should be an array of floats.")

    per_sample = 1 - margin
    # Predictions that are good enough carry no penalty.
    satisfied = per_sample <= 0
    per_sample[satisfied] = 0
    return np.mean(per_sample)
173-
174-
175105
def average_precision_score(y_true, y_score, average="macro",
176106
sample_weight=None):
177107
"""Compute average precision (AP) from prediction scores
@@ -617,86 +547,6 @@ class or confidence values.
617547
return fpr, tpr, thresholds
618548

619549

620-
def log_loss(y_true, y_pred, eps=1e-15, normalize=True):
    """Log loss, aka logistic loss or cross-entropy loss.

    The negative log-likelihood of the true labels given the predictions
    of a probabilistic classifier; it is the loss used in (multinomial)
    logistic regression and in extensions of it such as neural networks.
    For a single sample with true label yt in {0,1} and estimated
    probability yp that yt = 1, the log loss is

        -log P(yt|yp) = -(yt log(yp) + (1 - yt) log(1 - yp))

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels for n_samples samples.

    y_pred : array-like of float, shape = (n_samples, n_classes)
        Predicted probabilities, as returned by a classifier's
        predict_proba method.

    eps : float
        Log loss is undefined for p=0 or p=1, so probabilities are
        clipped to max(eps, min(1 - eps, p)).

    normalize : bool, optional (default=True)
        If true, return the mean loss per sample.
        Otherwise, return the sum of the per-sample losses.

    Returns
    -------
    loss : float

    Examples
    --------
    >>> log_loss(["spam", "ham", "ham", "spam"],  # doctest: +ELLIPSIS
    ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])
    0.21616...

    References
    ----------
    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,
    p. 209.

    Notes
    -----
    The logarithm used is the natural logarithm (base-e).
    """
    truth = LabelBinarizer().fit_transform(y_true)
    if truth.shape[1] == 1:
        # Single indicator column: spell out both classes as [1 - t, t].
        truth = np.concatenate((1 - truth, truth), axis=1)

    # Keep probabilities strictly inside (0, 1) so the logarithm is finite.
    proba = np.clip(y_pred, eps, 1 - eps)

    # This happens in cases when elements in y_pred have type "str".
    if not isinstance(proba, np.ndarray):
        raise ValueError("y_pred should be an array of floats.")

    # A 1-d y_pred is read as one column, and one column as the second
    # class of a binary problem.
    if proba.ndim == 1:
        proba = proba[:, np.newaxis]
    if proba.shape[1] == 1:
        proba = np.concatenate((1 - proba, proba), axis=1)

    # Both matrices must agree on samples and on classes.
    check_consistent_length(truth, proba)
    truth = check_array(truth)
    proba = check_array(proba)
    n_true_classes = truth.shape[1]
    n_pred_classes = proba.shape[1]
    if n_true_classes != n_pred_classes:
        raise ValueError("y_true and y_pred have different number of classes "
                         "%d, %d" % (n_true_classes, n_pred_classes))

    # Rows are rescaled to sum to one again after the clipping.
    row_totals = proba.sum(axis=1)
    proba /= row_totals[:, np.newaxis]
    total = -(truth * np.log(proba)).sum()
    if normalize:
        return total / truth.shape[0]
    return total
698-
699-
700550
def label_ranking_average_precision_score(y_true, y_score):
701551
"""Compute ranking-based average precision
702552

0 commit comments

Comments
 (0)