Skip to content

Commit 1b2833a

Browse files
jatinshaharjoly
authored and committed
Add sample_weight parameter to metrics.jaccard_similarity_score
- Include this metric in sample_weight test in tests/test_common.py
1 parent cbcae04 commit 1b2833a

File tree

3 files changed

+19
-8
lines changed

3 files changed

+19
-8
lines changed

doc/whats_new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ Enhancements
4444
descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet`
4545
and related. By `Manoj Kumar`_.
4646

47+
- Add ``sample_weight`` parameter to :func:`metrics.jaccard_similarity_score`.
48+
By `Jatin Shah`_.
49+
50+
4751

4852
Documentation improvements
4953
..........................

sklearn/metrics/classification.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# Lars Buitinck <[email protected]>
1616
# Joel Nothman <[email protected]>
1717
# Noel Dawe <[email protected]>
18+
# Jatin Shah <[email protected]>
1819
# License: BSD 3 clause
1920

2021
from __future__ import division
@@ -237,7 +238,8 @@ def confusion_matrix(y_true, y_pred, labels=None):
237238
return CM
238239

239240

240-
def jaccard_similarity_score(y_true, y_pred, normalize=True):
241+
def jaccard_similarity_score(y_true, y_pred, normalize=True,
242+
sample_weight=None):
241243
"""Jaccard similarity coefficient score
242244
243245
The Jaccard index [1], or Jaccard similarity coefficient, defined as
@@ -258,6 +260,9 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True):
258260
over the sample set. Otherwise, return the average of Jaccard
259261
similarity coefficient.
260262
263+
sample_weight : array-like of shape = [n_samples], optional
264+
Sample weights.
265+
261266
Returns
262267
-------
263268
score : float
@@ -340,9 +345,12 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True):
340345
score = y_true == y_pred
341346

342347
if normalize:
343-
return np.mean(score)
348+
return np.average(score, weights=sample_weight)
344349
else:
345-
return np.sum(score)
350+
if sample_weight is not None:
351+
return np.dot(score, sample_weight)
352+
else:
353+
return np.sum(score)
346354

347355

348356
def matthews_corrcoef(y_true, y_pred):

sklearn/metrics/tests/test_common.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,6 @@
315315
"confusion_matrix",
316316
"hamming_loss",
317317
"hinge_loss",
318-
"jaccard_similarity_score", "unnormalized_jaccard_similarity_score",
319318
"log_loss",
320319
"matthews_corrcoef_score",
321320
]
@@ -904,10 +903,10 @@ def check_sample_weight_invariance(name, metric, y1, y2):
904903

905904
# check that unit weights gives the same score as no weight
906905
unweighted_score = metric(y1, y2, sample_weight=None)
907-
assert_equal(
906+
assert_almost_equal(
908907
unweighted_score,
909908
metric(y1, y2, sample_weight=np.ones(shape=len(y1))),
910-
msg="For %s sample_weight=None is not equivalent to "
909+
err_msg="For %s sample_weight=None is not equivalent to "
911910
"sample_weight=ones" % name)
912911

913912
# check that the weighted and unweighted scores are unequal
@@ -920,9 +919,9 @@ def check_sample_weight_invariance(name, metric, y1, y2):
920919
# check that sample_weight can be a list
921920
weighted_score_list = metric(y1, y2,
922921
sample_weight=sample_weight.tolist())
923-
assert_equal(
922+
assert_almost_equal(
924923
weighted_score, weighted_score_list,
925-
msg="Weighted scores for array and list sample_weight input are "
924+
err_msg="Weighted scores for array and list sample_weight input are "
926925
"not equal (%f != %f) for %s" % (
927926
weighted_score, weighted_score_list, name))
928927

0 commit comments

Comments
 (0)