Skip to content

Commit 1b2833a

Browse files
jatinshaharjoly
authored and committed
Add sample_weight parameter to metrics.jaccard_similarity_score
- Include this metric in sample_weight test in tests/test_common.py
1 parent cbcae04 commit 1b2833a

File tree

3 files changed

+19
-8
lines changed

3 files changed

+19
-8
lines changed

doc/whats_new.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ Enhancements
4444
descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet`
4545
and related. By `Manoj Kumar`_.
4646

47+
- Add ``sample_weight`` parameter to :func:`metrics.jaccard_similarity_score`.
48+
By `Jatin Shah`_.
49+
50+
4751

4852
Documentation improvements
4953
..........................

sklearn/metrics/classification.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# Lars Buitinck <[email protected]>
1616
# Joel Nothman <[email protected]>
1717
# Noel Dawe <[email protected]>
18+
# Jatin Shah <[email protected]>
1819
# License: BSD 3 clause
1920

2021
from __future__ import division
@@ -237,7 +238,8 @@ def confusion_matrix(y_true, y_pred, labels=None):
237238
return CM
238239

239240

240-
def jaccard_similarity_score(y_true, y_pred, normalize=True):
241+
def jaccard_similarity_score(y_true, y_pred, normalize=True,
242+
sample_weight=None):
241243
"""Jaccard similarity coefficient score
242244
243245
The Jaccard index [1], or Jaccard similarity coefficient, defined as
@@ -258,6 +260,9 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True):
258260
over the sample set. Otherwise, return the average of Jaccard
259261
similarity coefficient.
260262
263+
sample_weight : array-like of shape = [n_samples], optional
264+
Sample weights.
265+
261266
Returns
262267
-------
263268
score : float
@@ -340,9 +345,12 @@ def jaccard_similarity_score(y_true, y_pred, normalize=True):
340345
score = y_true == y_pred
341346

342347
if normalize:
343-
return np.mean(score)
348+
return np.average(score, weights=sample_weight)
344349
else:
345-
return np.sum(score)
350+
if sample_weight is not None:
351+
return np.dot(score, sample_weight)
352+
else:
353+
return np.sum(score)
346354

347355

348356
def matthews_corrcoef(y_true, y_pred):

sklearn/metrics/tests/test_common.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,6 @@
315315
"confusion_matrix",
316316
"hamming_loss",
317317
"hinge_loss",
318-
"jaccard_similarity_score", "unnormalized_jaccard_similarity_score",
319318
"log_loss",
320319
"matthews_corrcoef_score",
321320
]
@@ -904,10 +903,10 @@ def check_sample_weight_invariance(name, metric, y1, y2):
904903

905904
# check that unit weights gives the same score as no weight
906905
unweighted_score = metric(y1, y2, sample_weight=None)
907-
assert_equal(
906+
assert_almost_equal(
908907
unweighted_score,
909908
metric(y1, y2, sample_weight=np.ones(shape=len(y1))),
910-
msg="For %s sample_weight=None is not equivalent to "
909+
err_msg="For %s sample_weight=None is not equivalent to "
911910
"sample_weight=ones" % name)
912911

913912
# check that the weighted and unweighted scores are unequal
@@ -920,9 +919,9 @@ def check_sample_weight_invariance(name, metric, y1, y2):
920919
# check that sample_weight can be a list
921920
weighted_score_list = metric(y1, y2,
922921
sample_weight=sample_weight.tolist())
923-
assert_equal(
922+
assert_almost_equal(
924923
weighted_score, weighted_score_list,
925-
msg="Weighted scores for array and list sample_weight input are "
924+
err_msg="Weighted scores for array and list sample_weight input are "
926925
"not equal (%f != %f) for %s" % (
927926
weighted_score, weighted_score_list, name))
928927

0 commit comments

Comments
 (0)