
Commit a353b7f

Stéphane Couvreur authored and thomasjpfan committed
ENH Extension of v_measure_score metric to include beta parameter (scikit-learn#13607)
Parent e850e7a, commit a353b7f

4 files changed, +68 −6 lines changed

doc/modules/clustering.rst

Lines changed: 16 additions & 0 deletions
@@ -1277,6 +1277,22 @@ Their harmonic mean called **V-measure** is computed by

   >>> metrics.v_measure_score(labels_true, labels_pred) # doctest: +ELLIPSIS
   0.51...

+This function's formula is as follows:
+
+.. math:: v = \frac{(1 + \beta) \times \text{homogeneity} \times \text{completeness}}{(\beta \times \text{homogeneity} + \text{completeness})}
+
+`beta` defaults to a value of 1.0, but when using a value less than 1 for beta::
+
+  >>> metrics.v_measure_score(labels_true, labels_pred, beta=0.6) # doctest: +ELLIPSIS
+  0.54...
+
+more weight will be attributed to homogeneity, and when using a value greater than 1::
+
+  >>> metrics.v_measure_score(labels_true, labels_pred, beta=1.8) # doctest: +ELLIPSIS
+  0.48...
+
+more weight will be attributed to completeness.
+
 The V-measure is actually equivalent to the mutual information (NMI)
 discussed above, with the aggregation function being the arithmetic mean [B2011]_.
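A minimal sketch of the weighting behaviour the added documentation describes (standalone, not part of the commit; the homogeneity/completeness values below are illustrative placeholders, not the documented scores):

    # Sketch: evaluate the generalized V-measure formula directly to see how
    # beta trades off homogeneity (h) against completeness (c).
    # The h and c values are illustrative, not computed from any labeling.
    def weighted_v(h, c, beta):
        return (1 + beta) * h * c / (beta * h + c)

    h, c = 0.67, 0.42
    for beta in (0.2, 0.6, 1.0, 1.8, 5.0):
        # beta -> 0 pulls v toward h (homogeneity); large beta pulls v toward c.
        print(beta, round(weighted_v(h, c, beta), 3))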

doc/whats_new/v0.21.rst

Lines changed: 7 additions & 0 deletions
@@ -409,6 +409,13 @@ Support for Python 3.4 and below has been officially dropped.
   :issue:`12334` by :user:`Emmanuel Arias <[email protected]>`,
   `Joel Nothman`_ and `Andreas Müller`_

+- |Enhancement| Added the `beta` parameter to
+  :func:`metrics.cluster.supervised.homogeneity_completeness_v_measure` and
+  :func:`metrics.cluster.supervised.v_measure_score` to configure the
+  tradeoff between homogeneity and completeness.
+  :issue:`13607` by :user:`Stephane Couvreur <scouvreur>` and
+  :user:`Ivan Sanchez <ivsanro1>`.
+
 - |Fix| The metric :func:`metrics.r2_score` is degenerate with a single sample
   and now it returns NaN and raises :class:`exceptions.UndefinedMetricWarning`.
   :issue:`12855` by :user:`Pawel Sendyk <psendyk>`.

sklearn/metrics/cluster/supervised.py

Lines changed: 20 additions & 6 deletions
@@ -239,7 +239,7 @@ def adjusted_rand_score(labels_true, labels_pred):
     return (sum_comb - prod_comb) / (mean_comb - prod_comb)


-def homogeneity_completeness_v_measure(labels_true, labels_pred):
+def homogeneity_completeness_v_measure(labels_true, labels_pred, beta=1.0):
     """Compute the homogeneity and completeness and V-Measure scores at once.

     Those metrics are based on normalized conditional entropy measures of
@@ -275,6 +275,12 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred):
     labels_pred : array, shape = [n_samples]
         cluster labels to evaluate

+    beta : float
+        Ratio of weight attributed to ``homogeneity`` vs ``completeness``.
+        If ``beta`` is greater than 1, ``completeness`` is weighted more
+        strongly in the calculation. If ``beta`` is less than 1,
+        ``homogeneity`` is weighted more strongly.
+
     Returns
     -------
     homogeneity : float
@@ -309,8 +315,8 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred):
     if homogeneity + completeness == 0.0:
         v_measure_score = 0.0
     else:
-        v_measure_score = (2.0 * homogeneity * completeness /
-                           (homogeneity + completeness))
+        v_measure_score = ((1 + beta) * homogeneity * completeness
+                           / (beta * homogeneity + completeness))

     return homogeneity, completeness, v_measure_score

@@ -459,15 +465,16 @@ def completeness_score(labels_true, labels_pred):
     return homogeneity_completeness_v_measure(labels_true, labels_pred)[1]


-def v_measure_score(labels_true, labels_pred):
+def v_measure_score(labels_true, labels_pred, beta=1.0):
     """V-measure cluster labeling given a ground truth.

     This score is identical to :func:`normalized_mutual_info_score` with
     the ``'arithmetic'`` option for averaging.

     The V-measure is the harmonic mean between homogeneity and completeness::

-        v = 2 * (homogeneity * completeness) / (homogeneity + completeness)
+        v = (1 + beta) * homogeneity * completeness
+             / (beta * homogeneity + completeness)

     This metric is independent of the absolute values of the labels:
     a permutation of the class or cluster label values won't change the
@@ -489,6 +496,12 @@ def v_measure_score(labels_true, labels_pred):
     labels_pred : array, shape = [n_samples]
         cluster labels to evaluate

+    beta : float
+        Ratio of weight attributed to ``homogeneity`` vs ``completeness``.
+        If ``beta`` is greater than 1, ``completeness`` is weighted more
+        strongly in the calculation. If ``beta`` is less than 1,
+        ``homogeneity`` is weighted more strongly.
+
     Returns
     -------
     v_measure : float
@@ -554,7 +567,8 @@ def v_measure_score(labels_true, labels_pred):
     0.0...

     """
-    return homogeneity_completeness_v_measure(labels_true, labels_pred)[2]
+    return homogeneity_completeness_v_measure(labels_true, labels_pred,
+                                              beta=beta)[2]


 def mutual_info_score(labels_true, labels_pred, contingency=None):
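With the default ``beta=1.0`` the new expression reduces to the previous harmonic mean, so existing callers are unaffected. A minimal sketch of that equivalence (standalone, not part of the commit; the h/c pairs are illustrative):

    # Sketch: (1 + 1) * h * c / (1 * h + c) == 2 * h * c / (h + c),
    # i.e. the default beta reproduces the old harmonic-mean V-measure.
    def old_v(h, c):
        return 2.0 * h * c / (h + c)

    def new_v(h, c, beta=1.0):
        return (1 + beta) * h * c / (beta * h + c)

    for h, c in [(0.67, 0.42), (0.5, 0.5), (1.0, 0.3)]:
        assert abs(old_v(h, c) - new_v(h, c)) < 1e-12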

sklearn/metrics/cluster/tests/test_supervised.py

Lines changed: 25 additions & 0 deletions
@@ -126,6 +126,31 @@ def test_not_complete_and_not_homogeneous_labeling():
     assert_almost_equal(v, 0.52, 2)


+def test_beta_parameter():
+    # test when beta is passed to
+    # homogeneity_completeness_v_measure
+    # and v_measure_score
+    beta_test = 0.2
+    h_test = 0.67
+    c_test = 0.42
+    v_test = ((1 + beta_test) * h_test * c_test
+              / (beta_test * h_test + c_test))
+
+    h, c, v = homogeneity_completeness_v_measure(
+        [0, 0, 0, 1, 1, 1],
+        [0, 1, 0, 1, 2, 2],
+        beta=beta_test)
+    assert_almost_equal(h, h_test, 2)
+    assert_almost_equal(c, c_test, 2)
+    assert_almost_equal(v, v_test, 2)
+
+    v = v_measure_score(
+        [0, 0, 0, 1, 1, 1],
+        [0, 1, 0, 1, 2, 2],
+        beta=beta_test)
+    assert_almost_equal(v, v_test, 2)
+
+
 def test_non_consecutive_labels():
     # regression tests for labels with gaps
     h, c, v = homogeneity_completeness_v_measure(
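As a quick check on the constants the new test uses (a sketch based only on the values shown in the diff), the expected v_test works out to roughly 0.61:

    # Sketch: recompute the expected value asserted by test_beta_parameter.
    beta_test, h_test, c_test = 0.2, 0.67, 0.42
    v_test = (1 + beta_test) * h_test * c_test / (beta_test * h_test + c_test)
    print(round(v_test, 3))  # about 0.609 for these constants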
