FIX use float64 in metrics.r2_score() to prevent overflow

jzwinck · larsmans · commit e433d20fef89 · 2013-07-16T23:29:56.000+02:00
Without this, if the input arrays are of type np.float32, their sums may be computed with a large accumulated error, resulting in the wrong score with very long arrays (millions of elements). The "1 - numerator / denominator" calculation at the very end produces a float64 anyway, so the returned type does not change--only the accuracy does. Fixes scikit-learn#2158.
diff --git a/sklearn/metrics/metrics.py b/sklearn/metrics/metrics.py
@@ -2392,8 +2392,8 @@ def r2_score(y_true, y_pred):
     if len(y_true) == 1:
         raise ValueError("r2_score can only be computed given more than one"
                          " sample.")
-    numerator = ((y_true - y_pred) ** 2).sum()
-    denominator = ((y_true - y_true.mean(axis=0)) ** 2).sum()
+    numerator = ((y_true - y_pred) ** 2).sum(dtype=np.float64)
+    denominator = ((y_true - y_true.mean(axis=0)) ** 2).sum(dtype=np.float64)
 
     if denominator == 0.0:
         if numerator == 0.0: