Commit 624c00b

feat: add Categorical Generalized Cross Entropy (GCE) loss (keras-team#21024)
* feat: add Categorical Generalized Cross Entropy (GCE) loss
* run api generation
* docs: Align docstrings with Keras style guide
* docs: more docstring changes
1 parent 33d97b0 commit 624c00b
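
For orientation, a minimal sketch of how the new loss is meant to be used once a Keras 3 build includes this commit. The export name `keras.losses.CategoricalGeneralizedCrossEntropy` comes from the `keras_export` decorator in the diff below; the model, data, and `q` values here are illustrative only.

```python
import numpy as np
import keras

# Standalone call: y_true holds integer class indices, y_pred holds probabilities.
y_true = np.array([0, 1, 0, 1])
y_pred = np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4], [0.4, 0.6]])
gce = keras.losses.CategoricalGeneralizedCrossEntropy(q=0.5)
print(float(gce(y_true, y_pred)))  # mean of (1 - p**q) / q over the batch

# As a training loss: the model must output probabilities (e.g. softmax),
# since the loss reads off the probability assigned to the true class.
model = keras.Sequential(
    [
        keras.Input(shape=(8,)),
        keras.layers.Dense(16, activation="relu"),
        keras.layers.Dense(2, activation="softmax"),
    ]
)
model.compile(
    optimizer="adam",
    loss=keras.losses.CategoricalGeneralizedCrossEntropy(q=0.7),
)
```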

File tree

4 files changed: 340 additions & 0 deletions


keras/api/_tf_keras/keras/losses/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -14,6 +14,7 @@
 from keras.src.losses.losses import BinaryFocalCrossentropy
 from keras.src.losses.losses import CategoricalCrossentropy
 from keras.src.losses.losses import CategoricalFocalCrossentropy
+from keras.src.losses.losses import CategoricalGeneralizedCrossEntropy
 from keras.src.losses.losses import CategoricalHinge
 from keras.src.losses.losses import Circle
 from keras.src.losses.losses import CosineSimilarity
@@ -34,6 +35,7 @@
 from keras.src.losses.losses import binary_focal_crossentropy
 from keras.src.losses.losses import categorical_crossentropy
 from keras.src.losses.losses import categorical_focal_crossentropy
+from keras.src.losses.losses import categorical_generalized_cross_entropy
 from keras.src.losses.losses import categorical_hinge
 from keras.src.losses.losses import circle
 from keras.src.losses.losses import cosine_similarity

keras/api/losses/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -13,6 +13,7 @@
 from keras.src.losses.losses import BinaryFocalCrossentropy
 from keras.src.losses.losses import CategoricalCrossentropy
 from keras.src.losses.losses import CategoricalFocalCrossentropy
+from keras.src.losses.losses import CategoricalGeneralizedCrossEntropy
 from keras.src.losses.losses import CategoricalHinge
 from keras.src.losses.losses import Circle
 from keras.src.losses.losses import CosineSimilarity
@@ -33,6 +34,7 @@
 from keras.src.losses.losses import binary_focal_crossentropy
 from keras.src.losses.losses import categorical_crossentropy
 from keras.src.losses.losses import categorical_focal_crossentropy
+from keras.src.losses.losses import categorical_generalized_cross_entropy
 from keras.src.losses.losses import categorical_hinge
 from keras.src.losses.losses import circle
 from keras.src.losses.losses import cosine_similarity
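
Both generated API stubs re-export the same two symbols, so after this change the class and the plain function should both resolve on the public `keras.losses` namespace. A hedged import check (the numbers are illustrative, and the mapping of the `keras/api` stubs onto `keras.losses` is assumed here rather than shown in the diff):

```python
import numpy as np
from keras.losses import (
    CategoricalGeneralizedCrossEntropy,
    categorical_generalized_cross_entropy,
)

y_true = np.array([0, 1])
y_pred = np.array([[0.9, 0.1], [0.3, 0.7]])

per_sample = categorical_generalized_cross_entropy(y_true, y_pred, q=0.5)  # shape [2]
reduced = CategoricalGeneralizedCrossEntropy(q=0.5)(y_true, y_pred)  # scalar after reduction
```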

keras/src/losses/losses.py

Lines changed: 131 additions & 0 deletions
@@ -1504,6 +1504,86 @@ def get_config(self):
         return config
 
 
+@keras_export("keras.losses.CategoricalGeneralizedCrossEntropy")
+class CategoricalGeneralizedCrossEntropy(LossFunctionWrapper):
+    """Computes the Generalized Cross Entropy loss between `y_true` & `y_pred`.
+
+    Generalized Cross Entropy (GCE) is a noise-robust loss function
+    that provides better robustness against noisy labels than
+    standard cross entropy.
+    It generalizes both cross entropy and mean absolute error through
+    the parameter `q`, where values closer to 1 make the loss more robust
+    to noisy labels.
+
+    Formula:
+    ```python
+    loss = (1 - p**q) / q
+    ```
+    where `p` is the predicted probability for the true class and `q`
+    is the noise parameter.
+
+    Args:
+        q: Float in range `(0, 1)`. It is the noise parameter.
+            Controls the behavior of the loss:
+            - As `q` approaches 0: behaves more like cross entropy.
+            - As `q` approaches 1: behaves more like mean absolute error.
+            Defaults to `0.5`.
+        reduction: Type of reduction to apply to the loss. In almost all cases
+            this should be `"sum_over_batch_size"`. Supported options are
+            `"sum"`, `"sum_over_batch_size"`, `"mean"`,
+            `"mean_with_sample_weight"` or `None`. `"sum"` sums the loss,
+            `"sum_over_batch_size"` and `"mean"` sum the loss and divide by the
+            sample size, and `"mean_with_sample_weight"` sums the loss and
+            divides by the sum of the sample weights. `"none"` and `None`
+            perform no aggregation. Defaults to `"sum_over_batch_size"`.
+        name: Optional name for the loss instance.
+        dtype: The dtype of the loss's computations. Defaults to `None`, which
+            means using `keras.backend.floatx()`. `keras.backend.floatx()` is
+            `"float32"` unless set to a different value
+            (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is
+            provided, then the `compute_dtype` will be utilized.
+
+    Example:
+    ```python
+    y_true = np.array([0, 1, 0, 1])
+    y_pred = np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4], [0.4, 0.6]])
+    keras.losses.CategoricalGeneralizedCrossEntropy()(y_true, y_pred)
+    ```
+
+    References:
+        - [Zhang, Sabuncu, 2018](https://arxiv.org/abs/1805.07836)
+          ("Generalized Cross Entropy Loss for Training
+          Deep Neural Networks with Noisy Labels")
+    """
+
+    def __init__(
+        self,
+        q=0.5,
+        reduction="sum_over_batch_size",
+        name="categorical_generalized_cross_entropy",
+        dtype=None,
+    ):
+        if not 0 < q < 1:
+            raise ValueError("q must be in the interval (0, 1)")
+        super().__init__(
+            categorical_generalized_cross_entropy,
+            name=name,
+            reduction=reduction,
+            dtype=dtype,
+            q=q,
+        )
+        self.q = q
+
+    def get_config(self):
+        config = Loss.get_config(self)
+        config.update(
+            {
+                "q": self.q,
+            }
+        )
+        return config
+
+
 def convert_binary_labels_to_hinge(y_true):
     """Converts binary labels into -1/1 for hinge loss/metric calculation."""
     are_zeros = ops.equal(y_true, 0)
@@ -2609,3 +2689,54 @@ def circle(
     circle_loss = ops.softplus(p_loss + n_loss)
     backend.set_keras_mask(circle_loss, circle_loss > 0)
     return circle_loss
+
+
+@keras_export("keras.losses.categorical_generalized_cross_entropy")
+def categorical_generalized_cross_entropy(y_true, y_pred, q):
+    """Computes the Generalized Cross Entropy loss.
+
+    Generalized Cross Entropy (GCE) is a noise-robust loss function that
+    provides better robustness against noisy labels than standard cross
+    entropy.
+    It generalizes both cross entropy and mean absolute error through
+    the parameter `q`, where values closer to 1 make the loss more robust
+    to noisy labels.
+
+    Formula:
+    ```python
+    loss = (1 - p**q) / q
+    ```
+    where `p` is the predicted probability for the true class and `q`
+    is the noise parameter.
+
+    Args:
+        y_true: Ground truth labels. Expected to contain *integer class
+            indices* with shape `[batch_size]` or `[batch_size, 1]`.
+        y_pred: The predicted class probabilities, with shape
+            `[batch_size, num_classes]`.
+        q: Float in range `(0, 1)`. It is the noise parameter.
+            Controls the behavior of the loss:
+            - As `q` approaches 0: behaves more like cross entropy.
+            - As `q` approaches 1: behaves more like mean absolute error.
+
+    Returns:
+        GCE loss values with shape `[batch_size]`.
+
+    References:
+        - [Zhang, Sabuncu, 2018](https://arxiv.org/abs/1805.07836)
+          ("Generalized Cross Entropy Loss for Training
+          Deep Neural Networks with Noisy Labels")
+    """
+
+    # Convert y_true to integer type and one-hot encode
+    y_true_one_hot = ops.one_hot(
+        ops.cast(y_true, "int"), num_classes=ops.shape(y_pred)[-1]
+    )
+    y_true_one_hot = ops.cast(y_true_one_hot, y_pred.dtype)
+    # Calculate the probability of the true class
+    p = ops.sum(y_pred * y_true_one_hot, axis=-1)
+
+    # Compute the GCE loss for q in (0, 1)
+    gce_loss = (1 - ops.power(p, q)) / q
+
+    return gce_loss
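
As a sanity check on the formula, the per-sample values and the default `"sum_over_batch_size"` reduction can be reproduced with plain NumPy. This mirrors the docstring example; it is a verification sketch, not part of the commit:

```python
import numpy as np

q = 0.5
y_true = np.array([0, 1, 0, 1])
y_pred = np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4], [0.4, 0.6]])

# Probability assigned to the true class of each sample.
p = y_pred[np.arange(len(y_true)), y_true]  # [0.7, 0.8, 0.6, 0.6]

per_sample = (1.0 - p**q) / q  # approx [0.327, 0.211, 0.451, 0.451]
print(per_sample.mean())       # approx 0.360, the "sum_over_batch_size" result
```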

keras/src/losses/losses_test.py

Lines changed: 205 additions & 0 deletions
@@ -1763,3 +1763,208 @@ def test_dtype_arg(self):
         circle_loss = losses.Circle(dtype="bfloat16")
         loss = circle_loss(self.y_true, self.y_pred)
         self.assertDType(loss, "bfloat16")
+
+
+class CategoricalGeneralizedCrossEntropyTest(testing.TestCase):
+    def test_config(self):
+        self.run_class_serialization_test(
+            losses.CategoricalGeneralizedCrossEntropy(name="gce")
+        )
+        self.run_class_serialization_test(
+            losses.CategoricalGeneralizedCrossEntropy(q=0.1, name="gce")
+        )
+
+    def test_basic_correctness_for_binary(self):
+        y_true = np.array([0, 1, 0, 1])
+        y_pred = np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4], [0.4, 0.6]])
+        # Calculate expected GCE loss manually
+        # For q=0.5:
+        # First sample (class 0): gce = (1 - 0.7^0.5) / 0.5
+        # Second sample (class 1): gce = (1 - 0.8^0.5) / 0.5
+        # Third sample (class 0): gce = (1 - 0.6^0.5) / 0.5
+        # Fourth sample (class 1): gce = (1 - 0.6^0.5) / 0.5
+        expected = np.array(
+            [
+                (1 - np.power(0.7, 0.5)) / 0.5,
+                (1 - np.power(0.8, 0.5)) / 0.5,
+                (1 - np.power(0.6, 0.5)) / 0.5,
+                (1 - np.power(0.6, 0.5)) / 0.5,
+            ]
+        )
+        output = losses.CategoricalGeneralizedCrossEntropy()(y_true, y_pred)
+        self.assertAllClose(output, expected.sum() / len(expected))
+
+        expected_q_08 = np.array(
+            [
+                (1 - np.power(0.7, 0.8)) / 0.8,
+                (1 - np.power(0.8, 0.8)) / 0.8,
+                (1 - np.power(0.6, 0.8)) / 0.8,
+                (1 - np.power(0.6, 0.8)) / 0.8,
+            ]
+        )
+        output = losses.CategoricalGeneralizedCrossEntropy(q=0.8)(
+            y_true, y_pred
+        )
+        self.assertAllClose(output, expected_q_08.sum() / len(expected_q_08))
+
+    def test_basic_correctness_for_multi_class(self):
+        y_true = np.array([0, 1, 0, 1])
+        y_pred = np.array(
+            [[0.7, 0.3, 0.0], [0.2, 0.2, 0.6], [0.6, 0.4, 0.0], [0.2, 0.2, 0.6]]
+        )
+        # Calculate expected GCE loss manually
+        # For q=0.5:
+        # First sample (class 0): gce = (1 - 0.7^0.5) / 0.5
+        # Second sample (class 1): gce = (1 - 0.2^0.5) / 0.5
+        # Third sample (class 0): gce = (1 - 0.6^0.5) / 0.5
+        # Fourth sample (class 1): gce = (1 - 0.2^0.5) / 0.5
+        expected = np.array(
+            [
+                (1 - np.power(0.7, 0.5)) / 0.5,
+                (1 - np.power(0.2, 0.5)) / 0.5,
+                (1 - np.power(0.6, 0.5)) / 0.5,
+                (1 - np.power(0.2, 0.5)) / 0.5,
+            ]
+        )
+        output = losses.CategoricalGeneralizedCrossEntropy()(y_true, y_pred)
+        self.assertAllClose(output, expected.sum() / len(expected))
+
+        expected_q_08 = np.array(
+            [
+                (1 - np.power(0.7, 0.8)) / 0.8,
+                (1 - np.power(0.2, 0.8)) / 0.8,
+                (1 - np.power(0.6, 0.8)) / 0.8,
+                (1 - np.power(0.2, 0.8)) / 0.8,
+            ]
+        )
+        output = losses.CategoricalGeneralizedCrossEntropy(q=0.8)(
+            y_true, y_pred
+        )
+        self.assertAllClose(output, expected_q_08.sum() / len(expected_q_08))
+
+    def test_binary_segmentation(self):
+        y_true = np.array(
+            [[0, 1, 1, 0], [1, 0, 1, 0], [0, 0, 1, 1], [1, 1, 0, 1]]
+        )
+        y_pred = np.array(
+            [
+                [[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]],
+                [[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]],
+                [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]],
+                [[0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0]],
+            ]
+        )
+        output = losses.CategoricalGeneralizedCrossEntropy(q=0.5)(
+            y_true, y_pred
+        )
+        self.assertAllClose(output, 0.0)
+
+        y_true = np.array(
+            [[0, 1, 1, 0], [1, 0, 1, 0], [0, 0, 1, 1], [1, 1, 0, 1]]
+        )
+        y_pred = np.array(
+            [
+                [[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.2, 0.8]],
+                [[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]],
+                [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]],
+                [[0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.6, 0.4]],
+            ]
+        )
+        expected = np.array(
+            [
+                (1 - np.power(0.2, 0.5)) / 0.5,
+                (1 - np.power(0.4, 0.5)) / 0.5,
+            ]
+        )
+        output = losses.CategoricalGeneralizedCrossEntropy(q=0.5)(
+            y_true, y_pred
+        )
+        self.assertAllClose(output, expected.sum() / 16.0)  # 16 pixels
+
+    def test_multi_class_segmentation(self):
+        y_true = np.array(
+            [[0, 1, 2, 0], [1, 0, 1, 0], [0, 0, 1, 1], [1, 1, 0, 1]]
+        )
+        y_pred = np.array(
+            [
+                [
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [0.0, 0.0, 1.0],
+                    [1.0, 0.0, 0.0],
+                ],
+                [
+                    [0.0, 1.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                ],
+                [
+                    [1.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                ],
+                [
+                    [0.0, 1.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                ],
+            ]
+        )
+        output = losses.CategoricalGeneralizedCrossEntropy(q=0.5)(
+            y_true, y_pred
+        )
+        self.assertAllClose(output, 0.0)
+
+        y_true = np.array(
+            [[0, 1, 2, 0], [1, 0, 1, 0], [0, 0, 1, 1], [1, 1, 0, 1]]
+        )
+        y_pred = np.array(
+            [
+                [
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [0.0, 0.0, 1.0],
+                    [0.2, 0.0, 0.8],
+                ],
+                [
+                    [1.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                ],
+                [
+                    [1.0, 0.0, 0.0],
+                    [1.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                ],
+                [
+                    [0.0, 1.0, 0.0],
+                    [0.0, 1.0, 0.0],
+                    [0.5, 0.5, 0.0],
+                    [0.0, 1.0, 0.0],
+                ],
+            ]
+        )
+        expected = np.array(
+            [
+                (1 - np.power(0.2, 0.5)) / 0.5,
+                (1 - np.power(0.0, 0.5)) / 0.5,
+                (1 - np.power(0.5, 0.5)) / 0.5,
+            ]
+        )
+        output = losses.CategoricalGeneralizedCrossEntropy(q=0.5)(
+            y_true, y_pred
+        )
+        self.assertAllClose(output, expected.sum() / 16.0)  # 16 pixels
+
+    def test_dtype_arg(self):
+        y_true = np.array([0, 1, 0, 1])
+        y_pred = np.array([[0.7, 0.3], [0.2, 0.8], [0.6, 0.4], [0.4, 0.6]])
+        output = losses.CategoricalGeneralizedCrossEntropy(dtype="bfloat16")(
+            y_true, y_pred
+        )
+        self.assertDType(output, "bfloat16")
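
The tests pin specific values; the limiting behaviour described in the docstrings can also be checked numerically. As `q` approaches 0, `(1 - p**q) / q` approaches `-log(p)` (the cross-entropy contribution of the true class), and at `q = 1` it reduces to `1 - p`, a mean-absolute-error-style penalty. A small NumPy sketch of that interpolation (the class constructor restricts `q` to the open interval `(0, 1)`, so the endpoints are evaluated on the bare formula here):

```python
import numpy as np

p = np.array([0.9, 0.5, 0.1])  # probability assigned to the true class


def gce(p, q):
    # Bare GCE formula, without the (0, 1) validation done by the Keras class.
    return (1.0 - p**q) / q


print(gce(p, 1e-6))  # approx [0.105, 0.693, 2.303], close to -log(p)
print(-np.log(p))    # [0.105, 0.693, 2.303]
print(gce(p, 1.0))   # [0.1, 0.5, 0.9], i.e. 1 - p
```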
