@@ -148,6 +148,108 @@ def categorical_cross_entropy(
148148 return - np .sum (y_true * np .log (y_pred ))
149149
150150
def categorical_focal_cross_entropy(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    alpha: np.ndarray = None,
    gamma: float = 2.0,
    epsilon: float = 1e-15,
) -> float:
    """
    Calculate the mean categorical focal cross-entropy (CFCE) loss between true
    labels and predicted probabilities for multi-class classification.

    CFCE loss is a generalization of binary focal cross-entropy for multi-class
    classification. It addresses class imbalance by focusing on hard examples:
    the factor (1 - y_pred)**gamma shrinks the contribution of samples whose true
    class is already predicted with high probability.

    Per sample:  CFCE = -Σ alpha * (1 - y_pred)**gamma * y_true * log(y_pred)
    The value returned is the mean of the per-sample losses.

    Reference: [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf)

    Parameters:
    - y_true: True labels in one-hot encoded form (one row per sample, one
      column per class).
    - y_pred: Predicted probabilities for each class, same shape as y_true.
    - alpha: Array of weighting factors, one per class. If None (default),
      every class gets a weight of 1.
    - gamma: Focusing parameter for modulating the loss (default: 2.0).
    - epsilon: Small constant to avoid numerical instability. It is used both
      to clip y_pred away from 0 and 1 and as the tolerance when checking that
      each row of y_pred sums to 1.

    Returns:
    - The mean categorical focal cross-entropy loss as a Python float.

    Raises:
    - ValueError: If y_true and y_pred differ in shape, y_true is not one-hot
      encoded, the length of alpha differs from the number of classes, or a
      row of y_pred does not sum to 1 within the epsilon tolerance.

    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
    >>> alpha = np.array([0.6, 0.2, 0.7])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
    0.0025966118981496423

    >>> true_labels = np.array([[0, 1, 0], [0, 0, 1]])
    >>> pred_probs = np.array([[0.05, 0.95, 0], [0.1, 0.8, 0.1]])
    >>> alpha = np.array([0.25, 0.25, 0.25])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
    0.23315276982014324

    >>> true_labels = np.array([[1, 0], [0, 1]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
    Traceback (most recent call last):
        ...
    ValueError: Shape of y_true and y_pred must be the same.

    >>> true_labels = np.array([[2, 0, 1], [1, 0, 0]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
    Traceback (most recent call last):
        ...
    ValueError: y_true must be one-hot encoded.

    >>> true_labels = np.array([[1, 0, 1], [1, 0, 0]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
    Traceback (most recent call last):
        ...
    ValueError: y_true must be one-hot encoded.

    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.1], [0.2, 0.7, 0.1]])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs)
    Traceback (most recent call last):
        ...
    ValueError: Predicted probabilities must sum to approximately 1.

    >>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    >>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
    >>> alpha = np.array([0.6, 0.2])
    >>> categorical_focal_cross_entropy(true_labels, pred_probs, alpha)
    Traceback (most recent call last):
        ...
    ValueError: Length of alpha must match the number of classes.
    """
    if y_true.shape != y_pred.shape:
        raise ValueError("Shape of y_true and y_pred must be the same.")

    # Without explicit weights every class contributes equally.
    if alpha is None:
        alpha = np.ones(y_true.shape[1])

    # One-hot means: only 0/1 entries and exactly one 1 per row.
    if np.any((y_true != 0) & (y_true != 1)) or np.any(y_true.sum(axis=1) != 1):
        raise ValueError("y_true must be one-hot encoded.")

    if len(alpha) != y_true.shape[1]:
        raise ValueError("Length of alpha must match the number of classes.")

    # NOTE(review): the tolerance here is epsilon (1e-15 by default), which is
    # very strict; lower-precision predictions (e.g. float32 softmax output)
    # will likely be rejected unless a larger epsilon is passed -- confirm this
    # is intended.
    row_sums = np.sum(y_pred, axis=1)
    if not np.all(np.isclose(row_sums, 1, rtol=epsilon, atol=epsilon)):
        raise ValueError("Predicted probabilities must sum to approximately 1.")

    # Clip predicted probabilities to avoid log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # y_true acts as a mask, so only the true-class term of each row survives
    # the sum across classes.
    cfce_loss = -np.sum(
        alpha * np.power(1 - y_pred, gamma) * y_true * np.log(y_pred), axis=1
    )

    # Convert the NumPy scalar to a built-in float so the result matches the
    # annotated return type and prints identically across NumPy versions.
    return float(np.mean(cfce_loss))
251+
252+
151253def hinge_loss (y_true : np .ndarray , y_pred : np .ndarray ) -> float :
152254 """
153255 Calculate the mean hinge loss for between true labels and predicted probabilities
0 commit comments