twitter-algorithm-ml/metrics/auroc.py

163 lines
5.9 KiB
Python

"""
AUROC metrics.
"""
from typing import Union
from tml.ml_logging.torch_logging import logging
import torch
import torchmetrics
from torchmetrics.utilities.data import dim_zero_cat
def _compute_helper(
predictions: torch.Tensor,
target: torch.Tensor,
weights: torch.Tensor,
max_positive_negative_weighted_sum: torch.Tensor,
min_positive_negative_weighted_sum: torch.Tensor,
equal_predictions_as_incorrect: bool,
) -> torch.Tensor:
"""
Compute AUROC.
Args:
predictions: The predictions probabilities.
target: The target.
weights: The sample weights to assign to each sample in the batch.
max_positive_negative_weighted_sum: The sum of the weights for the positive labels.
min_positive_negative_weighted_sum:
equal_predictions_as_incorrect: For positive & negative labels having identical scores,
we assume that they are correct prediction (i.e weight = 1) when ths is False. Otherwise,
we assume that they are correct prediction (i.e weight = 0).
"""
dim = 0
# Sort predictions based on key (score, true_label). The order is ascending for score.
# For true_label, order is ascending if equal_predictions_as_incorrect is True;
# otherwise it is descending.
target_order = torch.argsort(target, dim=dim, descending=equal_predictions_as_incorrect)
score_order = torch.sort(torch.gather(predictions, dim, target_order), stable=True, dim=dim)[1]
score_order = torch.gather(target_order, dim, score_order)
sorted_target = torch.gather(target, dim, score_order)
sorted_weights = torch.gather(weights, dim, score_order)
negatives_from_left = torch.cumsum((1.0 - sorted_target) * sorted_weights, 0)
numerator = torch.sum(
sorted_weights * (sorted_target * negatives_from_left / max_positive_negative_weighted_sum)
)
return numerator / min_positive_negative_weighted_sum
class AUROCWithMWU(torchmetrics.Metric):
"""
AUROC using Mann-Whitney U-test.
See https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve.
This AUROC implementation is well suited to (non-zero) low-CTR. In particular it will return
the correct AUROC even if the predicted probabilities are all close to 0.
Currently only support binary classification.
"""
def __init__(self, label_threshold: float = 0.5, raise_missing_class: bool = False, **kwargs):
"""
Args:
label_threshold: Labels strictly above this threshold are considered positive labels,
otherwise, they are considered negative.
raise_missing_class: If True, an error will be raise if negative or positive class is missing.
Otherwise, we will simply log a warning.
**kwargs: Additional parameters supported by all torchmetrics.Metric.
"""
super().__init__(**kwargs)
self.add_state("predictions", default=[], dist_reduce_fx="cat")
self.add_state("target", default=[], dist_reduce_fx="cat")
self.add_state("weights", default=[], dist_reduce_fx="cat")
self.label_threshold = label_threshold
self.raise_missing_class = raise_missing_class
def update(
self,
predictions: torch.Tensor,
target: torch.Tensor,
weight: Union[float, torch.Tensor] = 1.0,
) -> None:
"""
Update the current auroc.
Args:
predictions: Predicted values, 1D Tensor or 2D Tensor of shape batch_size x 1.
target: Ground truth. Must have same shape as predictions.
weight: The weight to use for the predicted values. Shape should be
broadcastable to that of predictions.
"""
self.predictions.append(predictions)
self.target.append(target)
if not isinstance(weight, torch.Tensor):
weight = torch.as_tensor(weight, dtype=predictions.dtype, device=target.device)
self.weights.append(torch.broadcast_to(weight, predictions.size()))
def compute(self) -> torch.Tensor:
"""
Compute and return the accumulated AUROC.
"""
weights = dim_zero_cat(self.weights)
predictions = dim_zero_cat(self.predictions)
target = dim_zero_cat(self.target).type_as(predictions)
negative_mask = target <= self.label_threshold
positive_mask = torch.logical_not(negative_mask)
if not negative_mask.any():
msg = "Negative class missing. AUROC returned will be meaningless."
if self.raise_missing_class:
raise ValueError(msg)
else:
logging.warn(msg)
if not positive_mask.any():
msg = "Positive class missing. AUROC returned will be meaningless."
if self.raise_missing_class:
raise ValueError(msg)
else:
logging.warn(msg)
weighted_actual_negative_sum = torch.sum(
torch.where(negative_mask, weights, torch.zeros_like(weights))
)
weighted_actual_positive_sum = torch.sum(
torch.where(positive_mask, weights, torch.zeros_like(weights))
)
max_positive_negative_weighted_sum = torch.max(
weighted_actual_negative_sum, weighted_actual_positive_sum
)
min_positive_negative_weighted_sum = torch.min(
weighted_actual_negative_sum, weighted_actual_positive_sum
)
# Compute auroc with the weight set to 1 when positive & negative have identical scores.
auroc_le = _compute_helper(
target=target,
weights=weights,
predictions=predictions,
min_positive_negative_weighted_sum=min_positive_negative_weighted_sum,
max_positive_negative_weighted_sum=max_positive_negative_weighted_sum,
equal_predictions_as_incorrect=False,
)
# Compute auroc with the weight set to 0 when positive & negative have identical scores.
auroc_lt = _compute_helper(
target=target,
weights=weights,
predictions=predictions,
min_positive_negative_weighted_sum=min_positive_negative_weighted_sum,
max_positive_negative_weighted_sum=max_positive_negative_weighted_sum,
equal_predictions_as_incorrect=True,
)
# Compute auroc with the weight set to 1/2 when positive & negative have identical scores.
return auroc_le - (auroc_le - auroc_lt) / 2.0