
final_metrics

Calculate final metrics for binary or multiclass classification.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `classification` | `str` | The type of classification: `"binary"` or `"multiclass"`. | required |
| `y` | `ndarray` | Ground truth (actual) labels. | required |
| `preds` | `ndarray` | Predicted labels from the model. | required |
| `probs` | `Union[ndarray, None]` | Predicted probabilities from the model, if available. Used for the Brier score and ROC AUC in binary classification and for the multiclass Brier score. | required |
| `threshold` | `Union[float, None]` | Best threshold used for binary classification. Defaults to `None`. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `Dict[str, Any]` | Dictionary of evaluation metrics. |
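
A minimal usage sketch, assuming the function is importable from `periomod.training._metrics` as the source path below suggests; the labels and probabilities here are illustrative, not taken from the library:

```python
import numpy as np

from periomod.training._metrics import final_metrics

# Illustrative binary-classification data (hypothetical values).
y = np.array([0, 1, 0, 1, 1, 0])        # ground-truth labels
preds = np.array([0, 1, 1, 1, 0, 0])    # model predictions
probs = np.array([0.2, 0.9, 0.6, 0.8, 0.4, 0.1])  # predicted P(class 1)

metrics = final_metrics(
    classification="binary",
    y=y,
    preds=preds,
    probs=probs,
    threshold=0.5,
)
print(metrics["F1 Score"], metrics["ROC AUC Score"])
```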

Source code in `periomod/training/_metrics.py`:

```python
# Module-level imports the function relies on (reconstructed; the original
# listing starts at the function definition):
from typing import Any, Dict, Union

import numpy as np
from sklearn.metrics import (
    accuracy_score,
    brier_score_loss,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

# brier_loss_multi is defined elsewhere in periomod/training/_metrics.py.


def final_metrics(
    classification: str,
    y: np.ndarray,
    preds: np.ndarray,
    probs: Union[np.ndarray, None],
    threshold: Union[float, None] = None,
) -> Dict[str, Any]:
    """Calculate final metrics for binary or multiclass classification.

    Args:
        classification (str): The type of classification
            ("binary" or "multiclass").
        y (np.ndarray): Ground truth (actual) labels.
        preds (np.ndarray): Predicted labels from the model.
        probs (Union[np.ndarray, None]): Predicted probabilities from the
            model, if available. Used for the Brier score and ROC AUC in
            binary classification and for the multiclass Brier score.
        threshold (Union[float, None]): Best threshold used for binary
            classification. Defaults to None.

    Returns:
        Dict[str, Any]: Dictionary of evaluation metrics.
    """
    if classification == "binary":
        # Class 0 is treated as the positive label for F1, precision, recall.
        f1: float = f1_score(y_true=y, y_pred=preds, pos_label=0)
        precision: float = precision_score(y_true=y, y_pred=preds, pos_label=0)
        recall: float = recall_score(y_true=y, y_pred=preds, pos_label=0)
        accuracy: float = accuracy_score(y_true=y, y_pred=preds)
        # Probability-based metrics are skipped when no probabilities exist.
        brier_score_value: Union[float, None] = (
            brier_score_loss(y_true=y, y_proba=probs) if probs is not None else None
        )
        roc_auc_value: Union[float, None] = (
            roc_auc_score(y, probs) if probs is not None else None
        )
        conf_matrix: np.ndarray = confusion_matrix(y, preds)

        return {
            "F1 Score": f1,
            "Precision": precision,
            "Recall": recall,
            "Accuracy": accuracy,
            "Brier Score": brier_score_value,
            "ROC AUC Score": roc_auc_value,
            "Confusion Matrix": conf_matrix,
            "Best Threshold": threshold,
        }

    elif classification == "multiclass":
        # Guard against missing probabilities, mirroring the binary branch.
        brier_score: Union[float, None] = (
            brier_loss_multi(y=y, probs=probs) if probs is not None else None
        )

        return {
            "Macro F1": f1_score(y_true=y, y_pred=preds, average="macro"),
            "Accuracy": accuracy_score(y_true=y, y_pred=preds),
            "Class F1 Scores": f1_score(y_true=y, y_pred=preds, average=None),
            "Multiclass Brier Score": brier_score,
        }

    raise ValueError(f"Unsupported classification type: {classification}")
```
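
`brier_loss_multi` is defined elsewhere in the module and is not shown above. As an assumption about what it computes, a standard multiclass Brier score takes the mean squared distance between one-hot encoded labels and the predicted probability matrix; the helper below is a hypothetical stand-in, not the library's implementation:

```python
import numpy as np


def brier_loss_multi_sketch(y: np.ndarray, probs: np.ndarray) -> float:
    """Hypothetical stand-in for brier_loss_multi (the real helper lives in
    periomod/training/_metrics.py and may differ).

    Computes the mean squared error between one-hot encoded labels and a
    predicted probability matrix of shape (n_samples, n_classes).
    """
    n_classes = probs.shape[1]
    one_hot = np.eye(n_classes)[y.astype(int)]  # (n_samples, n_classes)
    # Sum squared error over classes, then average over samples.
    return float(np.mean(np.sum((one_hot - probs) ** 2, axis=1)))
```

Note that with two classes this formulation equals twice the binary Brier score, since both class columns contribute the same squared error per sample.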