EvaluatorMethods

Bases: BaseConfig

Base class that contains methods for ModelEvaluator.

Inherits
  • BaseConfig: Provides base configuration settings.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `X` | `DataFrame` | The test dataset features. | required |
| `y` | `Series` | The test dataset labels. | required |
| `model` | `BaseEstimator` | A trained sklearn model instance for single-model evaluation. | required |
| `encoding` | `Optional[str]` | Encoding type for categorical features, e.g. `'one_hot'` or `'target'`, used for labeling and grouping in plots. | required |
| `aggregate` | `bool` | If True, aggregates the importance values of multi-category encoded features for interpretability. | required |

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `X` | `DataFrame` | Holds the test dataset features for evaluation. |
| `y` | `Series` | Holds the test dataset labels for evaluation. |
| `model` | `Optional[BaseEstimator]` | The primary model instance used for evaluation, if single-model evaluation is performed. |
| `encoding` | `Optional[str]` | The encoding type used, which affects plot titles and feature grouping in evaluations. |
| `aggregate` | `bool` | Whether to aggregate importance values of multi-category encoded features, improving interpretability in feature importance plots. |

Methods:

| Name | Description |
| --- | --- |
| `brier_scores` | Calculates the Brier score for each instance in the evaluator's dataset from the model's predicted probabilities. Returns a series of Brier scores indexed by instance. |
| `model_predictions` | Generates model predictions for the evaluator's feature set, applying threshold-based binarization if specified, and returns predictions as a series indexed by instance. |

Source code in periomod/evaluation/_baseeval.py
class EvaluatorMethods(BaseConfig):
    """Base class that contains methods for ModelEvalutor.

    Inherits:
        - `BaseConfig`: Provides base configuration settings.

    Args:
        X (pd.DataFrame): The test dataset features.
        y (pd.Series): The test dataset labels.
        model (sklearn.base.BaseEstimator): A trained sklearn model instance
            for single-model evaluation.
        encoding (Optional[str]): Encoding type for categorical features, e.g.,
            'one_hot' or 'target', used for labeling and grouping in plots.
        aggregate (bool): If True, aggregates the importance values of multi-category
            encoded features for interpretability.

    Attributes:
        X (pd.DataFrame): Holds the test dataset features for evaluation.
        y (pd.Series): Holds the test dataset labels for evaluation.
        model (Optional[sklearn.base.BaseEstimator]): The primary model instance used
            for evaluation, if single-model evaluation is performed.
        encoding (Optional[str]): Indicates the encoding type used, which impacts
            plot titles and feature grouping in evaluations.
        aggregate (bool): Indicates whether to aggregate importance values of
            multi-category encoded features, enhancing interpretability in feature
            importance plots.

    Methods:
        brier_scores: Calculates Brier score for each instance in the evaluator's
            dataset based on the model's predicted probabilities. Returns series of
            Brier scores indexed by instance.
        model_predictions: Generates model predictions for evaluator's feature
            set, applying threshold-based binarization if specified, and returns
            predictions as a series indexed by instance.
    """

    def __init__(
        self,
        X: pd.DataFrame,
        y: pd.Series,
        model: Union[
            RandomForestClassifier,
            LogisticRegression,
            MLPClassifier,
            XGBClassifier,
        ],
        encoding: Optional[str],
        aggregate: bool,
    ) -> None:
        """Initialize the FeatureImportance class."""
        super().__init__()
        self.X = X
        self.y = y
        self.model = model
        self.encoding = encoding
        self.aggregate = aggregate

    def brier_scores(self) -> pd.Series:
        """Calculates Brier scores for each instance in the evaluator's dataset.

        Returns:
            Series: Brier scores for each instance.
        """
        probas = self.model.predict_proba(self.X)

        if probas.shape[1] == 1:
            brier_scores = [
                brier_score_loss([true_label], [pred_proba[0]])
                for true_label, pred_proba in zip(self.y, probas, strict=False)
            ]
        else:
            brier_scores = [
                brier_score_loss(
                    [1 if true_label == idx else 0 for idx in range(len(proba))], proba
                )
                for true_label, proba in zip(self.y, probas, strict=False)
            ]

        return pd.Series(brier_scores, index=self.y.index)

    def model_predictions(self) -> pd.Series:
        """Generates model predictions for the evaluator's feature set.

        Returns:
            pred: Predicted labels as a series.
        """
        if (
            hasattr(self.model, "best_threshold")
            and self.model.best_threshold is not None
        ):
            final_probs = get_probs(model=self.model, classification="binary", X=self.X)
            if final_probs is not None:
                pred = pd.Series(
                    (final_probs >= self.model.best_threshold).astype(int),
                    index=self.X.index,
                )
            else:
                pred = pd.Series(self.model.predict(self.X), index=self.X.index)
        else:
            pred = pd.Series(self.model.predict(self.X), index=self.X.index)

        return pred

    def _feature_mapping(self, features: List[str]) -> List[str]:
        """Maps a list of feature names to their original labels.

        Args:
            features (List[str]): List of feature names to be mapped.

        Returns:
            List[str]: List of mapped feature names, with original labels applied
                where available.
        """
        return [self.feature_mapping.get(feature, feature) for feature in features]

    @staticmethod
    def _aggregate_one_hot_importances(
        fi_df: pd.DataFrame,
    ) -> pd.DataFrame:
        """Aggregate importance scores of one-hot encoded variables.

        Args:
            fi_df (pd.DataFrame): DataFrame with features and their
                importance scores.

        Returns:
            pd.DataFrame: Updated DataFrame with aggregated importance scores.
        """
        base_names = fi_df["Feature"].apply(_get_base_name)
        aggregated_importances = (
            fi_df.groupby(base_names)["Importance"].sum().reset_index()
        )
        aggregated_importances.columns = ["Feature", "Importance"]
        original_features = fi_df["Feature"][
            ~fi_df["Feature"].apply(_is_one_hot_encoded)
        ].unique()

        aggregated_or_original = (
            pd.concat(
                [
                    aggregated_importances,
                    fi_df[fi_df["Feature"].isin(original_features)],
                ]
            )
            .drop_duplicates()
            .sort_values(by="Importance", ascending=False)
        )

        return aggregated_or_original.reset_index(drop=True)

    @staticmethod
    def _aggregate_shap_one_hot(
        shap_values: np.ndarray, feature_names: List[str]
    ) -> Tuple[np.ndarray, List[str]]:
        """Aggregate SHAP values of one-hot encoded variables.

        Args:
            shap_values (np.ndarray): SHAP values.
            feature_names (List[str]): List of features corresponding to SHAP values.

        Returns:
            Tuple[np.ndarray, List[str]]: Aggregated SHAP values and updated list of
            feature names.
        """
        if shap_values.ndim == 3:
            shap_values = shap_values.mean(axis=2)

        shap_df = pd.DataFrame(shap_values, columns=feature_names)
        base_names = [_get_base_name(feature=feature) for feature in shap_df.columns]
        feature_mapping = dict(zip(shap_df.columns, base_names, strict=False))
        aggregated_shap_df = shap_df.groupby(feature_mapping, axis=1).sum()
        return aggregated_shap_df.values, list(aggregated_shap_df.columns)

    @staticmethod
    def _aggregate_one_hot_features_for_clustering(X: pd.DataFrame) -> pd.DataFrame:
        """Aggregate one-hot encoded features for clustering.

        Args:
            X (pd.DataFrame): Input DataFrame with one-hot encoded features.

        Returns:
            pd.DataFrame: DataFrame with aggregated one-hot encoded features.
        """
        X_copy = X.copy()
        one_hot_encoded_cols = [
            col for col in X_copy.columns if _is_one_hot_encoded(feature=col)
        ]
        base_names = {col: _get_base_name(feature=col) for col in one_hot_encoded_cols}
        aggregated_data = X_copy.groupby(base_names, axis=1).sum()
        non_one_hot_cols = [
            col for col in X_copy.columns if col not in one_hot_encoded_cols
        ]
        return pd.concat([X_copy[non_one_hot_cols], aggregated_data], axis=1)
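The aggregation helpers above rely on the module-private functions `_is_one_hot_encoded` and `_get_base_name` to map one-hot columns back to their base feature. A rough sketch of the idea with hypothetical stand-ins (the real splitting rule lives in `_baseeval.py` and may differ):

```python
import pandas as pd

# Hypothetical stand-ins for the module-private helpers; assumes one-hot
# columns look like "<base>_<level>", e.g. "smoker_1".
def is_one_hot_encoded(feature: str) -> bool:
    return "_" in feature and feature.rsplit("_", 1)[1].isdigit()

def get_base_name(feature: str) -> str:
    return feature.rsplit("_", 1)[0] if is_one_hot_encoded(feature) else feature

fi_df = pd.DataFrame(
    {"Feature": ["age", "smoker_0", "smoker_1"], "Importance": [0.5, 0.2, 0.3]}
)
base = fi_df["Feature"].apply(get_base_name)
print(fi_df.groupby(base)["Importance"].sum())
# age       0.5
# smoker    0.5
```

Summing the importances of `smoker_0` and `smoker_1` into a single `smoker` entry is what makes the aggregated feature-importance plots easier to read.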

__init__(X, y, model, encoding, aggregate)

Initialize the EvaluatorMethods class.

Source code in periomod/evaluation/_baseeval.py
def __init__(
    self,
    X: pd.DataFrame,
    y: pd.Series,
    model: Union[
        RandomForestClassifier,
        LogisticRegression,
        MLPClassifier,
        XGBClassifier,
    ],
    encoding: Optional[str],
    aggregate: bool,
) -> None:
    """Initialize the FeatureImportance class."""
    super().__init__()
    self.X = X
    self.y = y
    self.model = model
    self.encoding = encoding
    self.aggregate = aggregate

brier_scores()

Calculates Brier scores for each instance in the evaluator's dataset.

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `Series` | `Series` | Brier scores for each instance. |

Source code in periomod/evaluation/_baseeval.py
def brier_scores(self) -> pd.Series:
    """Calculates Brier scores for each instance in the evaluator's dataset.

    Returns:
        Series: Brier scores for each instance.
    """
    probas = self.model.predict_proba(self.X)

    if probas.shape[1] == 1:
        brier_scores = [
            brier_score_loss([true_label], [pred_proba[0]])
            for true_label, pred_proba in zip(self.y, probas, strict=False)
        ]
    else:
        brier_scores = [
            brier_score_loss(
                [1 if true_label == idx else 0 for idx in range(len(proba))], proba
            )
            for true_label, proba in zip(self.y, probas, strict=False)
        ]

    return pd.Series(brier_scores, index=self.y.index)
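For a single binary instance the Brier score is the squared difference between the predicted probability and the true label, so lower is better. Because the scores are returned per instance and index-aligned with `y`, it is easy to surface the observations the model predicted worst (continuing the illustrative `evaluator` from above):

```python
scores = evaluator.brier_scores()
worst = scores.nlargest(5)  # the five least reliable predictions
print(worst)
```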

model_predictions()

Generates model predictions for the evaluator's feature set.

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `pred` | `Series` | Predicted labels as a series. |

Source code in periomod/evaluation/_baseeval.py
def model_predictions(self) -> pd.Series:
    """Generates model predictions for the evaluator's feature set.

    Returns:
        pred: Predicted labels as a series.
    """
    if (
        hasattr(self.model, "best_threshold")
        and self.model.best_threshold is not None
    ):
        final_probs = get_probs(model=self.model, classification="binary", X=self.X)
        if final_probs is not None:
            pred = pd.Series(
                (final_probs >= self.model.best_threshold).astype(int),
                index=self.X.index,
            )
        else:
            pred = pd.Series(self.model.predict(self.X), index=self.X.index)
    else:
        pred = pd.Series(self.model.predict(self.X), index=self.X.index)

    return pred
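If the fitted model carries a `best_threshold` attribute (for example, set during threshold tuning), predictions are binarized against it instead of the estimator's default 0.5 cutoff. A hedged sketch, attaching the attribute manually purely for illustration and assuming `get_probs` returns positive-class probabilities for a binary task, as the comparison in the source suggests:

```python
model.best_threshold = 0.3  # illustrative; normally set by threshold tuning
preds = evaluator.model_predictions()
print(preds.value_counts())  # typically more positives than with a 0.5 cutoff
```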