validmind 2.2.5__py3-none-any.whl → 2.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. validmind/__version__.py +1 -1
  2. validmind/{ai.py → ai/test_descriptions.py} +127 -69
  3. validmind/ai/utils.py +104 -0
  4. validmind/api_client.py +70 -31
  5. validmind/client.py +5 -5
  6. validmind/logging.py +38 -32
  7. validmind/models/foundation.py +10 -6
  8. validmind/models/function.py +3 -1
  9. validmind/models/metadata.py +1 -1
  10. validmind/test_suites/__init__.py +1 -7
  11. validmind/test_suites/regression.py +0 -16
  12. validmind/test_suites/statsmodels_timeseries.py +1 -1
  13. validmind/tests/data_validation/ACFandPACFPlot.py +36 -27
  14. validmind/tests/{model_validation/statsmodels → data_validation}/ADF.py +42 -13
  15. validmind/tests/data_validation/BivariateScatterPlots.py +38 -41
  16. validmind/tests/{model_validation/statsmodels → data_validation}/DFGLSArch.py +67 -11
  17. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +1 -1
  18. validmind/tests/data_validation/HighPearsonCorrelation.py +12 -3
  19. validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
  20. validmind/tests/{model_validation/statsmodels → data_validation}/KPSS.py +64 -11
  21. validmind/tests/{model_validation/statsmodels → data_validation}/PhillipsPerronArch.py +65 -11
  22. validmind/tests/data_validation/ScatterPlot.py +1 -1
  23. validmind/tests/data_validation/SeasonalDecompose.py +12 -7
  24. validmind/tests/data_validation/TabularDateTimeHistograms.py +29 -33
  25. validmind/tests/data_validation/WOEBinPlots.py +1 -1
  26. validmind/tests/data_validation/WOEBinTable.py +1 -1
  27. validmind/tests/{model_validation/statsmodels → data_validation}/ZivotAndrewsArch.py +65 -11
  28. validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  29. validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  30. validmind/tests/data_validation/nlp/Mentions.py +1 -1
  31. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -1
  32. validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  33. validmind/tests/data_validation/nlp/Sentiment.py +1 -1
  34. validmind/tests/data_validation/nlp/TextDescription.py +5 -1
  35. validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  36. validmind/tests/decorator.py +1 -1
  37. validmind/tests/model_validation/FeaturesAUC.py +5 -3
  38. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +4 -0
  39. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +4 -0
  40. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +4 -0
  41. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +4 -0
  42. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -0
  43. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +4 -0
  44. validmind/tests/model_validation/ragas/AnswerCorrectness.py +3 -3
  45. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  46. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  47. validmind/tests/model_validation/ragas/AspectCritique.py +14 -8
  48. validmind/tests/model_validation/ragas/ContextEntityRecall.py +3 -4
  49. validmind/tests/model_validation/ragas/ContextPrecision.py +4 -5
  50. validmind/tests/model_validation/ragas/ContextRecall.py +3 -4
  51. validmind/tests/model_validation/ragas/ContextRelevancy.py +5 -4
  52. validmind/tests/model_validation/ragas/Faithfulness.py +6 -5
  53. validmind/tests/model_validation/ragas/utils.py +35 -9
  54. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  55. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +1 -1
  56. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +6 -8
  57. validmind/tests/model_validation/sklearn/RegressionErrors.py +1 -1
  58. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +14 -8
  59. validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
  60. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -1
  61. validmind/tests/model_validation/statsmodels/GINITable.py +1 -1
  62. validmind/tests/model_validation/statsmodels/JarqueBera.py +1 -1
  63. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +1 -1
  64. validmind/tests/model_validation/statsmodels/LJungBox.py +1 -1
  65. validmind/tests/model_validation/statsmodels/Lilliefors.py +1 -1
  66. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +4 -0
  67. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +9 -4
  68. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -2
  69. validmind/tests/model_validation/statsmodels/RunsTest.py +1 -1
  70. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +1 -1
  71. validmind/tests/prompt_validation/Bias.py +14 -11
  72. validmind/tests/prompt_validation/Clarity.py +14 -11
  73. validmind/tests/prompt_validation/Conciseness.py +14 -11
  74. validmind/tests/prompt_validation/Delimitation.py +14 -11
  75. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  76. validmind/tests/prompt_validation/Robustness.py +11 -11
  77. validmind/tests/prompt_validation/Specificity.py +14 -11
  78. validmind/tests/prompt_validation/ai_powered_test.py +53 -75
  79. validmind/unit_metrics/composite.py +2 -1
  80. validmind/utils.py +4 -49
  81. validmind/vm_models/dataset/dataset.py +17 -3
  82. validmind/vm_models/dataset/utils.py +2 -2
  83. validmind/vm_models/model.py +1 -1
  84. validmind/vm_models/test/metric.py +1 -8
  85. validmind/vm_models/test/result_wrapper.py +27 -34
  86. validmind/vm_models/test/test.py +3 -0
  87. validmind/vm_models/test/threshold_test.py +1 -1
  88. validmind/vm_models/test_suite/runner.py +12 -6
  89. validmind/vm_models/test_suite/summary.py +18 -7
  90. validmind/vm_models/test_suite/test.py +13 -20
  91. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/METADATA +1 -1
  92. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/RECORD +95 -104
  93. validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -114
  94. validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -150
  95. validmind/tests/data_validation/PiTPDHistogram.py +0 -152
  96. validmind/tests/model_validation/statsmodels/ADFTest.py +0 -88
  97. validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -198
  98. validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -151
  99. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -146
  100. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -144
  101. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -127
  102. validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -130
  103. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/LICENSE +0 -0
  104. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/WHEEL +0 -0
  105. {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/entry_points.txt +0 -0
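
Note on relocated tests: several stationarity tests (ADF, DFGLSArch, KPSS, PhillipsPerronArch, ZivotAndrewsArch) move from `validmind/tests/model_validation/statsmodels/` to `validmind/tests/data_validation/`, so their fully qualified test IDs change accordingly. A minimal sketch of what that means for callers, assuming the 2.x `init_dataset`/`run_test` entry points and a toy DataFrame (the names, signatures, and data here are illustrative only; check the 2.3.1 API reference for the exact interface):

```python
import pandas as pd
import validmind as vm

# Illustrative data only; any numeric, time-series-like frame would do.
df = pd.DataFrame({"y": range(30), "x": range(30)})

# Assumption: vm.init_dataset and vm.tests.run_test follow the documented 2.x API
# and a ValidMind connection has already been initialized.
vm_dataset = vm.init_dataset(dataset=df, target_column="y")

# 2.2.5 test ID: "validmind.model_validation.statsmodels.ADF"
# 2.3.1 test ID: "validmind.data_validation.ADF"
result = vm.tests.run_test(
    "validmind.data_validation.ADF",
    inputs={"dataset": vm_dataset},
)
```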
validmind/tests/data_validation/PiTCreditScoresHistogram.py (deleted)
@@ -1,150 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import pandas as pd
- import plotly.graph_objects as go
- from plotly.subplots import make_subplots
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class PiTCreditScoresHistogram(Metric):
-     """
-     Generates a histogram visualization for observed and predicted credit default scores.
-
-     **Purpose**:
-     The PiT (Point in Time) Credit Scores Histogram metric is used to evaluate the predictive performance of a credit
-     risk assessment model. This metric provides a visual representation of observed versus predicted default scores and
-     enables quick and intuitive comparison for model assessment.
-
-     **Test Mechanism**:
-     This metric generates histograms for both observed and predicted score distributions of defaults and non-defaults.
-     The simultaneous representation of both the observed and predicted scores sheds light on the model's ability to
-     accurately predict credit risk.
-
-     **Signs of High Risk**:
-     - Significant discrepancies between the observed and predicted histograms, suggesting that the model may not be
-     adequately addressing certain risk factors.
-     - Concentration of predicted defaults towards one end of the graph, or uneven distribution in comparison to
-     observed scores, indicating potential issues in the model's interpretation of the data or outcome prediction.
-
-     **Strengths**:
-     - Provides an intuitive visual representation of model performance that's easy to comprehend, even for individuals
-     without a technical background.
-     - Useful for understanding the model's ability to distinguish between defaulting and non-defaulting entities.
-     - Specifically tailored for assessing credit risk models. The Point in Time (PiT) factor considers the evolution of
-     credit risk over time.
-
-     **Limitations**:
-     - As the information is visual, precise and quantitative results for detailed statistical analyses may not be
-     obtained.
-     - The method relies on manual inspection and comparison, introducing subjectivity and potential bias.
-     - Subtle discrepancies might go unnoticed and it could be less reliable for identifying such cues.
-     - Performance may degrade when score distributions overlap significantly or when too many scores are plotted,
-     resulting in cluttered or hard-to-decipher graphs.
-     """
-
-     name = "pit_credit_scores_histogram"
-     required_inputs = ["dataset", "model"]
-     default_params = {"title": "Histogram of Scores"}
-     metadata = {
-         "task_types": ["classification"],
-         "tags": ["tabular_data", "visualization", "credit_risk"],
-     }
-
-     @staticmethod
-     def plot_score_histogram(
-         df,
-         default_column,
-         predicted_default_column,
-         scores_column,
-         title,
-         point_in_time_date,
-     ):
-         fig = make_subplots(
-             rows=1, cols=2, subplot_titles=("Observed Default", "Predicted Default")
-         )
-
-         observed_data_0 = df[df[default_column] == 0][scores_column]
-         observed_data_1 = df[df[default_column] == 1][scores_column]
-
-         predicted_data_0 = df[df[predicted_default_column] == 0][scores_column]
-         predicted_data_1 = df[df[predicted_default_column] == 1][scores_column]
-
-         fig.add_trace(
-             go.Histogram(x=observed_data_0, opacity=0.75, name="Observed Default = 0"),
-             row=1,
-             col=1,
-         )
-         fig.add_trace(
-             go.Histogram(x=observed_data_1, opacity=0.75, name="Observed Default = 1"),
-             row=1,
-             col=1,
-         )
-
-         fig.add_trace(
-             go.Histogram(
-                 x=predicted_data_0, opacity=0.75, name="Predicted Default = 0"
-             ),
-             row=1,
-             col=2,
-         )
-         fig.add_trace(
-             go.Histogram(
-                 x=predicted_data_1, opacity=0.75, name="Predicted Default = 1"
-             ),
-             row=1,
-             col=2,
-         )
-
-         title += f" (PiT: {point_in_time_date.strftime('%d %b %Y')})"
-         fig.update_layout(barmode="overlay", title_text=title)
-
-         return fig
-
-     def run(self):
-         df = self.inputs.dataset.df
-         default_column = (
-             self.params.get("default_column") or self.inputs.dataset.target_column
-         )
-         predicted_default_column = (
-             self.params.get("predicted_default_column")
-             or self.inputs.dataset.y_pred(self.inputs.model),
-         )
-         scores_column = self.params["scores_column"]
-         point_in_time_column = self.params["point_in_time_column"]
-
-         title = self.params["title"]
-
-         point_in_time_date = pd.to_datetime(df[point_in_time_column].iloc[0])
-
-         fig = self.plot_score_histogram(
-             df,
-             default_column,
-             predicted_default_column,
-             scores_column,
-             title,
-             point_in_time_date,
-         )
-
-         return self.cache_results(
-             metric_value={
-                 "score_histogram": {
-                     "observed_scores": list(df[df[default_column] == 1][scores_column]),
-                     "predicted_scores": list(
-                         df[df[predicted_default_column] == 1][scores_column]
-                     ),
-                 },
-             },
-             figures=[
-                 Figure(
-                     for_object=self,
-                     key="score_histogram",
-                     figure=fig,
-                 )
-             ],
-         )
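
The two deleted PiT histogram metrics (this one and PiTPDHistogram below) both overlay observed versus predicted default distributions in a two-panel Plotly figure. For readers who relied on them, a standalone sketch of the same overlay outside the Metric wrapper, using hypothetical column names (`default`, `pred_default`, `score`) and toy data:

```python
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Hypothetical frame: observed defaults, predicted defaults, and a score column.
df = pd.DataFrame(
    {
        "default": [0, 0, 1, 1, 0, 1],
        "pred_default": [0, 1, 1, 0, 0, 1],
        "score": [620, 580, 540, 555, 700, 510],
    }
)

fig = make_subplots(
    rows=1, cols=2, subplot_titles=("Observed Default", "Predicted Default")
)
for flag_col, label, subplot_col in [("default", "Observed", 1), ("pred_default", "Predicted", 2)]:
    for value in (0, 1):
        # One overlaid histogram per default flag value, per panel.
        fig.add_trace(
            go.Histogram(
                x=df[df[flag_col] == value]["score"],
                opacity=0.75,
                name=f"{label} Default = {value}",
            ),
            row=1,
            col=subplot_col,
        )
fig.update_layout(barmode="overlay", title_text="Histogram of Scores")
# fig.show()
```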
validmind/tests/data_validation/PiTPDHistogram.py (deleted)
@@ -1,152 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import pandas as pd
- import plotly.graph_objects as go
- from plotly.subplots import make_subplots
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class PiTPDHistogram(Metric):
-     """
-     Assesses credit risk prediction accuracy of a model by comparing actual and predicted defaults at a chosen point in
-     time.
-
-     **Purpose**: The PiTPDHistogram metric uses Probability of Default (PD) calculations for individual instances
-     within both training and test data sets in order to assess a model's proficiency in predicting credit risk. A
-     distinctive point in time (PiT) is chosen for these PD calculations, and the results for both actual and predicted
-     defaults are presented in histogram form. This visualization is aimed at simplifying the understanding of model
-     prediction accuracy.
-
-     **Test Mechanism**: Instances are categorized into two groups - those for actual defaults and those for predicted
-     defaults, with '1' indicating a default and '0' indicating non-default. PD is calculated for each instance, and
-     based on these calculations, two histograms are created, one for actual defaults and one for predicted defaults. If
-     the predicted default frequency matches that of the actual defaults, the model's performance is deemed effective.
-
-     **Signs of High Risk**:
-     - Discrepancies between the actual and predicted default histograms may suggest model inefficiency.
-     - Variations in histogram shapes or divergences in default probability distributions could be concerning.
-     - Significant mismatches in peak default probabilities could also be red flags.
-
-     **Strengths**:
-     - Provides a visual comparison between actual and predicted defaults, aiding in the understanding of model
-     performance.
-     - Helps reveal model bias and areas where the model's performance could be improved.
-     - Easier to understand than purely numerical evaluations or other complicated visualization measures.
-
-     **Limitations**:
-     - The metric remains largely interpretive and subjective, as the extent and relevance of visual discrepancies often
-     need to be evaluated manually, leading to potentially inconsistent results across different analyses.
-     - This metric alone may not capture all the complexities and nuances of model performance.
-     - The information provided is limited to a specific point in time, potentially neglecting the model's performance
-     under various circumstances or different time periods.
-     """
-
-     name = "pit_pd_histogram"
-     required_context = ["dataset"]
-     default_params = {"title": "Histogram of PiT Probability of Default"}
-     metadata = {
-         "task_types": ["classification"],
-         "tags": ["tabular_data", "visualization", "credit_risk"],
-     }
-
-     @staticmethod
-     def plot_pit_pd_histogram(
-         df,
-         default_column,
-         predicted_default_column,
-         default_probabilities_column,
-         title,
-         point_in_time_date,
-     ):
-         fig = make_subplots(
-             rows=1, cols=2, subplot_titles=("Observed Default", "Predicted Default")
-         )
-
-         observed_data_0 = df[df[default_column] == 0][default_probabilities_column]
-         observed_data_1 = df[df[default_column] == 1][default_probabilities_column]
-
-         predicted_data_0 = df[df[predicted_default_column] == 0][
-             default_probabilities_column
-         ]
-         predicted_data_1 = df[df[predicted_default_column] == 1][
-             default_probabilities_column
-         ]
-
-         fig.add_trace(
-             go.Histogram(x=observed_data_0, opacity=0.75, name="Observed Default = 0"),
-             row=1,
-             col=1,
-         )
-         fig.add_trace(
-             go.Histogram(x=observed_data_1, opacity=0.75, name="Observed Default = 1"),
-             row=1,
-             col=1,
-         )
-
-         fig.add_trace(
-             go.Histogram(
-                 x=predicted_data_0, opacity=0.75, name="Predicted Default = 0"
-             ),
-             row=1,
-             col=2,
-         )
-         fig.add_trace(
-             go.Histogram(
-                 x=predicted_data_1, opacity=0.75, name="Predicted Default = 1"
-             ),
-             row=1,
-             col=2,
-         )
-
-         title += f" (PiT: {point_in_time_date.strftime('%d %b %Y')})"
-         fig.update_layout(barmode="overlay", title_text=title)
-
-         return fig
-
-     def run(self):
-         df = self.inputs.dataset.df
-         default_column = self.params["default_column"]
-         predicted_default_column = self.params["predicted_default_column"]
-         default_probabilities_column = self.params["default_probabilities_column"]
-         point_in_time_column = self.params["point_in_time_column"]
-
-         title = self.params["title"]
-
-         point_in_time_date = pd.to_datetime(df[point_in_time_column].iloc[0])
-
-         fig = self.plot_pit_pd_histogram(
-             df,
-             default_column,
-             predicted_default_column,
-             default_probabilities_column,
-             title,
-             point_in_time_date,
-         )
-
-         return self.cache_results(
-             metric_value={
-                 "prob_histogram": {
-                     "observed_probs": list(
-                         df[df[default_column] == 1][default_probabilities_column]
-                     ),
-                     "predicted_probs": list(
-                         df[df[predicted_default_column] == 1][
-                             default_probabilities_column
-                         ]
-                     ),
-                 },
-             },
-             figures=[
-                 Figure(
-                     for_object=self,
-                     key="prob_histogram",
-                     figure=fig,
-                 )
-             ],
-         )
validmind/tests/model_validation/statsmodels/ADFTest.py (deleted)
@@ -1,88 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- from statsmodels.tsa.stattools import adfuller
-
- from validmind.vm_models import ThresholdTest, ThresholdTestResult
-
-
- @dataclass
- class ADFTest(ThresholdTest):
-     """
-     Assesses the stationarity of time series data using the Augmented Dickey-Fuller (ADF) test.
-
-     **Purpose**: The Augmented Dickey-Fuller (ADF) metric test is designed to evaluate the presence of a unit root in a
-     time series. This essentially translates to assessing the stationarity of a time series dataset. This is vital in
-     time series analysis, regression tasks, and forecasting, as these often need the data to be stationary.
-
-     **Test Mechanism**: This test application utilizes the "adfuller" function from Python's “statsmodels” library. It
-     applies this function to each column of the training dataset, subsequently calculating the ADF statistic, p-value,
-     the number of lags used, and the number of observations in the sample for each column. If a column's p-value is
-     lower than the predetermined threshold (usually 0.05), the series is considered stationary, and the test is deemed
-     passed for that column.
-
-     **Signs of High Risk**:
-     - A p-value that surpasses the threshold value indicates a high risk or potential model performance issue.
-     - A high p-value suggests that the null hypothesis (of a unit root being present) cannot be rejected. This in turn
-     suggests that the series is non-stationary which could potentially yield unreliable and falsified results for the
-     model's performance and forecast.
-
-     **Strengths**:
-     - Archetypal Test for Stationarity: The ADF test is a comprehensive approach towards testing the stationarity of
-     time series data. Such testing is vital for many machine learning and statistical models.
-     - Detailed Output: The function generates detailed output, including the number of lags used and the number of
-     observations, which adds to understanding a series’ behaviour.
-
-     **Limitations**:
-     - Dependence on Threshold: The result of this test freights heavily on the threshold chosen. Hence, an imprudent
-     threshold value might lead to false acceptance or rejection of the null hypothesis.
-     - Not Effective for Trending Data: The test suffers when it operates under the assumption that the data does not
-     encapsulate any deterministic trend. In the presence of such a trend, it might falsely identify a series as
-     non-stationary.
-     - Potential for False Positives: The ADF test especially in the case of larger datasets, tends to reject the null
-     hypothesis, escalating the chances of false positives.
-     """
-
-     name = "adf_test"
-     required_inputs = ["dataset"]
-     default_params = {"threshold": 0.05}
-     metadata = {
-         "task_types": ["regression"],
-         "tags": [
-             "time_series_data",
-             "statsmodels",
-             "forecasting",
-             "statistical_test",
-             "stationarity",
-         ],
-     }
-
-     def run(self):
-         x_train = self.inputs.dataset.df
-
-         results = []
-         for col in x_train.columns:
-             # adf_values[col] = adfuller(x_train[col].values)
-             adf, pvalue, usedlag, nobs, critical_values, icbest = adfuller(
-                 x_train[col].values
-             )
-
-             col_passed = pvalue < self.params["threshold"]
-             results.append(
-                 ThresholdTestResult(
-                     column=col,
-                     passed=col_passed,
-                     values={
-                         "adf": adf,
-                         "pvalue": pvalue,
-                         "usedlag": usedlag,
-                         "nobs": nobs,
-                         "icbest": icbest,
-                     },
-                 )
-             )
-
-         return self.cache_results(results, passed=all([r.passed for r in results]))
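
The removed ADFTest was a thin wrapper around `statsmodels.tsa.stattools.adfuller`, applied column by column with a 0.05 p-value threshold. A minimal standalone sketch of the same per-column stationarity check, using a hypothetical DataFrame in place of the test's training dataset:

```python
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# Hypothetical numeric frame; in the removed test this was self.inputs.dataset.df.
x_train = pd.DataFrame({"a": range(50), "b": [v % 7 for v in range(50)]})
threshold = 0.05  # same default the removed ADFTest used

for col in x_train.columns:
    # adfuller returns the ADF statistic, p-value, lags used, number of
    # observations, critical values, and the information criterion.
    adf_stat, pvalue, usedlag, nobs, critical_values, icbest = adfuller(
        x_train[col].values
    )
    passed = pvalue < threshold  # reject the unit-root null => treat as stationary
    print(f"{col}: ADF={adf_stat:.3f}, p={pvalue:.3f}, lags={usedlag}, passed={passed}")
```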
validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py (deleted)
@@ -1,198 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import pandas as pd
- import plotly.graph_objects as go
- from sklearn.inspection import permutation_importance
-
- from validmind.logging import get_logger
- from validmind.vm_models import Figure, Metric
-
- logger = get_logger(__name__)
-
-
- @dataclass
- class FeatureImportanceAndSignificance(Metric):
-     """
-     Evaluates and visualizes the statistical significance and feature importance using regression and decision tree
-     models.
-
-     **Purpose**: The 'FeatureImportanceAndSignificance' test evaluates the statistical significance and the importance
-     of features in the context of the machine learning model. By comparing the p-values from a regression model and the
-     feature importances from a decision tree model, this test aids in determining the most significant variables from a
-     statistical and a machine learning perspective, assisting in feature selection during the model development process.
-
-     **Test Mechanism**: The test first compares the p-values from a regression model and the feature importances from a
-     decision tree model. These values are normalized to ensure a uniform comparison. The 'p_threshold' parameter is
-     used to determine what p-value is considered statistically significant and if the 'significant_only' parameter is
-     true, only features with p-values below this threshold are included in the final output. The output from this test
-     includes an interactive visualization displaying normalized p-values and the associated feature importances. The
-     test throws an error if it does not receive both a regression model and a decision tree model.
-
-     **Signs of High Risk**:
-     - Exceptionally high or low p-values, which suggest that a feature may not be significant or meaningful in the
-     context of the model.
-     - If many variables with small feature importance values have significant p-values, this could indicate that the
-     model might be overfitting.
-
-     **Strengths**:
-     - Combines two perspectives statistical significance (p-values) and feature importance (decision tree model),
-     making it a robust feature selection test.
-     - Provides an interactive visualization making it easy to interpret and understand the results.
-
-     **Limitations**:
-     - The test only works with a regression model and a decision tree model which may limit its applicability.
-     - The test does not take into account potential correlations or causative relationships between features which may
-     lead to misinterpretations of significance and importance.
-     - Over-reliance on the p-value as a cut-off for feature significance can be seen as arbitrary and may not truly
-     reflect the real-world importance of the feature.
-     """
-
-     name = "feature_importance_and_significance"
-     required_inputs = ["models"]
-     default_params = {
-         "fontsize": 10,
-         "p_threshold": 0.05,
-         "significant_only": False,
-         "figure_height": 800,
-         "bar_width": 0.3,
-     }
-     metadata = {
-         "task_types": ["regression"],
-         "tags": [
-             "statsmodels",
-             "feature_importance",
-             "statistical_test",
-             "visualization",
-         ],
-     }
-
-     def compute_p_values_and_feature_importances(
-         self, regression_model, decision_tree_model
-     ):
-         p_values = regression_model.model.pvalues
-         feature_importances = permutation_importance(
-             decision_tree_model.model,
-             decision_tree_model.train_ds.x,
-             decision_tree_model.train_ds.y,
-             random_state=0,
-             n_jobs=-2,
-         ).importances_mean
-
-         p_values = p_values / max(p_values)
-         feature_importances = feature_importances / max(feature_importances)
-
-         return p_values, feature_importances
-
-     def create_dataframe(
-         self,
-         p_values,
-         feature_importances,
-         regression_model,
-         significant_only,
-         p_threshold,
-     ):
-         df = pd.DataFrame(
-             {
-                 "Normalized p-value": p_values,
-                 "Normalized Feature Importance": feature_importances,
-             },
-             index=regression_model.train_ds.x_df().columns,
-         )
-
-         if significant_only:
-             df = df[df["Normalized p-value"] <= p_threshold]
-
-         df = df.sort_values(by="Normalized Feature Importance", ascending=True)
-
-         return df
-
-     def create_figure(self, df, fontsize, figure_height, bar_width):
-         fig = go.Figure()
-
-         title_text = (
-             "Significant Features (p-value <= {0})".format(self.params["p_threshold"])
-             if self.params["significant_only"]
-             else "All Features"
-         )
-
-         fig.update_layout(
-             title=title_text,
-             barmode="group",
-             height=figure_height,
-             yaxis=dict(tickfont=dict(size=fontsize)),
-             xaxis=dict(title="Normalized Value", titlefont=dict(size=fontsize)),
-         )
-
-         fig.add_trace(
-             go.Bar(
-                 y=df.index,
-                 x=df["Normalized p-value"],
-                 name="Normalized p-value",
-                 orientation="h",
-                 marker=dict(color="skyblue"),
-                 width=bar_width,
-             )
-         )
-
-         fig.add_trace(
-             go.Bar(
-                 y=df.index,
-                 x=df["Normalized Feature Importance"],
-                 name="Normalized Feature Importance",
-                 orientation="h",
-                 marker=dict(color="orange"),
-                 width=bar_width,
-             )
-         )
-
-         return fig
-
-     def run(self):
-         fontsize = self.params["fontsize"]
-         significant_only = self.params["significant_only"]
-         p_threshold = self.params["p_threshold"]
-         figure_height = self.params["figure_height"]
-         bar_width = self.params["bar_width"]
-
-         all_models = []
-
-         if self.inputs.models is not None:
-             all_models.extend(self.inputs.models)
-
-         if len(self.inputs.models) != 2:
-             raise ValueError("Two models must be provided")
-
-         regression_model = self.inputs.models[0]
-         decision_tree_model = self.inputs.models[1]
-
-         p_values, feature_importances = self.compute_p_values_and_feature_importances(
-             regression_model, decision_tree_model
-         )
-
-         df = self.create_dataframe(
-             p_values,
-             feature_importances,
-             regression_model,
-             significant_only,
-             p_threshold,
-         )
-
-         fig = self.create_figure(df, fontsize, figure_height, bar_width)
-
-         return self.cache_results(
-             figures=[
-                 Figure(
-                     for_object=self,
-                     key=self.key,
-                     figure=fig,
-                     metadata={
-                         "model_regression": str(regression_model.model),
-                         "model_decision_tree": str(decision_tree_model.model),
-                     },
-                 )
-             ]
-         )
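
The deleted FeatureImportanceAndSignificance test compared normalized regression p-values against normalized permutation importances from a tree model. A compact sketch of the same comparison without the Metric wrapper, on synthetic data (all names and data here are hypothetical):

```python
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.inspection import permutation_importance
from sklearn.tree import DecisionTreeRegressor

# Hypothetical data: two informative features plus one noise feature.
rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(200, 3)), columns=["x1", "x2", "noise"])
y = 2 * X["x1"] - 1.5 * X["x2"] + rng.normal(scale=0.5, size=200)

# Statistical significance: per-coefficient p-values from an OLS fit.
ols = sm.OLS(y, sm.add_constant(X)).fit()
p_values = ols.pvalues.drop("const")

# Model-based importance: permutation importance of a fitted decision tree.
tree = DecisionTreeRegressor(random_state=0).fit(X, y)
importances = permutation_importance(tree, X, y, random_state=0).importances_mean

# Normalize both measures to [0, 1] so they can sit side by side, as the
# removed test did before plotting.
comparison = pd.DataFrame(
    {
        "Normalized p-value": p_values / p_values.max(),
        "Normalized Feature Importance": importances / importances.max(),
    },
    index=X.columns,
)
print(comparison.sort_values("Normalized Feature Importance"))
```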