validmind 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. validmind/__init__.py +6 -3
  2. validmind/__version__.py +1 -1
  3. validmind/ai.py +193 -0
  4. validmind/api_client.py +45 -31
  5. validmind/client.py +33 -6
  6. validmind/datasets/classification/customer_churn.py +2 -2
  7. validmind/datasets/credit_risk/__init__.py +11 -0
  8. validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  9. validmind/datasets/credit_risk/lending_club.py +394 -0
  10. validmind/datasets/nlp/__init__.py +5 -0
  11. validmind/datasets/nlp/cnn_dailymail.py +98 -0
  12. validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
  13. validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
  14. validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
  15. validmind/errors.py +11 -1
  16. validmind/logging.py +9 -2
  17. validmind/models/huggingface.py +2 -2
  18. validmind/models/pytorch.py +3 -3
  19. validmind/models/sklearn.py +4 -4
  20. validmind/template.py +2 -2
  21. validmind/test_suites/__init__.py +4 -2
  22. validmind/tests/__init__.py +130 -45
  23. validmind/tests/data_validation/DatasetDescription.py +0 -1
  24. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  25. validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
  26. validmind/tests/data_validation/ScatterPlot.py +8 -2
  27. validmind/tests/data_validation/nlp/StopWords.py +1 -6
  28. validmind/tests/data_validation/nlp/TextDescription.py +20 -9
  29. validmind/tests/decorator.py +313 -0
  30. validmind/tests/model_validation/BertScore.py +1 -1
  31. validmind/tests/model_validation/BertScoreAggregate.py +1 -1
  32. validmind/tests/model_validation/BleuScore.py +1 -1
  33. validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
  34. validmind/tests/model_validation/ContextualRecall.py +1 -1
  35. validmind/tests/model_validation/FeaturesAUC.py +110 -0
  36. validmind/tests/model_validation/MeteorScore.py +92 -0
  37. validmind/tests/model_validation/RegardHistogram.py +6 -7
  38. validmind/tests/model_validation/RegardScore.py +4 -6
  39. validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
  40. validmind/tests/model_validation/RougeMetrics.py +7 -5
  41. validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
  42. validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
  43. validmind/tests/model_validation/TokenDisparity.py +1 -1
  44. validmind/tests/model_validation/ToxicityHistogram.py +1 -1
  45. validmind/tests/model_validation/ToxicityScore.py +1 -1
  46. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  47. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
  48. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
  49. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  50. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +16 -17
  51. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
  52. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  53. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
  54. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
  55. validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
  56. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
  57. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
  58. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
  59. validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
  60. validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
  61. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
  62. validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
  63. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +55 -5
  64. validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
  65. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +11 -5
  66. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
  67. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
  68. validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
  69. validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
  70. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
  71. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
  72. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  73. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  74. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  75. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +2 -2
  76. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
  77. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
  78. validmind/tests/prompt_validation/ai_powered_test.py +2 -0
  79. validmind/tests/test_providers.py +14 -124
  80. validmind/unit_metrics/__init__.py +75 -70
  81. validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
  82. validmind/unit_metrics/classification/sklearn/F1.py +13 -0
  83. validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
  84. validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
  85. validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
  86. validmind/unit_metrics/composite.py +228 -0
  87. validmind/unit_metrics/regression/GiniCoefficient.py +33 -0
  88. validmind/unit_metrics/regression/HuberLoss.py +23 -0
  89. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +30 -0
  90. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +16 -0
  91. validmind/unit_metrics/regression/MeanBiasDeviation.py +13 -0
  92. validmind/unit_metrics/regression/QuantileLoss.py +15 -0
  93. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +21 -0
  94. validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +13 -0
  95. validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +13 -0
  96. validmind/unit_metrics/regression/sklearn/RSquaredScore.py +13 -0
  97. validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +20 -0
  98. validmind/utils.py +20 -31
  99. validmind/vm_models/__init__.py +0 -2
  100. validmind/vm_models/dataset.py +623 -29
  101. validmind/vm_models/figure.py +52 -17
  102. validmind/vm_models/test/metric.py +33 -31
  103. validmind/vm_models/test/output_template.py +0 -27
  104. validmind/vm_models/test/result_wrapper.py +68 -36
  105. validmind/vm_models/test/test.py +4 -2
  106. validmind/vm_models/test/threshold_test.py +24 -14
  107. validmind/vm_models/test_context.py +7 -0
  108. validmind/vm_models/test_suite/runner.py +1 -1
  109. validmind/vm_models/test_suite/summary.py +3 -3
  110. validmind/vm_models/test_suite/test.py +1 -1
  111. validmind/vm_models/test_suite/test_suite.py +2 -1
  112. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/METADATA +18 -18
  113. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/RECORD +116 -94
  114. validmind-2.1.0.dist-info/entry_points.txt +3 -0
  115. validmind/tests/__types__.py +0 -62
  116. validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
  117. validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
  118. validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
  119. validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
  120. validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -20
  121. validmind/unit_metrics/sklearn/classification/F1.py +0 -22
  122. validmind/unit_metrics/sklearn/classification/Precision.py +0 -22
  123. validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -20
  124. validmind/unit_metrics/sklearn/classification/Recall.py +0 -20
  125. validmind/vm_models/test/unit_metric.py +0 -88
  126. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
  127. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
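Most of the hunks below share a single API change: tests now pass the VM model object itself to dataset.y_pred(...) instead of the model's input_id string, and probability-based tests read scores through a dataset.y_prob(...) accessor. A minimal sketch of the difference as it appears in test code (vm_dataset and vm_model are placeholder names, not objects defined in this diff):

# validmind 2.0.1 style: look up cached predictions by the model's input_id
y_pred = vm_dataset.y_pred(vm_model.input_id)

# validmind 2.1.0 style: pass the VM model object directly
y_pred = vm_dataset.y_pred(vm_model)

# probability-based tests (GINITable, PredictionProbabilitiesHistogram, ...) use
# the probability accessor the same way
y_prob = vm_dataset.y_prob(vm_model)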
validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py
@@ -6,7 +6,7 @@ import re
  from dataclasses import dataclass

  import numpy as np
- from sklearn import metrics
+ from sklearn.metrics import mean_absolute_error, mean_squared_error

  from validmind.errors import SkipTestError
  from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
@@ -67,12 +67,12 @@ class RegressionModelsPerformanceComparison(Metric):
  }

  def regression_errors(self, y_true_test, y_pred_test):
- mae_test = metrics.mean_absolute_error(y_true_test, y_pred_test)
+ mae_test = mean_absolute_error(y_true_test, y_pred_test)

  results = {}
  results["Mean Absolute Error (MAE)"] = mae_test

- mse_test = metrics.mean_squared_error(y_true_test, y_pred_test)
+ mse_test = mean_squared_error(y_true_test, y_pred_test)
  results["Mean Squared Error (MSE)"] = mse_test
  results["Root Mean Squared Error (RMSE)"] = np.sqrt(mse_test)

@@ -121,12 +121,14 @@ class RegressionModelsPerformanceComparison(Metric):

  if self.inputs.models is not None:
  all_models.extend(self.inputs.models)
+
  results = {}

  for idx, model in enumerate(all_models):
  result = self.regression_errors(
  y_true_test=self.inputs.dataset.y,
- y_pred_test=self.inputs.dataset.y_pred(model.input_id),
+ y_pred_test=self.inputs.dataset.y_pred(model),
  )
  results["model_" + str(idx)] = result
+
  return self.cache_results(results)
validmind/tests/model_validation/sklearn/RegressionR2Square.py
@@ -70,11 +70,11 @@ class RegressionR2Square(Metric):

  def run(self):
  y_train_true = self.inputs.datasets[0].y
- y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model.input_id)
+ y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
  y_train_true = y_train_true.astype(y_train_pred.dtype)

  y_test_true = self.inputs.datasets[1].y
- y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model.input_id)
+ y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
  y_test_true = y_test_true.astype(y_test_pred.dtype)

  r2s_train = metrics.r2_score(y_train_true, y_train_pred)
validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py
@@ -6,8 +6,10 @@ import warnings
  from dataclasses import dataclass

  import matplotlib.pyplot as plt
+ import numpy as np
  import shap

+ from validmind.errors import UnsupportedModelForSHAPError
  from validmind.logging import get_logger
  from validmind.vm_models import Figure, Metric

@@ -72,6 +74,9 @@ class SHAPGlobalImportance(Metric):
  "visualization",
  ],
  }
+ default_params = {
+ "kernel_explainer_samples": 10,
+ }

  def _generate_shap_plot(self, type_, shap_values, x_test):
  """
@@ -89,9 +94,30 @@ class SHAPGlobalImportance(Metric):

  summary_plot_extra_args = {}
  if type_ == "mean":
+ # Calculate the mean absolute SHAP value for each feature
+ mean_abs_shap = np.abs(shap_values).mean(axis=0)
+
+ # Find the maximum mean absolute SHAP value
+ max_shap_value = np.max(mean_abs_shap)
+
+ # Normalize all SHAP values based on the top feature
+ shap_values = (
+ shap_values / max_shap_value * 100
+ ) # scaling factor to make the top feature 100%
  summary_plot_extra_args = {"plot_type": "bar"}
+ shap.summary_plot(
+ shap_values, x_test, show=False, **summary_plot_extra_args
+ )
+
+ # Customize the plot using matplotlib
+ plt.xlabel("Normalized SHAP Value (Percentage)", fontsize=13)
+ plt.ylabel("Features", fontsize=13)
+ plt.title("Normalized Feature Importance", fontsize=13)
+ else:
+ shap.summary_plot(
+ shap_values, x_test, show=False, **summary_plot_extra_args
+ )

- shap.summary_plot(shap_values, x_test, show=False, **summary_plot_extra_args)
  figure = plt.gcf()
  # avoid displaying on notebooks and clears the canvas for the next plot
  plt.close()
@@ -127,22 +153,46 @@ class SHAPGlobalImportance(Metric):
  model_class == "XGBClassifier"
  or model_class == "RandomForestClassifier"
  or model_class == "CatBoostClassifier"
+ or model_class == "DecisionTreeClassifier"
+ or model_class == "RandomForestRegressor"
+ or model_class == "GradientBoostingRegressor"
  ):
  explainer = shap.TreeExplainer(trained_model)
  elif (
  model_class == "LogisticRegression"
  or model_class == "XGBRegressor"
  or model_class == "LinearRegression"
+ or model_class == "LinearSVC"
  ):
  explainer = shap.LinearExplainer(trained_model, self.inputs.dataset.x)
+ elif model_class == "SVC":
+ # KernelExplainer is slow so we use shap.sample to speed it up
+ explainer = shap.KernelExplainer(
+ trained_model.predict,
+ shap.sample(
+ self.inputs.dataset.x,
+ self.params["kernel_explainer_samples"],
+ ),
+ )
+ else:
+ raise UnsupportedModelForSHAPError(
+ f"Model {model_class} not supported for SHAP importance."
+ )
+
+ # KernelExplainer is slow so we use shap.sample to speed it up
+ if isinstance(explainer, shap.KernelExplainer):
+ shap_sample = shap.sample(
+ self.inputs.dataset.x_df(),
+ self.params["kernel_explainer_samples"],
+ )
  else:
- raise ValueError(f"Model {model_class} not supported for SHAP importance.")
+ shap_sample = self.inputs.dataset.x_df()

- shap_values = explainer.shap_values(self.inputs.dataset.x)
+ shap_values = explainer.shap_values(shap_sample)

  figures = [
- self._generate_shap_plot("mean", shap_values, self.inputs.dataset.x),
- self._generate_shap_plot("summary", shap_values, self.inputs.dataset.x),
+ self._generate_shap_plot("mean", shap_values, shap_sample),
+ self._generate_shap_plot("summary", shap_values, shap_sample),
  ]

  # restore warnings
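The SHAPGlobalImportance changes above add a kernel_explainer_samples parameter (default 10) that subsamples the dataset before running shap.KernelExplainer, and raise UnsupportedModelForSHAPError for unsupported model classes. A hedged sketch of overriding that parameter when running the test, assuming the run_test helper in validmind.tests and hypothetical input_ids:

from validmind.tests import run_test

# Sketch only: "my_model" and "test_dataset" are hypothetical input_ids registered
# via vm.init_model()/vm.init_dataset(); a larger sample slows KernelExplainer down
# but stabilizes the SHAP estimates.
run_test(
    "validmind.model_validation.sklearn.SHAPGlobalImportance",
    inputs={"model": "my_model", "dataset": "test_dataset"},
    params={"kernel_explainer_samples": 50},
)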
validmind/tests/model_validation/sklearn/SilhouettePlot.py
@@ -69,7 +69,7 @@ class SilhouettePlot(Metric):
  }

  def run(self):
- y_pred_train = self.inputs.dataset.y_pred(self.inputs.model.input_id)
+ y_pred_train = self.inputs.dataset.y_pred(self.inputs.model)
  # Calculate the silhouette score
  silhouette_avg = silhouette_score(
  self.inputs.dataset.x,
validmind/tests/model_validation/sklearn/TrainingTestDegradation.py
@@ -121,20 +121,20 @@ class TrainingTestDegradation(ThresholdTest):

  def run(self):
  y_train_true = self.inputs.datasets[0].y
- y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model.input_id)
+ y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
  y_train_true = y_train_true.astype(y_train_pred.dtype)

  y_test_true = self.inputs.datasets[1].y
- y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model.input_id)
+ y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
  y_test_true = y_test_true.astype(y_test_pred.dtype)

  report_train = metrics.classification_report(
- y_train_true, y_train_pred, output_dict=True
+ y_train_true, y_train_pred, output_dict=True, zero_division=0
  )
  report_train["roc_auc"] = multiclass_roc_auc_score(y_train_true, y_train_pred)

  report_test = metrics.classification_report(
- y_test_true, y_test_pred, output_dict=True
+ y_test_true, y_test_pred, output_dict=True, zero_division=0
  )
  report_test["roc_auc"] = multiclass_roc_auc_score(y_test_true, y_test_pred)

@@ -145,7 +145,13 @@ class TrainingTestDegradation(ThresholdTest):
  for metric_name in ["precision", "recall", "f1-score"]:
  train_score = report_train[class_name][metric_name]
  test_score = report_test[class_name][metric_name]
- degradation = (train_score - test_score) / train_score
+
+ # If training score is 0, degradation is assumed to be 100%
+ if train_score == 0:
+ degradation = 1.0
+ else:
+ degradation = (train_score - test_score) / train_score
+
  passed = degradation < self.params["max_threshold"]
  test_results.append(
  ThresholdTestResult(
validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py
@@ -137,11 +137,11 @@ class WeakspotsDiagnosis(ThresholdTest):
  prediction_column = f"{target_column}_pred"

  train_df = self.inputs.datasets[0].df.copy()
- train_class_pred = self.inputs.datasets[0].y_pred(self.inputs.model.input_id)
+ train_class_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
  train_df[prediction_column] = train_class_pred

  test_df = self.inputs.datasets[1].df.copy()
- test_class_pred = self.inputs.datasets[1].y_pred(self.inputs.model.input_id)
+ test_class_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
  test_df[prediction_column] = test_class_pred

  test_results = []
validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py (new file)
@@ -0,0 +1,140 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from dataclasses import dataclass
+
+ import numpy as np
+ import plotly.graph_objects as go
+ from matplotlib import cm
+
+ from validmind.vm_models import Figure, Metric
+
+
+ @dataclass
+ class CumulativePredictionProbabilities(Metric):
+ """
+ Visualizes cumulative probabilities of positive and negative classes for both training and testing in logistic
+ regression models.
+
+ **Purpose**: This metric is utilized to evaluate the distribution of predicted probabilities for positive and
+ negative classes in a logistic regression model. It's not solely intended to measure the model's performance but
+ also provides a visual assessment of the model's behavior by plotting the cumulative probabilities for positive and
+ negative classes across both the training and test datasets.
+
+ **Test Mechanism**: The logistic regression model is evaluated by first computing the predicted probabilities for
+ each instance in both the training and test datasets, which are then added as a new column in these sets. The
+ cumulative probabilities for positive and negative classes are subsequently calculated and sorted in ascending
+ order. Cumulative distributions of these probabilities are created for both positive and negative classes across
+ both training and test datasets. These cumulative probabilities are represented visually in a plot, containing two
+ subplots - one for the training data and the other for the test data, with lines representing cumulative
+ distributions of positive and negative classes.
+
+ **Signs of High Risk**:
+ - Imbalanced distribution of probabilities for either positive or negative classes.
+ - Notable discrepancies or significant differences between the cumulative probability distributions for the
+ training data versus the test data.
+ - Marked discrepancies or large differences between the cumulative probability distributions for positive and
+ negative classes.
+
+ **Strengths**:
+ - It offers not only numerical probabilities but also provides a visual illustration of data, which enhances the
+ ease of understanding and interpreting the model's behavior.
+ - Allows for the comparison of model's behavior across training and testing datasets, providing insights about how
+ well the model is generalized.
+ - It differentiates between positive and negative classes and their respective distribution patterns, which can aid
+ in problem diagnosis.
+
+ **Limitations**:
+ - Exclusive to classification tasks and specifically to logistic regression models.
+ - Graphical results necessitate human interpretation and may not be directly applicable for automated risk
+ detection.
+ - The method does not give a solitary quantifiable measure of model risk, rather it offers a visual representation
+ and broad distributional information.
+ - If the training and test datasets are not representative of the overall data distribution, the metric could
+ provide misleading results.
+ """
+
+ name = "cumulative_prediction_probabilities"
+ required_inputs = ["model", "datasets"]
+ metadata = {
+ "task_types": ["classification"],
+ "tags": ["logistic_regression", "visualization"],
+ }
+ default_params = {"title": "Cumulative Probabilities"}
+
+ @staticmethod
+ def plot_cumulative_prob(dataframes, dataset_titles, target_col, title):
+ figures = []
+
+ # Generate a colormap and convert to Plotly-accepted color format
+ # Adjust 'viridis' to any other matplotlib colormap if desired
+ colormap = cm.get_cmap("viridis")
+
+ for _, (df, dataset_title) in enumerate(zip(dataframes, dataset_titles)):
+ fig = go.Figure()
+
+ # Get unique classes and assign colors
+ classes = sorted(df[target_col].unique())
+ colors = [
+ colormap(i / len(classes))[:3] for i in range(len(classes))
+ ] # RGB
+ color_dict = {
+ cls: f"rgb({int(rgb[0]*255)}, {int(rgb[1]*255)}, {int(rgb[2]*255)})"
+ for cls, rgb in zip(classes, colors)
+ }
+ for class_value in sorted(df[target_col].unique()):
+ # Calculate cumulative distribution for the current class
+ sorted_probs = np.sort(
+ df[df[target_col] == class_value]["probabilities"]
+ )
+ cumulative_probs = np.cumsum(sorted_probs) / np.sum(sorted_probs)
+
+ fig.add_trace(
+ go.Scatter(
+ x=sorted_probs,
+ y=cumulative_probs,
+ mode="lines",
+ name=f"{dataset_title} {target_col} = {class_value}",
+ line=dict(
+ color=color_dict[class_value],
+ ),
+ )
+ )
+ fig.update_layout(
+ title_text=f"{title} - {dataset_title}",
+ xaxis_title="Probability",
+ yaxis_title="Cumulative Distribution",
+ legend_title=target_col,
+ )
+ figures.append(fig)
+ return figures
+
+ def run(self):
+ dataset_titles = [dataset.input_id for dataset in self.inputs.datasets]
+ target_column = self.inputs.datasets[0].target_column
+ title = self.params.get("title", self.default_params["title"])
+
+ dataframes = []
+ metric_value = {"cum_prob": {}}
+ for dataset in self.inputs.datasets:
+ df = dataset.df.copy()
+ y_prob = dataset.y_prob(self.inputs.model)
+ df["probabilities"] = y_prob
+ dataframes.append(df)
+ metric_value["cum_prob"][dataset.input_id] = list(df["probabilities"])
+
+ figures = self.plot_cumulative_prob(
+ dataframes, dataset_titles, target_column, title
+ )
+
+ figures_list = [
+ Figure(
+ for_object=self,
+ key=f"cumulative_prob_{title.replace(' ', '_')}_{i+1}",
+ figure=fig,
+ )
+ for i, fig in enumerate(figures)
+ ]
+
+ return self.cache_results(metric_value=metric_value, figures=figures_list)
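The new CumulativePredictionProbabilities test above accepts a model plus any number of datasets and draws one cumulative-probability figure per dataset, labelled by its input_id. A minimal invocation sketch under the same assumptions as the earlier example (run_test helper, hypothetical input_ids):

from validmind.tests import run_test

# Sketch only: one figure is produced for each dataset listed under "datasets".
run_test(
    "validmind.model_validation.statsmodels.CumulativePredictionProbabilities",
    inputs={"model": "my_model", "datasets": ["train_dataset", "test_dataset"]},
    params={"title": "Cumulative Probabilities"},
)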
validmind/tests/model_validation/statsmodels/GINITable.py
@@ -65,19 +65,8 @@ class GINITable(Metric):
  }

  def run(self):
- model = (
- self.inputs.model[0]
- if isinstance(self.inputs.model, list)
- else self.inputs.model
- )
-
- X_train = self.datasets[0].x
- y_train = self.datasets[0].y

- X_test = self.datasets[1].x
- y_test = self.datasets[1].y
-
- summary_metrics = self.compute_metrics(model, X_train, y_train, X_test, y_test)
+ summary_metrics = self.compute_metrics()

  return self.cache_results(
  {
@@ -85,52 +74,40 @@ class GINITable(Metric):
  }
  )

- def compute_metrics(self, model, X_train, y_train, X_test, y_test):
- """Computes AUC, GINI, and KS for train and test sets."""
+ def compute_metrics(self):
+ """Computes AUC, GINI, and KS for an arbitrary number of datasets."""
+ # Initialize the dictionary to store results
+ metrics_dict = {"Dataset": [], "AUC": [], "GINI": [], "KS": []}

- metrics_dict = {"Dataset": ["Train", "Test"], "AUC": [], "GINI": [], "KS": []}
+ # Iterate over each dataset in the inputs
+ for i, dataset in enumerate(self.inputs.datasets):
+ dataset_label = (
+ dataset.input_id
+ ) # Use input_id as the label for each dataset
+ metrics_dict["Dataset"].append(dataset_label)

- for dataset, X, y in zip(
- ["Train", "Test"], [X_train, X_test], [y_train, y_test]
- ):
- y_scores = model.predict(X)
+ # Retrieve y_true and y_pred for the current dataset
+ y_true = np.ravel(dataset.y) # Flatten y_true to make it one-dimensional
+ y_prob = dataset.y_prob(self.inputs.model)

- print("Predicted scores obtained...")
+ # Compute metrics
+ y_true = np.array(y_true, dtype=float)
+ y_prob = np.array(y_prob, dtype=float)

- # Compute AUC, GINI, and KS
- auc = self.compute_auc(y, y_scores)
- gini = self.compute_gini(y, y_scores)
- ks = self.compute_ks(y, y_scores)
+ fpr, tpr, _ = roc_curve(y_true, y_prob)
+ ks = max(tpr - fpr)
+ auc = roc_auc_score(y_true, y_prob)
+ gini = 2 * auc - 1

  # Add the metrics to the dictionary
  metrics_dict["AUC"].append(auc)
  metrics_dict["GINI"].append(gini)
  metrics_dict["KS"].append(ks)

- # Convert dictionary to DataFrame for nicer display
+ # Create a DataFrame to store and return the results
  metrics_df = pd.DataFrame(metrics_dict)
  return metrics_df

- def compute_auc(self, y_true, y_scores):
- """Computes the Area Under the Curve (AUC)."""
- print("Computing AUC...")
- auc = roc_auc_score(y_true, y_scores)
- return auc
-
- def compute_gini(self, y_true, y_scores):
- """Computes the Gini coefficient."""
- print("Computing GINI...")
- auc = self.compute_auc(y_true, y_scores)
- gini = 2 * auc - 1
- return gini
-
- def compute_ks(self, y_true, y_scores):
- """Computes the Kolmogorov-Smirnov (KS) statistic."""
- print("Computing KS...")
- fpr, tpr, _ = roc_curve(y_true, y_scores)
- ks = np.max(tpr - fpr)
- return ks
-
  def summary(self, metric_value):
  summary_metrics_table = metric_value["metrics_summary"]
  return ResultSummary(
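The refactored GINITable above derives all three statistics directly from predicted probabilities: AUC via roc_auc_score, GINI as 2 * AUC - 1, and KS as the largest gap between the TPR and FPR curves. A small standalone sketch of those relationships on made-up toy scores, using scikit-learn only:

import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

# Toy labels and predicted probabilities, for illustration only
y_true = np.array([0, 0, 1, 1, 0, 1])
y_prob = np.array([0.10, 0.40, 0.35, 0.80, 0.20, 0.70])

auc = roc_auc_score(y_true, y_prob)
fpr, tpr, _ = roc_curve(y_true, y_prob)

gini = 2 * auc - 1      # Gini coefficient derived from AUC
ks = np.max(tpr - fpr)  # Kolmogorov-Smirnov statistic
print(f"AUC={auc:.3f}  GINI={gini:.3f}  KS={ks:.3f}")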
validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py}
@@ -4,16 +4,14 @@

  from dataclasses import dataclass

- import numpy as np
- import pandas as pd
  import plotly.graph_objects as go
- from plotly.subplots import make_subplots
+ from matplotlib import cm

  from validmind.vm_models import Figure, Metric


  @dataclass
- class LogisticRegPredictionHistogram(Metric):
+ class PredictionProbabilitiesHistogram(Metric):
  """
  Generates and visualizes histograms of the Probability of Default predictions for both positive and negative
  classes in training and testing datasets.
@@ -58,7 +56,7 @@ class LogisticRegPredictionHistogram(Metric):
  quantifiable measure or score to assess model performance.
  """

- name = "logistic_reg_prediction_histogram"
+ name = "prediction_probabilities_histogram"
  required_inputs = ["model", "datasets"]
  metadata = {
  "task_types": ["classification"],
@@ -68,95 +66,72 @@ class LogisticRegPredictionHistogram(Metric):
  default_params = {"title": "Histogram of Predictive Probabilities"}

  @staticmethod
- def compute_probabilities(model, X):
- """
- Predict probabilities and add PD as a new column in X
- """
- probabilities = model.predict(X)
- pd_series = probabilities
-
- # If X is a numpy array, convert it to DataFrame
- if isinstance(X, np.ndarray):
- X = pd.DataFrame(X)
-
- X["probabilities"] = pd_series
- return X
-
- @staticmethod
- def plot_prob_histogram(df_train, df_test, pd_col, target_col, title):
- train_0 = df_train[df_train[target_col] == 0][pd_col]
- train_1 = df_train[df_train[target_col] == 1][pd_col]
- test_0 = df_test[df_test[target_col] == 0][pd_col]
- test_1 = df_test[df_test[target_col] == 1][pd_col]
-
- fig = make_subplots(rows=1, cols=2, subplot_titles=("Train Data", "Test Data"))
-
- trace_train_0 = go.Histogram(
- x=train_0, opacity=0.75, name=f"Train {target_col} = 0"
- )
- trace_train_1 = go.Histogram(
- x=train_1, opacity=0.75, name=f"Train {target_col} = 1"
- )
- trace_test_0 = go.Histogram(
- x=test_0, opacity=0.75, name=f"Test {target_col} = 0"
- )
- trace_test_1 = go.Histogram(
- x=test_1, opacity=0.75, name=f"Test {target_col} = 1"
- )
-
- fig.add_trace(trace_train_0, row=1, col=1)
- fig.add_trace(trace_train_1, row=1, col=1)
- fig.add_trace(trace_test_0, row=1, col=2)
- fig.add_trace(trace_test_1, row=1, col=2)
-
- fig.update_layout(barmode="overlay", title_text=title)
-
- return fig
+ def plot_prob_histogram(dataframes, dataset_titles, target_col, title):
+ figures = []
+
+ # Generate a colormap and convert to Plotly-accepted color format
+ # Adjust 'viridis' to any other matplotlib colormap if desired
+ colormap = cm.get_cmap("viridis")
+
+ for i, (df, dataset_title) in enumerate(zip(dataframes, dataset_titles)):
+ fig = go.Figure()
+
+ # Get unique classes and assign colors
+ classes = sorted(df[target_col].unique())
+ colors = [
+ colormap(i / len(classes))[:3] for i in range(len(classes))
+ ] # RGB
+ color_dict = {
+ cls: f"rgb({int(rgb[0]*255)}, {int(rgb[1]*255)}, {int(rgb[2]*255)})"
+ for cls, rgb in zip(classes, colors)
+ }
+
+ # Ensure classes are plotted in the specified order
+ for class_value in sorted(df[target_col].unique()):
+ fig.add_trace(
+ go.Histogram(
+ x=df[df[target_col] == class_value]["probabilities"],
+ opacity=0.75,
+ name=f"{dataset_title} {target_col} = {class_value}",
+ marker=dict(
+ color=color_dict[class_value],
+ ),
+ )
+ )
+ fig.update_layout(
+ barmode="overlay",
+ title_text=f"{title} - {dataset_title}",
+ xaxis_title="Probability",
+ yaxis_title="Frequency",
+ )
+ figures.append(fig)
+ return figures

  def run(self):
- model = (
- self.inputs.model[0]
- if isinstance(self.inputs.model, list)
- else self.inputs.model
+ dataset_titles = [dataset.input_id for dataset in self.inputs.datasets]
+ target_column = self.inputs.datasets[0].target_column
+ title = self.params.get("title", self.default_params["title"])
+
+ dataframes = []
+ metric_value = {"prob_histogram": {}}
+ for _, dataset in enumerate(self.inputs.datasets):
+ df = dataset.df.copy()
+ y_prob = dataset.y_prob(self.inputs.model)
+ df["probabilities"] = y_prob
+ dataframes.append(df)
+ metric_value["prob_histogram"][dataset.input_id] = list(df["probabilities"])
+
+ figures = self.plot_prob_histogram(
+ dataframes, dataset_titles, target_column, title
  )

- target_column = model.train_ds.target_column
- title = self.params["title"]
-
- # Create a copy of training and testing dataframes
- df_train = self.datasets[0].df.copy()
- df_test = self.datasets[1].df.copy()
-
- # Drop target_column to create feature dataframes
- X_train = df_train.drop(columns=[target_column])
- X_test = df_test.drop(columns=[target_column])
-
- # Subset only target_column to create target dataframes
- y_train = df_train[[target_column]]
- y_test = df_test[[target_column]]
-
- X_train = self.compute_probabilities(model, X_train)
- X_test = self.compute_probabilities(model, X_test)
+ figures_list = [
+ Figure(
+ for_object=self,
+ key=f"prob_histogram_{title.replace(' ', '_')}_{i+1}",
+ figure=fig,
+ )
+ for i, fig in enumerate(figures)
+ ]

- df_train = pd.concat([X_train, y_train], axis=1)
- df_test = pd.concat([X_test, y_test], axis=1)
-
- fig = self.plot_prob_histogram(
- df_train, df_test, "probabilities", target_column, title
- )
-
- return self.cache_results(
- metric_value={
- "prob_histogram": {
- "train_probs": list(X_train["probabilities"]),
- "test_probs": list(X_test["probabilities"]),
- },
- },
- figures=[
- Figure(
- for_object=self,
- key="prob_histogram",
- figure=fig,
- )
- ],
- )
+ return self.cache_results(metric_value=metric_value, figures=figures_list)
validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py
@@ -94,8 +94,8 @@ class RegressionModelForecastPlot(Metric):
  train_ds = datasets[0]
  test_ds = datasets[1]

- y_pred = train_ds.y_pred(fitted_model.input_id)
- y_pred_test = test_ds.y_pred(fitted_model.input_id)
+ y_pred = train_ds.y_pred(fitted_model)
+ y_pred_test = test_ds.y_pred(fitted_model)

  # Check that start_date and end_date are within the data range
  all_dates = pd.concat([pd.Series(train_ds.index), pd.Series(test_ds.index)])