validmind 2.3.3__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +8 -1
- validmind/ai/utils.py +2 -1
- validmind/client.py +1 -0
- validmind/datasets/regression/fred_timeseries.py +272 -0
- validmind/tests/__init__.py +14 -468
- validmind/tests/__types__.py +10 -0
- validmind/tests/_store.py +102 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
- validmind/tests/data_validation/ADF.py +8 -10
- validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
- validmind/tests/data_validation/AutoAR.py +2 -4
- validmind/tests/data_validation/AutoMA.py +2 -4
- validmind/tests/data_validation/AutoSeasonality.py +8 -10
- validmind/tests/data_validation/AutoStationarity.py +8 -10
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
- validmind/tests/data_validation/BivariateHistograms.py +8 -10
- validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
- validmind/tests/data_validation/ClassImbalance.py +2 -4
- validmind/tests/data_validation/DFGLSArch.py +2 -4
- validmind/tests/data_validation/DatasetDescription.py +7 -9
- validmind/tests/data_validation/DatasetSplit.py +8 -9
- validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
- validmind/tests/data_validation/Duplicates.py +2 -4
- validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
- validmind/tests/data_validation/HighCardinality.py +2 -4
- validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
- validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
- validmind/tests/data_validation/IQROutliersTable.py +2 -4
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
- validmind/tests/data_validation/KPSS.py +8 -10
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
- validmind/tests/data_validation/MissingValues.py +2 -4
- validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
- validmind/tests/data_validation/MissingValuesRisk.py +2 -4
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
- validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
- validmind/tests/data_validation/RollingStatsPlot.py +2 -4
- validmind/tests/data_validation/ScatterPlot.py +2 -4
- validmind/tests/data_validation/SeasonalDecompose.py +70 -44
- validmind/tests/data_validation/Skewness.py +2 -4
- validmind/tests/data_validation/SpreadPlot.py +2 -4
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
- validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
- validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
- validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
- validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
- validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
- validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
- validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
- validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
- validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
- validmind/tests/data_validation/TimeSeriesOutliers.py +32 -45
- validmind/tests/data_validation/TooManyZeroValues.py +2 -4
- validmind/tests/data_validation/UniqueRows.py +2 -4
- validmind/tests/data_validation/WOEBinPlots.py +2 -4
- validmind/tests/data_validation/WOEBinTable.py +2 -4
- validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
- validmind/tests/data_validation/nlp/CommonWords.py +2 -4
- validmind/tests/data_validation/nlp/Hashtags.py +2 -4
- validmind/tests/data_validation/nlp/Mentions.py +2 -4
- validmind/tests/data_validation/nlp/Punctuations.py +2 -4
- validmind/tests/data_validation/nlp/StopWords.py +2 -4
- validmind/tests/data_validation/nlp/TextDescription.py +2 -4
- validmind/tests/decorator.py +10 -8
- validmind/tests/load.py +264 -0
- validmind/tests/metadata.py +59 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
- validmind/tests/model_validation/FeaturesAUC.py +6 -8
- validmind/tests/model_validation/ModelMetadata.py +8 -9
- validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
- validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
- validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +9 -11
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
- validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
- validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
- validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
- validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
- validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
- validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
- validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
- validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
- validmind/tests/prompt_validation/Bias.py +2 -4
- validmind/tests/prompt_validation/Clarity.py +2 -4
- validmind/tests/prompt_validation/Conciseness.py +2 -4
- validmind/tests/prompt_validation/Delimitation.py +2 -4
- validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
- validmind/tests/prompt_validation/Robustness.py +2 -4
- validmind/tests/prompt_validation/Specificity.py +2 -4
- validmind/tests/run.py +394 -0
- validmind/tests/test_providers.py +12 -0
- validmind/tests/utils.py +16 -0
- validmind/unit_metrics/__init__.py +12 -4
- validmind/unit_metrics/composite.py +3 -0
- validmind/vm_models/test/metric.py +8 -5
- validmind/vm_models/test/result_wrapper.py +2 -1
- validmind/vm_models/test/test.py +14 -11
- validmind/vm_models/test/threshold_test.py +1 -0
- validmind/vm_models/test_suite/runner.py +1 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/METADATA +70 -36
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/RECORD +162 -146
- /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/LICENSE +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/WHEEL +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/sklearn/ClassifierPerformance.py

@@ -58,15 +58,13 @@ class ClassifierPerformance(Metric):

     name = "classifier_performance"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]

     def summary(self, metric_value: dict):
         """
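The hunk above is representative: across 2.4.0, the nested `metadata` dict on each built-in test is flattened into plain `tasks` and `tags` class attributes. An illustration-only sketch (not validmind's actual loader code, which lives in the new `validmind/tests/load.py`) of why flat attributes make registry filtering trivial:

```python
# Hypothetical registry filter; the class names and attribute values below
# are copied from the diff, but the filtering logic is only an illustration.
class ClassifierPerformance:
    tasks = ["classification", "text_classification"]
    tags = ["sklearn", "binary_classification", "multiclass_classification", "model_performance"]

class ClusterCosineSimilarity:
    tasks = ["clustering"]
    tags = ["sklearn", "model_performance"]

def filter_tests(registry, task=None, tag=None):
    """Return tests whose class attributes match the given task and/or tag."""
    return [
        t for t in registry
        if (task is None or task in getattr(t, "tasks", []))
        and (tag is None or tag in getattr(t, "tags", []))
    ]

print([t.__name__ for t in filter_tests([ClassifierPerformance, ClusterCosineSimilarity], task="clustering")])
# ['ClusterCosineSimilarity']
```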
validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py

@@ -57,13 +57,11 @@ class ClusterCosineSimilarity(Metric):

     name = "cluster_cosine_similarity"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def run(self):
         y_true_train = self.inputs.dataset.y
validmind/tests/model_validation/sklearn/ClusterPerformance.py

@@ -51,13 +51,11 @@ class ClusterPerformance(Metric):

     name = "cluster_performance_metrics"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def cluser_performance_metrics(
         self, y_true_train, y_pred_train, y_true_test, y_pred_test, samples, metric_info
validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py

@@ -61,13 +61,8 @@ class ClusterPerformanceMetrics(ClusterPerformance):

     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = ["sklearn", "model_performance"]
     default_metrics = {
         "Homogeneity Score": metrics.homogeneity_score,
         "Completeness Score": metrics.completeness_score,
validmind/tests/model_validation/sklearn/CompletenessScore.py

@@ -44,13 +44,11 @@ class CompletenessScore(ClusterPerformance):

     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def metric_info(self):
         return {"Completeness Score": metrics.completeness_score}
validmind/tests/model_validation/sklearn/ConfusionMatrix.py

@@ -55,16 +55,14 @@ class ConfusionMatrix(Metric):

     name = "confusion_matrix"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "visualization",
+    ]

     def run(self):
         y_true = self.inputs.dataset.y

@@ -113,6 +111,17 @@ class ConfusionMatrix(Metric):
             height=600,
         )

+        # Add an annotation at the bottom of the heatmap
+        fig.add_annotation(
+            x=0.5,
+            y=-0.1,
+            xref="paper",
+            yref="paper",
+            text=f"Confusion Matrix for {self.inputs.model.input_id} on {self.inputs.dataset.input_id}",
+            showarrow=False,
+            font=dict(size=14),
+        )
+
         return self.cache_results(
             metric_value={
                 "confusion_matrix": cm,
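The new annotation labels each confusion-matrix figure with its model and dataset IDs. A self-contained sketch of the same plotly pattern on a toy matrix; the matrix values and ID strings here are made up:

```python
import plotly.graph_objects as go

cm = [[50, 3], [5, 42]]  # toy 2x2 confusion matrix
fig = go.Figure(data=go.Heatmap(z=cm, x=["Pred 0", "Pred 1"], y=["True 0", "True 1"]))

# xref/yref="paper" position the text relative to the plotting area (0..1),
# so y=-0.1 lands just below the heatmap regardless of the axis ranges.
fig.add_annotation(
    x=0.5,
    y=-0.1,
    xref="paper",
    yref="paper",
    text="Confusion Matrix for my_model on test_dataset",
    showarrow=False,
    font=dict(size=14),
)
fig.show()
```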
validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py (new file)

@@ -0,0 +1,83 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+from sklearn.inspection import permutation_importance
+
+from validmind import tags, tasks
+
+
+@tags("model_explainability", "sklearn")
+@tasks("regression", "time_series_forecasting")
+def FeatureImportanceComparison(datasets, models, num_features=3):
+    """
+    Compare feature importance scores for each model and generate a summary table
+    with the top important features.
+
+    **Purpose**: The purpose of this function is to compare the feature importance scores for different models applied to various datasets.
+
+    **Test Mechanism**: The function iterates through each dataset-model pair, calculates permutation feature importance (PFI) scores, and generates a summary table with the top `num_features` important features for each model.
+
+    **Signs of High Risk**:
+    - If key features expected to be important are ranked low, it could indicate potential issues with model training or data quality.
+    - High variance in feature importance scores across different models may suggest instability in feature selection.
+
+    **Strengths**:
+    - Provides a clear comparison of the most important features for each model.
+    - Uses permutation importance, which is a model-agnostic method and can be applied to any estimator.
+
+    **Limitations**:
+    - Assumes that the dataset is provided as a DataFrameDataset object with `x_df` and `y_df` methods to access feature and target data.
+    - Requires that `model.model` is compatible with `sklearn.inspection.permutation_importance`.
+    - The function's output is dependent on the number of features specified by `num_features`, which defaults to 3 but can be adjusted.
+    """
+    results_list = []
+
+    for dataset, model in zip(datasets, models):
+        x = dataset.x_df()
+        y = dataset.y_df()
+
+        pfi_values = permutation_importance(
+            model.model,
+            x,
+            y,
+            random_state=0,
+            n_jobs=-2,
+        )
+
+        # Create a dictionary to store PFI scores
+        pfi = {
+            column: pfi_values["importances_mean"][i]
+            for i, column in enumerate(x.columns)
+        }
+
+        # Sort features by their importance
+        sorted_features = sorted(pfi.items(), key=lambda item: item[1], reverse=True)
+
+        # Extract the top `num_features` features
+        top_features = sorted_features[:num_features]
+
+        # Prepare the result for the current model and dataset
+        result = {
+            "Model": model.input_id,
+            "Dataset": dataset.input_id,
+        }
+
+        # Dynamically add feature columns to the result
+        for i in range(num_features):
+            if i < len(top_features):
+                result[
+                    f"Feature {i + 1}"
+                ] = f"[{top_features[i][0]}; {top_features[i][1]:.4f}]"
+            else:
+                result[f"Feature {i + 1}"] = None
+
+        # Append the result to the list
+        results_list.append(result)
+
+    # Convert the results list to a DataFrame
+    results_df = pd.DataFrame(results_list)
+    return results_df
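FeatureImportanceComparison is one of the new decorator-based functional tests (`@tags`/`@tasks` on a plain function instead of a `Metric` subclass). A self-contained sketch of the computation it wraps, using sklearn's public `permutation_importance` on a toy model; the estimator and feature names are illustrative only:

```python
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LinearRegression

# Toy regression problem with named features (stand-ins for a real dataset)
X, y = make_regression(n_samples=200, n_features=5, random_state=0)
X = pd.DataFrame(X, columns=[f"f{i}" for i in range(5)])

model = LinearRegression().fit(X, y)

# importances_mean holds the mean score drop when each feature is permuted
result = permutation_importance(model, X, y, random_state=0, n_jobs=-2)
pfi = dict(zip(X.columns, result["importances_mean"]))

# Top-3 features, mirroring the test's default num_features=3
top_3 = sorted(pfi.items(), key=lambda item: item[1], reverse=True)[:3]
print(top_3)
```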
validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py

@@ -55,13 +55,11 @@ class FowlkesMallowsScore(ClusterPerformance):

     name = "fowlkes_mallows_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def metric_info(self):
         return {"Fowlkes-Mallows score": metrics.fowlkes_mallows_score}
validmind/tests/model_validation/sklearn/HomogeneityScore.py

@@ -46,13 +46,11 @@ class HomogeneityScore(ClusterPerformance):

     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def metric_info(self):
         return {"Homogeneity Score": metrics.homogeneity_score}
validmind/tests/model_validation/sklearn/HyperParametersTuning.py

@@ -52,13 +52,8 @@ class HyperParametersTuning(Metric):

     name = "hyper_parameters_tuning"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "clustering"]
+    tags = ["sklearn", "model_performance"]
     default_params = {"param_grid": None, "scoring": None}

     def run(self):
validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py

@@ -60,13 +60,10 @@ class KMeansClustersOptimization(Metric):

     name = "clusters_optimize_elbow_method"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": ["sklearn", "model_performance", "kmeans"],
-    }
-    default_params = {
-        "n_clusters": None,
-    }
+    tasks = ["clustering"]
+    tags = ["sklearn", "model_performance", "kmeans"]
+
+    default_params = {"n_clusters": None}

     def run(self):
         n_clusters = self.params["n_clusters"]
validmind/tests/model_validation/sklearn/MinimumAccuracy.py

@@ -59,15 +59,13 @@ class MinimumAccuracy(ThresholdTest):
     name = "accuracy_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.7}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]

     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
validmind/tests/model_validation/sklearn/MinimumF1Score.py

@@ -62,15 +62,13 @@ class MinimumF1Score(ThresholdTest):
     name = "f1_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.5}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]

     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py

@@ -59,15 +59,13 @@ class MinimumROCAUCScore(ThresholdTest):
     name = "roc_auc_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.5}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]

     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py

@@ -53,16 +53,14 @@ class ModelsPerformanceComparison(ClassifierPerformance):

     name = "models_performance_comparison"
     required_inputs = ["dataset", "models"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "model_comparison",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "model_comparison",
+    ]

     def summary(self, metric_value: dict):
         """
validmind/tests/model_validation/sklearn/OverfitDiagnosis.py

@@ -67,15 +67,13 @@ class OverfitDiagnosis(ThresholdTest):
     name = "overfit_regions"
     required_inputs = ["model", "datasets"]
    default_params = {"features_columns": None, "cut_off_percentage": 4}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_diagnosis",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_diagnosis",
+    ]

     default_metrics = {
         "accuracy": metrics.accuracy_score,
validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py

@@ -56,16 +56,14 @@ class PermutationFeatureImportance(Metric):
         "fontsize": None,
         "figure_height": 1000,
     }
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "feature_importance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "feature_importance",
+        "visualization",
+    ]

     def run(self):
         x = self.inputs.dataset.x_df()

@@ -121,7 +119,7 @@ class PermutationFeatureImportance(Metric):
             figures=[
                 Figure(
                     for_object=self,
-                    key="
+                    key=f"pfi_{self.inputs.dataset.input_id}_{self.inputs.model.input_id}",
                     figure=fig,
                 ),
             ],
validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py

@@ -73,15 +73,13 @@ class PopulationStabilityIndex(Metric):

     name = "psi"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
     default_params = {
         "num_bins": 10,
         "mode": "fixed",
validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py

@@ -51,16 +51,14 @@ class PrecisionRecallCurve(Metric):

     name = "pr_curve"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "visualization",
+    ]

     def run(self):
         if isinstance(self.inputs.model, FoundationModel):
|
@@ -59,16 +59,14 @@ class ROCCurve(Metric):
|
|
59
59
|
|
60
60
|
name = "roc_curve"
|
61
61
|
required_inputs = ["model", "dataset"]
|
62
|
-
|
63
|
-
|
64
|
-
"
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
],
|
71
|
-
}
|
62
|
+
tasks = ["classification", "text_classification"]
|
63
|
+
tags = [
|
64
|
+
"sklearn",
|
65
|
+
"binary_classification",
|
66
|
+
"multiclass_classification",
|
67
|
+
"model_performance",
|
68
|
+
"visualization",
|
69
|
+
]
|
72
70
|
|
73
71
|
def run(self):
|
74
72
|
if isinstance(self.inputs.model, FoundationModel):
|
@@ -109,7 +107,7 @@ class ROCCurve(Metric):
|
|
109
107
|
)
|
110
108
|
|
111
109
|
layout = go.Layout(
|
112
|
-
title="ROC Curve",
|
110
|
+
title=f"ROC Curve for {self.inputs.model.input_id} on {self.inputs.dataset.input_id}",
|
113
111
|
xaxis=dict(title="False Positive Rate"),
|
114
112
|
yaxis=dict(title="True Positive Rate"),
|
115
113
|
width=700,
|
@@ -117,6 +115,7 @@ class ROCCurve(Metric):
|
|
117
115
|
)
|
118
116
|
|
119
117
|
fig = go.Figure(data=[trace0, trace1], layout=layout)
|
118
|
+
|
120
119
|
return self.cache_results(
|
121
120
|
metric_value={
|
122
121
|
"auc": auc,
|
validmind/tests/model_validation/sklearn/RegressionErrors.py

@@ -43,13 +43,11 @@ class RegressionErrors(Metric):

     name = "regression_errors"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def summary(self, raw_results):
         """
validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py (new file)

@@ -0,0 +1,76 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+import pandas as pd
+from sklearn import metrics
+
+from validmind import tags, tasks
+from validmind.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+@tags("model_performance", "sklearn")
+@tasks("regression", "time_series_forecasting")
+def RegressionErrorsComparison(datasets, models):
+    """
+    Compare regression error metrics for each model and generate a summary table
+    with the results.
+
+    **Purpose**: The purpose of this function is to compare the regression errors for different models applied to various datasets.
+
+    **Test Mechanism**: The function iterates through each dataset-model pair, calculates various error metrics (MAE, MSE, MAPE, MBD), and generates a summary table with these results.
+
+    **Signs of High Risk**:
+    - High Mean Absolute Error (MAE) or Mean Squared Error (MSE) indicates poor model performance.
+    - High Mean Absolute Percentage Error (MAPE) suggests large percentage errors, especially problematic if the true values are small.
+    - Mean Bias Deviation (MBD) significantly different from zero indicates systematic overestimation or underestimation by the model.
+
+    **Strengths**:
+    - Provides multiple error metrics to assess model performance from different perspectives.
+    - Includes a check to avoid division by zero when calculating MAPE.
+
+    **Limitations**:
+    - Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `feature_columns` attributes.
+    - The function relies on the `logger` from `validmind.logging` to warn about zero values in `y_true`, which should be correctly implemented and imported.
+    - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
+    """
+    results_list = []
+
+    for dataset, model in zip(datasets, models):
+        dataset_name = dataset.input_id
+        model_name = model.input_id
+
+        y_true = dataset.y
+        y_pred = dataset.y_pred(model)  # Assuming dataset has X for features
+        y_true = y_true.astype(y_pred.dtype)
+
+        mae = metrics.mean_absolute_error(y_true, y_pred)
+        mse = metrics.mean_squared_error(y_true, y_pred)
+
+        if np.any(y_true == 0):
+            logger.warning(
+                "y_true contains zero values. Skipping MAPE calculation to avoid division by zero."
+            )
+            mape = None
+        else:
+            mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
+        mbd = np.mean(y_pred - y_true)
+
+        # Append results to the list
+        results_list.append(
+            {
+                "Model": model_name,
+                "Dataset": dataset_name,
+                "Mean Absolute Error (MAE)": mae,
+                "Mean Squared Error (MSE)": mse,
+                "Mean Absolute Percentage Error (MAPE)": mape,
+                "Mean Bias Deviation (MBD)": mbd,
+            }
+        )
+
+    # Convert results list to a DataFrame
+    results_df = pd.DataFrame(results_list)
+    return results_df
|