PyPI - validmind - Versions diffs - 1.11.5__tar.gz → 1.11.6__tar.gz - Mend

validmind 1.11.5 (tar.gz) → 1.11.6 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

{validmind-1.11.5 → validmind-1.11.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: validmind
-Version: 1.11.5
+Version: 1.11.6
 Summary: ValidMind Developer Framework
 Author: Andres Rodriguez
 Author-email: andres@validmind.ai
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Provides-Extra: r-support
 Requires-Dist: arch (>=5.4.0,<6.0.0)
+Requires-Dist: catboost (>=1.2,<2.0)
 Requires-Dist: click (>=8.0.4,<9.0.0)
 Requires-Dist: dython (>=0.7.1,<0.8.0)
 Requires-Dist: ipython (==7.34.0)

{validmind-1.11.5 → validmind-1.11.6}/pyproject.toml RENAMED Viewed

@@ -13,7 +13,7 @@ description = "ValidMind Developer Framework"
 #   "validmind/**/*.so",
 # ]
 name = "validmind"
-version = "1.11.5"
+version = "1.11.6"
 [tool.poetry.dependencies]
 arch = "^5.4.0"
@@ -41,6 +41,7 @@ tabulate = "^0.8.9"
 tqdm = "^4.64.0"
 xgboost = "^1.5.2"
 markdown = "^3.4.3"
+catboost = "^1.2"
 [tool.poetry.group.dev.dependencies]
 black = "^22.1.0"

{validmind-1.11.5 → validmind-1.11.6}/validmind/client.py RENAMED Viewed

@@ -91,9 +91,7 @@ def init_model(
     if not Model.is_supported_model(model):
         raise ValueError(
-            "Model type {} is not supported at the moment.".format(
-                Model.model_class(model)
-            )
+            f"Model type {Model.model_library(model)}.{Model.model_class(model)} is not supported at the moment."
         )
     return Model.init_vm_model(

{validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/model_metadata.py RENAMED Viewed

@@ -18,6 +18,13 @@ SUPPORTED_STATSMODELS_LINK_FUNCTIONS = {
 }
+def get_catboost_version():
+    if "catboost" in sys.modules:
+        return sys.modules["catboost"].__version__
+    return "n/a"
 def get_pytorch_version():
     if "torch" in sys.modules:
         return sys.modules["torch"].__version__
@@ -113,6 +120,12 @@ def get_info_from_model_instance(model):
         subtask = "binary"
         framework = "PyTorch"
         framework_version = get_pytorch_version()
+    elif model_class == "CatBoostClassifier":
+        architecture = "Gradient Boosting"
+        task = "classification"
+        subtask = "binary"
+        framework = "CatBoost"
+        framework_version = get_catboost_version()
     else:
         raise ValueError(f"Model class {model_class} is not supported by this test")
@@ -162,6 +175,8 @@ def get_params_from_model_instance(model):
         params = model.get_params()
     elif model_library == "pytorch":
         params = {}
+    elif model_library == "catboost":
+        params = model.get_all_params()
     else:
         raise ValueError(f"Model library {model_library} is not supported by this test")

{validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/sklearn/metrics.py RENAMED Viewed

@@ -444,8 +444,12 @@ class SHAPGlobalImportance(Metric):
         # the shap library generates a bunch of annoying warnings that we don't care about
         warnings.filterwarnings("ignore", category=UserWarning)
-        # RandomForestClassifier applies here too
-        if model_class == "XGBClassifier" or model_class == "RandomForestClassifier":
+        # Any tree based model can go here
+        if (
+            model_class == "XGBClassifier"
+            or model_class == "RandomForestClassifier"
+            or model_class == "CatBoostClassifier"
+        ):
             explainer = shap.TreeExplainer(trained_model)
         elif (
             model_class == "LogisticRegression"
@@ -485,6 +489,8 @@ class PopulationStabilityIndex(Metric):
             print(f"Skiping PSI for {model_library} models")
             return
-        psi_df = _get_psi(self.model.y_train_predict, self.model.y_test_predict)
+        psi_df = _get_psi(
+            self.model.y_train_predict.copy(), self.model.y_test_predict.copy()
+        )
         return self.cache_results(metric_value=psi_df)

{validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/sklearn/threshold_tests.py RENAMED Viewed

@@ -318,9 +318,13 @@ class OverfitDiagnosis(ThresholdTest):
             features_list = self.params["features_columns"]
         # Check if all elements from features_list are present in the feature columns
-        all_present = all(elem in self.model.train_ds.get_features_columns() for elem in features_list)
+        all_present = all(
+            elem in self.model.train_ds.get_features_columns() for elem in features_list
+        )
         if not all_present:
-            raise ValueError("The list of feature columns provided do not match with training dataset feature columns")
+            raise ValueError(
+                "The list of feature columns provided do not match with training dataset feature columns"
+            )
         if not isinstance(features_list, list):
             raise ValueError(
@@ -595,10 +599,14 @@ class WeakspotsDiagnosis(ThresholdTest):
             features_list = self.params["features_columns"]
         # Check if all elements from features_list are present in the feature columns
-        all_present = all(elem in self.model.train_ds.get_features_columns() for elem in features_list)
+        all_present = all(
+            elem in self.model.train_ds.get_features_columns() for elem in features_list
+        )
         if not all_present:
-            raise ValueError("The list of feature columns provided do not match with "
-                             + "training dataset feature columns")
+            raise ValueError(
+                "The list of feature columns provided do not match with "
+                + "training dataset feature columns"
+            )
         target_column = self.model.train_ds.target_column
         prediction_column = f"{target_column}_pred"
@@ -866,14 +874,20 @@ class RobustnessDiagnosis(ThresholdTest):
             features_list = self.model.train_ds.get_numeric_features_columns()
         # Check if all elements from features_list are present in the numerical feature columns
-        all_present = all(elem in self.model.train_ds.get_numeric_features_columns()
-                          for elem in features_list)
+        all_present = all(
+            elem in self.model.train_ds.get_numeric_features_columns()
+            for elem in features_list
+        )
         if not all_present:
-            raise ValueError("The list of feature columns provided do not match with training "
-                             + "dataset numerical feature columns")
+            raise ValueError(
+                "The list of feature columns provided do not match with training "
+                + "dataset numerical feature columns"
+            )
         # Remove target column if it exist in the list
-        features_list = [col for col in features_list if col != self.model.train_ds.target_column]
+        features_list = [
+            col for col in features_list if col != self.model.train_ds.target_column
+        ]
         train_df = self.model.train_ds.x.copy()
         train_y_true = self.model.train_ds.y
@@ -884,7 +898,9 @@ class RobustnessDiagnosis(ThresholdTest):
         test_results = []
         test_figures = []
-        results_headers = ["Perturbation Size", "Dataset Type", "Records"] + list(self.default_metrics.keys())
+        results_headers = ["Perturbation Size", "Dataset Type", "Records"] + list(
+            self.default_metrics.keys()
+        )
         results = {k: [] for k in results_headers}
         # Iterate scaling factor for the standard deviation list
@@ -920,14 +936,20 @@ class RobustnessDiagnosis(ThresholdTest):
             )
         )
-        train_acc = df.loc[(df['Dataset Type'] == "Training") , 'accuracy'].values[0]
-        test_acc = df.loc[(df['Dataset Type'] == "Test") , 'accuracy'].values[0]
-        df["Passed"] = np.where((df['Dataset Type'] == "Training") & (df['accuracy'] >= (train_acc - accuracy_threshold)),
-                                True,
-                                np.where((df['Dataset Type'] == "Test") & (df['accuracy'] >= (test_acc - accuracy_threshold)),
-                                         True,
-                                         False))
+        train_acc = df.loc[(df["Dataset Type"] == "Training"), "accuracy"].values[0]
+        test_acc = df.loc[(df["Dataset Type"] == "Test"), "accuracy"].values[0]
+        df["Passed"] = np.where(
+            (df["Dataset Type"] == "Training")
+            & (df["accuracy"] >= (train_acc - accuracy_threshold)),
+            True,
+            np.where(
+                (df["Dataset Type"] == "Test")
+                & (df["accuracy"] >= (test_acc - accuracy_threshold)),
+                True,
+                False,
+            ),
+        )
         test_results.append(
             TestResult(
                 test_name="accuracy",
@@ -936,7 +958,9 @@ class RobustnessDiagnosis(ThresholdTest):
                 values=df.to_dict(),
             )
         )
-        return self.cache_results(test_results, passed=df['Passed'].all(), figures=test_figures)
+        return self.cache_results(
+            test_results, passed=df["Passed"].all(), figures=test_figures
+        )
     def _compute_metrics(
         self,

{validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/model.py RENAMED Viewed

@@ -9,6 +9,7 @@ from .dataset import Dataset
 # import torch.nn as nn
 SUPPORTED_MODEL_TYPES = [
+    "catboost.CatBoostClassifier",
     "pytorch.PyTorchModel",
     "sklearn.LogisticRegression",
     "sklearn.LinearRegression",