validmind 1.11.5__tar.gz → 1.11.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {validmind-1.11.5 → validmind-1.11.6}/PKG-INFO +2 -1
- {validmind-1.11.5 → validmind-1.11.6}/pyproject.toml +2 -1
- {validmind-1.11.5 → validmind-1.11.6}/validmind/client.py +1 -3
- {validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/model_metadata.py +15 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/sklearn/metrics.py +9 -3
- {validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/sklearn/threshold_tests.py +44 -20
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/model.py +1 -0
- {validmind-1.11.5 → validmind-1.11.6}/LICENSE +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/api_client.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/data_validation/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/data_validation/metrics.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/data_validation/threshold_tests.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/classification/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/classification/customer_churn.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/classification/taiwan_credit.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/datasets/lending_club_loan_rates.csv +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/fred.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/lending_club.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/model_utils.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/sklearn/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/statsmodels/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/statsmodels/metrics.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/statsmodels/threshold_tests.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/utils.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/statsutils.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/test_plans/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/test_plans/binary_classifier.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/test_plans/statsmodels_timeseries.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/test_plans/tabular_datasets.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/test_plans/time_series.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/test_suites/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/test_suites/test_suites.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/utils.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/__init__.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/dataset.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/dataset_utils.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/figure.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/metric.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/metric_result.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/plot_utils.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/result_summary.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/test_context.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/test_plan.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/test_plan_result.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/test_result.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/test_suite.py +0 -0
- {validmind-1.11.5 → validmind-1.11.6}/validmind/vm_models/threshold_test.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: validmind
|
3
|
-
Version: 1.11.5
|
3
|
+
Version: 1.11.6
|
4
4
|
Summary: ValidMind Developer Framework
|
5
5
|
Author: Andres Rodriguez
|
6
6
|
Author-email: andres@validmind.ai
|
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.10
|
12
12
|
Provides-Extra: r-support
|
13
13
|
Requires-Dist: arch (>=5.4.0,<6.0.0)
|
14
|
+
Requires-Dist: catboost (>=1.2,<2.0)
|
14
15
|
Requires-Dist: click (>=8.0.4,<9.0.0)
|
15
16
|
Requires-Dist: dython (>=0.7.1,<0.8.0)
|
16
17
|
Requires-Dist: ipython (==7.34.0)
|
@@ -13,7 +13,7 @@ description = "ValidMind Developer Framework"
|
|
13
13
|
# "validmind/**/*.so",
|
14
14
|
# ]
|
15
15
|
name = "validmind"
|
16
|
-
version = "1.11.5"
|
16
|
+
version = "1.11.6"
|
17
17
|
|
18
18
|
[tool.poetry.dependencies]
|
19
19
|
arch = "^5.4.0"
|
@@ -41,6 +41,7 @@ tabulate = "^0.8.9"
|
|
41
41
|
tqdm = "^4.64.0"
|
42
42
|
xgboost = "^1.5.2"
|
43
43
|
markdown = "^3.4.3"
|
44
|
+
catboost = "^1.2"
|
44
45
|
|
45
46
|
[tool.poetry.group.dev.dependencies]
|
46
47
|
black = "^22.1.0"
|
@@ -91,9 +91,7 @@ def init_model(
|
|
91
91
|
|
92
92
|
if not Model.is_supported_model(model):
|
93
93
|
raise ValueError(
|
94
|
-
"Model type {} is not supported at the moment."
|
95
|
-
Model.model_class(model)
|
96
|
-
)
|
94
|
+
f"Model type {Model.model_library(model)}.{Model.model_class(model)} is not supported at the moment."
|
97
95
|
)
|
98
96
|
|
99
97
|
return Model.init_vm_model(
|
@@ -18,6 +18,13 @@ SUPPORTED_STATSMODELS_LINK_FUNCTIONS = {
|
|
18
18
|
}
|
19
19
|
|
20
20
|
|
21
|
+
def get_catboost_version():
|
22
|
+
if "catboost" in sys.modules:
|
23
|
+
return sys.modules["catboost"].__version__
|
24
|
+
|
25
|
+
return "n/a"
|
26
|
+
|
27
|
+
|
21
28
|
def get_pytorch_version():
|
22
29
|
if "torch" in sys.modules:
|
23
30
|
return sys.modules["torch"].__version__
|
@@ -113,6 +120,12 @@ def get_info_from_model_instance(model):
|
|
113
120
|
subtask = "binary"
|
114
121
|
framework = "PyTorch"
|
115
122
|
framework_version = get_pytorch_version()
|
123
|
+
elif model_class == "CatBoostClassifier":
|
124
|
+
architecture = "Gradient Boosting"
|
125
|
+
task = "classification"
|
126
|
+
subtask = "binary"
|
127
|
+
framework = "CatBoost"
|
128
|
+
framework_version = get_catboost_version()
|
116
129
|
else:
|
117
130
|
raise ValueError(f"Model class {model_class} is not supported by this test")
|
118
131
|
|
@@ -162,6 +175,8 @@ def get_params_from_model_instance(model):
|
|
162
175
|
params = model.get_params()
|
163
176
|
elif model_library == "pytorch":
|
164
177
|
params = {}
|
178
|
+
elif model_library == "catboost":
|
179
|
+
params = model.get_all_params()
|
165
180
|
else:
|
166
181
|
raise ValueError(f"Model library {model_library} is not supported by this test")
|
167
182
|
|
@@ -444,8 +444,12 @@ class SHAPGlobalImportance(Metric):
|
|
444
444
|
# the shap library generates a bunch of annoying warnings that we don't care about
|
445
445
|
warnings.filterwarnings("ignore", category=UserWarning)
|
446
446
|
|
447
|
-
#
|
448
|
-
if
|
447
|
+
# Any tree based model can go here
|
448
|
+
if (
|
449
|
+
model_class == "XGBClassifier"
|
450
|
+
or model_class == "RandomForestClassifier"
|
451
|
+
or model_class == "CatBoostClassifier"
|
452
|
+
):
|
449
453
|
explainer = shap.TreeExplainer(trained_model)
|
450
454
|
elif (
|
451
455
|
model_class == "LogisticRegression"
|
@@ -485,6 +489,8 @@ class PopulationStabilityIndex(Metric):
|
|
485
489
|
print(f"Skiping PSI for {model_library} models")
|
486
490
|
return
|
487
491
|
|
488
|
-
psi_df = _get_psi(
|
492
|
+
psi_df = _get_psi(
|
493
|
+
self.model.y_train_predict.copy(), self.model.y_test_predict.copy()
|
494
|
+
)
|
489
495
|
|
490
496
|
return self.cache_results(metric_value=psi_df)
|
@@ -318,9 +318,13 @@ class OverfitDiagnosis(ThresholdTest):
|
|
318
318
|
features_list = self.params["features_columns"]
|
319
319
|
|
320
320
|
# Check if all elements from features_list are present in the feature columns
|
321
|
-
all_present = all(
|
321
|
+
all_present = all(
|
322
|
+
elem in self.model.train_ds.get_features_columns() for elem in features_list
|
323
|
+
)
|
322
324
|
if not all_present:
|
323
|
-
raise ValueError(
|
325
|
+
raise ValueError(
|
326
|
+
"The list of feature columns provided do not match with training dataset feature columns"
|
327
|
+
)
|
324
328
|
|
325
329
|
if not isinstance(features_list, list):
|
326
330
|
raise ValueError(
|
@@ -595,10 +599,14 @@ class WeakspotsDiagnosis(ThresholdTest):
|
|
595
599
|
features_list = self.params["features_columns"]
|
596
600
|
|
597
601
|
# Check if all elements from features_list are present in the feature columns
|
598
|
-
all_present = all(
|
602
|
+
all_present = all(
|
603
|
+
elem in self.model.train_ds.get_features_columns() for elem in features_list
|
604
|
+
)
|
599
605
|
if not all_present:
|
600
|
-
raise ValueError(
|
601
|
-
|
606
|
+
raise ValueError(
|
607
|
+
"The list of feature columns provided do not match with "
|
608
|
+
+ "training dataset feature columns"
|
609
|
+
)
|
602
610
|
|
603
611
|
target_column = self.model.train_ds.target_column
|
604
612
|
prediction_column = f"{target_column}_pred"
|
@@ -866,14 +874,20 @@ class RobustnessDiagnosis(ThresholdTest):
|
|
866
874
|
features_list = self.model.train_ds.get_numeric_features_columns()
|
867
875
|
|
868
876
|
# Check if all elements from features_list are present in the numerical feature columns
|
869
|
-
all_present = all(
|
870
|
-
|
877
|
+
all_present = all(
|
878
|
+
elem in self.model.train_ds.get_numeric_features_columns()
|
879
|
+
for elem in features_list
|
880
|
+
)
|
871
881
|
if not all_present:
|
872
|
-
raise ValueError(
|
873
|
-
|
882
|
+
raise ValueError(
|
883
|
+
"The list of feature columns provided do not match with training "
|
884
|
+
+ "dataset numerical feature columns"
|
885
|
+
)
|
874
886
|
|
875
887
|
# Remove target column if it exist in the list
|
876
|
-
features_list = [
|
888
|
+
features_list = [
|
889
|
+
col for col in features_list if col != self.model.train_ds.target_column
|
890
|
+
]
|
877
891
|
|
878
892
|
train_df = self.model.train_ds.x.copy()
|
879
893
|
train_y_true = self.model.train_ds.y
|
@@ -884,7 +898,9 @@ class RobustnessDiagnosis(ThresholdTest):
|
|
884
898
|
test_results = []
|
885
899
|
test_figures = []
|
886
900
|
|
887
|
-
results_headers = ["Perturbation Size", "Dataset Type", "Records"] + list(
|
901
|
+
results_headers = ["Perturbation Size", "Dataset Type", "Records"] + list(
|
902
|
+
self.default_metrics.keys()
|
903
|
+
)
|
888
904
|
results = {k: [] for k in results_headers}
|
889
905
|
|
890
906
|
# Iterate scaling factor for the standard deviation list
|
@@ -920,14 +936,20 @@ class RobustnessDiagnosis(ThresholdTest):
|
|
920
936
|
)
|
921
937
|
)
|
922
938
|
|
923
|
-
train_acc = df.loc[(df[
|
924
|
-
test_acc = df.loc[(df[
|
925
|
-
|
926
|
-
df["Passed"] = np.where(
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
939
|
+
train_acc = df.loc[(df["Dataset Type"] == "Training"), "accuracy"].values[0]
|
940
|
+
test_acc = df.loc[(df["Dataset Type"] == "Test"), "accuracy"].values[0]
|
941
|
+
|
942
|
+
df["Passed"] = np.where(
|
943
|
+
(df["Dataset Type"] == "Training")
|
944
|
+
& (df["accuracy"] >= (train_acc - accuracy_threshold)),
|
945
|
+
True,
|
946
|
+
np.where(
|
947
|
+
(df["Dataset Type"] == "Test")
|
948
|
+
& (df["accuracy"] >= (test_acc - accuracy_threshold)),
|
949
|
+
True,
|
950
|
+
False,
|
951
|
+
),
|
952
|
+
)
|
931
953
|
test_results.append(
|
932
954
|
TestResult(
|
933
955
|
test_name="accuracy",
|
@@ -936,7 +958,9 @@ class RobustnessDiagnosis(ThresholdTest):
|
|
936
958
|
values=df.to_dict(),
|
937
959
|
)
|
938
960
|
)
|
939
|
-
return self.cache_results(
|
961
|
+
return self.cache_results(
|
962
|
+
test_results, passed=df["Passed"].all(), figures=test_figures
|
963
|
+
)
|
940
964
|
|
941
965
|
def _compute_metrics(
|
942
966
|
self,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/classification/datasets/taiwan_credit.csv
RENAMED
File without changes
|
File without changes
|
File without changes
|
{validmind-1.11.5 → validmind-1.11.6}/validmind/datasets/regression/datasets/fred_loan_rates.csv
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{validmind-1.11.5 → validmind-1.11.6}/validmind/model_validation/statsmodels/threshold_tests.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|