validmind 2.4.10__py3-none-any.whl → 2.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. validmind/__version__.py +1 -1
  2. validmind/api_client.py +1 -0
  3. validmind/client.py +0 -2
  4. validmind/input_registry.py +8 -0
  5. validmind/tests/__types__.py +4 -0
  6. validmind/tests/data_validation/DatasetDescription.py +1 -0
  7. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +15 -6
  8. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  9. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +10 -3
  10. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +349 -291
  11. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +1 -1
  12. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -37
  13. validmind/tests/ongoing_monitoring/FeatureDrift.py +182 -0
  14. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +76 -0
  15. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +91 -0
  16. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +57 -0
  17. validmind/tests/run.py +35 -19
  18. validmind/unit_metrics/__init__.py +1 -1
  19. validmind/unit_metrics/classification/sklearn/ROC_AUC.py +22 -1
  20. validmind/utils.py +1 -1
  21. validmind/vm_models/__init__.py +2 -0
  22. validmind/vm_models/dataset/dataset.py +55 -14
  23. validmind/vm_models/input.py +31 -0
  24. validmind/vm_models/model.py +4 -2
  25. validmind/vm_models/test_context.py +9 -2
  26. {validmind-2.4.10.dist-info → validmind-2.5.1.dist-info}/METADATA +1 -1
  27. {validmind-2.4.10.dist-info → validmind-2.5.1.dist-info}/RECORD +30 -25
  28. {validmind-2.4.10.dist-info → validmind-2.5.1.dist-info}/LICENSE +0 -0
  29. {validmind-2.4.10.dist-info → validmind-2.5.1.dist-info}/WHEEL +0 -0
  30. {validmind-2.4.10.dist-info → validmind-2.5.1.dist-info}/entry_points.txt +0 -0
validmind/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "2.4.10"
+ __version__ = "2.5.1"
validmind/api_client.py CHANGED
@@ -152,6 +152,7 @@ def _get_session() -> aiohttp.ClientSession:
  "X-API-KEY": _api_key,
  "X-API-SECRET": _api_secret,
  "X-PROJECT-CUID": _model_cuid,
+ "X-MONITORING": str(_monitoring),
  }
  )
 
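For reference, a minimal sketch of what the session construction around this change could look like. The `_monitoring` flag name comes from the added header line; everything else (the `make_session` name and the placeholder credential values) is assumed for illustration and is not the library's exact `_get_session` implementation.

import aiohttp

# Hypothetical stand-ins for the module-level state referenced by the diff
_api_key = "key"
_api_secret = "secret"
_model_cuid = "cuid"
_monitoring = False  # assumed to be a boolean flag; sent as a string header below

def make_session() -> aiohttp.ClientSession:
    # The "X-MONITORING" header mirrors the line added in this hunk
    return aiohttp.ClientSession(
        headers={
            "X-API-KEY": _api_key,
            "X-API-SECRET": _api_secret,
            "X-PROJECT-CUID": _model_cuid,
            "X-MONITORING": str(_monitoring),
        }
    )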
validmind/client.py CHANGED
@@ -48,7 +48,6 @@ def init_dataset(
  index_name: str = None,
  date_time_index: bool = False,
  columns: list = None,
- options: dict = None,
  text_column: str = None,
  target_column: str = None,
  feature_columns: list = None,
@@ -72,7 +71,6 @@ def init_dataset(
  Args:
  dataset : dataset from various python libraries
  model (VMModel): ValidMind model object
- options (dict): A dictionary of options for the dataset
  targets (vm.vm.DatasetTargets): A list of target variables
  target_column (str): The name of the target column in the dataset
  feature_columns (list): A list of names of feature columns in the dataset
validmind/input_registry.py CHANGED
@@ -6,6 +6,8 @@
  Central class to register inputs
  """

+ from validmind.vm_models.input import VMInput
+
  from .errors import InvalidInputError
@@ -14,6 +16,12 @@ class InputRegistry:
  self.registry = {}

  def add(self, key, obj):
+ if not isinstance(obj, VMInput):
+ raise InvalidInputError(
+ f"Input object must be an instance of VMInput. "
+ f"Got {type(obj)} instead."
+ )
+
  self.registry[key] = obj

  def get(self, key):
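Taken together, the two input_registry.py hunks make the registry reject anything that is not a VMInput. A minimal self-contained sketch of that behavior follows; the VMInput and InvalidInputError stubs are placeholders for the real classes in validmind.vm_models.input and validmind.errors.

class VMInput:  # stand-in for validmind.vm_models.input.VMInput
    pass

class InvalidInputError(TypeError):  # stand-in for validmind.errors.InvalidInputError
    pass

class InputRegistry:
    def __init__(self):
        self.registry = {}

    def add(self, key, obj):
        # New guard: only VMInput instances may be registered
        if not isinstance(obj, VMInput):
            raise InvalidInputError(
                f"Input object must be an instance of VMInput. Got {type(obj)} instead."
            )
        self.registry[key] = obj

    def get(self, key):
        return self.registry[key]

registry = InputRegistry()
registry.add("train_dataset", VMInput())        # accepted
# registry.add("raw_df", {"not": "a VMInput"})  # would raise InvalidInputError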
validmind/tests/__types__.py CHANGED
@@ -113,6 +113,10 @@ TestID = Literal[
  "validmind.model_validation.statsmodels.GINITable",
  "validmind.model_validation.statsmodels.RegressionModelForecastPlot",
  "validmind.model_validation.statsmodels.DurbinWatsonTest",
+ "validmind.ongoing_monitoring.PredictionCorrelation",
+ "validmind.ongoing_monitoring.PredictionAcrossEachFeature",
+ "validmind.ongoing_monitoring.FeatureDrift",
+ "validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
  "validmind.data_validation.MissingValuesRisk",
  "validmind.data_validation.IQROutliersTable",
  "validmind.data_validation.BivariateFeaturesBarPlots",
validmind/tests/data_validation/DatasetDescription.py CHANGED
@@ -134,6 +134,7 @@ class DatasetDescription(Metric):
  )
  else:
  vm_dataset_variables[column] = {"id": column, "type": str(type)}
+
  return list(vm_dataset_variables.values())

  def describe_dataset_field(self, df, field):
validmind/tests/model_validation/sklearn/ClassifierPerformance.py CHANGED
@@ -4,7 +4,7 @@
  from dataclasses import dataclass

- from numpy import unique
+ import numpy as np
  from sklearn.metrics import classification_report, roc_auc_score
  from sklearn.preprocessing import LabelBinarizer
@@ -71,7 +71,7 @@ class ClassifierPerformance(Metric):
  When building a multi-class summary we need to calculate weighted average,
  macro average and per class metrics.
  """
- classes = {str(i) for i in unique(self.inputs.dataset.y)}
+ classes = {str(i) for i in np.unique(self.inputs.dataset.y)}
  pr_f1_table = [
  {
  "Class": class_name,
@@ -126,9 +126,18 @@ class ClassifierPerformance(Metric):
  output_dict=True,
  zero_division=0,
  )
- report["roc_auc"] = multiclass_roc_auc_score(
- self.inputs.dataset.y,
- self.inputs.dataset.y_pred(self.inputs.model),
- )
+
+ y_true = self.inputs.dataset.y
+
+ if len(np.unique(y_true)) > 2:
+ y_pred = self.inputs.dataset.y_pred(self.inputs.model)
+ y_true = y_true.astype(y_pred.dtype)
+ roc_auc = self.multiclass_roc_auc_score(y_true, y_pred)
+ else:
+ y_prob = self.inputs.dataset.y_prob(self.inputs.model)
+ y_true = y_true.astype(y_prob.dtype).flatten()
+ roc_auc = roc_auc_score(y_true, y_prob)
+
+ report["roc_auc"] = roc_auc

  return self.cache_results(report)
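The change above splits ROC AUC computation two ways: label-binarized one-vs-rest scoring when more than two classes are present, and probability-based scoring for binary targets. A self-contained sketch of that split using scikit-learn directly; only the branching condition comes from the diff, while the helper name and the internals of multiclass_roc_auc_score are assumptions for illustration.

import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelBinarizer

def compute_roc_auc(y_true, y_pred_labels, y_prob):
    if len(np.unique(y_true)) > 2:
        # Multiclass: binarize true and predicted labels, then score one-vs-rest
        lb = LabelBinarizer().fit(y_true)
        return roc_auc_score(
            lb.transform(y_true), lb.transform(y_pred_labels), average="macro"
        )
    # Binary: score against the positive-class probabilities
    return roc_auc_score(np.asarray(y_true, dtype=float).flatten(), y_prob)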
validmind/tests/model_validation/sklearn/ClusterPerformance.py CHANGED
@@ -57,7 +57,7 @@ class ClusterPerformance(Metric):
  "model_performance",
  ]

- def cluser_performance_metrics(
+ def cluster_performance_metrics(
  self, y_true_train, y_pred_train, y_true_test, y_pred_test, samples, metric_info
  ):
  y_true_train = y_true_train.astype(y_pred_train.dtype).flatten()
@@ -107,7 +107,7 @@ class ClusterPerformance(Metric):
  y_true_test = y_true_test.astype(class_pred_test.dtype)

  samples = ["train", "test"]
- results = self.cluser_performance_metrics(
+ results = self.cluster_performance_metrics(
  y_true_train,
  class_pred_train,
  y_true_test,
validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py CHANGED
@@ -5,6 +5,7 @@
  from dataclasses import dataclass
  from typing import List

+ import numpy as np
  import pandas as pd
  from sklearn import metrics, preprocessing
@@ -99,9 +100,15 @@ class MinimumROCAUCScore(ThresholdTest):
  def run(self):
  y_true = self.inputs.dataset.y
- class_pred = self.inputs.dataset.y_pred(self.inputs.model)
- y_true = y_true.astype(class_pred.dtype)
- roc_auc = self.multiclass_roc_auc_score(y_true, class_pred)
+
+ if len(np.unique(y_true)) > 2:
+ class_pred = self.inputs.dataset.y_pred(self.inputs.model)
+ y_true = y_true.astype(class_pred.dtype)
+ roc_auc = self.multiclass_roc_auc_score(y_true, class_pred)
+ else:
+ y_prob = self.inputs.dataset.y_prob(self.inputs.model)
+ y_true = y_true.astype(y_prob.dtype).flatten()
+ roc_auc = metrics.roc_auc_score(y_true, y_prob)

  passed = roc_auc > self.params["min_threshold"]
  results = [