validmind 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. validmind/__init__.py +6 -3
  2. validmind/__version__.py +1 -1
  3. validmind/ai.py +193 -0
  4. validmind/api_client.py +45 -31
  5. validmind/client.py +33 -6
  6. validmind/datasets/classification/customer_churn.py +2 -2
  7. validmind/datasets/credit_risk/__init__.py +11 -0
  8. validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  9. validmind/datasets/credit_risk/lending_club.py +394 -0
  10. validmind/datasets/nlp/__init__.py +5 -0
  11. validmind/datasets/nlp/cnn_dailymail.py +98 -0
  12. validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
  13. validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
  14. validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
  15. validmind/errors.py +11 -1
  16. validmind/logging.py +9 -2
  17. validmind/models/huggingface.py +2 -2
  18. validmind/models/pytorch.py +3 -3
  19. validmind/models/sklearn.py +4 -4
  20. validmind/template.py +2 -2
  21. validmind/test_suites/__init__.py +4 -2
  22. validmind/tests/__init__.py +130 -45
  23. validmind/tests/data_validation/DatasetDescription.py +0 -1
  24. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  25. validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
  26. validmind/tests/data_validation/ScatterPlot.py +8 -2
  27. validmind/tests/data_validation/nlp/StopWords.py +1 -6
  28. validmind/tests/data_validation/nlp/TextDescription.py +20 -9
  29. validmind/tests/decorator.py +313 -0
  30. validmind/tests/model_validation/BertScore.py +1 -1
  31. validmind/tests/model_validation/BertScoreAggregate.py +1 -1
  32. validmind/tests/model_validation/BleuScore.py +1 -1
  33. validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
  34. validmind/tests/model_validation/ContextualRecall.py +1 -1
  35. validmind/tests/model_validation/FeaturesAUC.py +110 -0
  36. validmind/tests/model_validation/MeteorScore.py +92 -0
  37. validmind/tests/model_validation/RegardHistogram.py +6 -7
  38. validmind/tests/model_validation/RegardScore.py +4 -6
  39. validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
  40. validmind/tests/model_validation/RougeMetrics.py +7 -5
  41. validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
  42. validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
  43. validmind/tests/model_validation/TokenDisparity.py +1 -1
  44. validmind/tests/model_validation/ToxicityHistogram.py +1 -1
  45. validmind/tests/model_validation/ToxicityScore.py +1 -1
  46. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  47. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
  48. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
  49. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  50. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +16 -17
  51. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
  52. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  53. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
  54. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
  55. validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
  56. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
  57. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
  58. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
  59. validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
  60. validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
  61. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
  62. validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
  63. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +55 -5
  64. validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
  65. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +11 -5
  66. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
  67. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
  68. validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
  69. validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
  70. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
  71. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
  72. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  73. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  74. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  75. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +2 -2
  76. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
  77. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
  78. validmind/tests/prompt_validation/ai_powered_test.py +2 -0
  79. validmind/tests/test_providers.py +14 -124
  80. validmind/unit_metrics/__init__.py +75 -70
  81. validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
  82. validmind/unit_metrics/classification/sklearn/F1.py +13 -0
  83. validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
  84. validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
  85. validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
  86. validmind/unit_metrics/composite.py +228 -0
  87. validmind/unit_metrics/regression/GiniCoefficient.py +33 -0
  88. validmind/unit_metrics/regression/HuberLoss.py +23 -0
  89. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +30 -0
  90. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +16 -0
  91. validmind/unit_metrics/regression/MeanBiasDeviation.py +13 -0
  92. validmind/unit_metrics/regression/QuantileLoss.py +15 -0
  93. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +21 -0
  94. validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +13 -0
  95. validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +13 -0
  96. validmind/unit_metrics/regression/sklearn/RSquaredScore.py +13 -0
  97. validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +20 -0
  98. validmind/utils.py +20 -31
  99. validmind/vm_models/__init__.py +0 -2
  100. validmind/vm_models/dataset.py +623 -29
  101. validmind/vm_models/figure.py +52 -17
  102. validmind/vm_models/test/metric.py +33 -31
  103. validmind/vm_models/test/output_template.py +0 -27
  104. validmind/vm_models/test/result_wrapper.py +68 -36
  105. validmind/vm_models/test/test.py +4 -2
  106. validmind/vm_models/test/threshold_test.py +24 -14
  107. validmind/vm_models/test_context.py +7 -0
  108. validmind/vm_models/test_suite/runner.py +1 -1
  109. validmind/vm_models/test_suite/summary.py +3 -3
  110. validmind/vm_models/test_suite/test.py +1 -1
  111. validmind/vm_models/test_suite/test_suite.py +2 -1
  112. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/METADATA +18 -18
  113. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/RECORD +116 -94
  114. validmind-2.1.0.dist-info/entry_points.txt +3 -0
  115. validmind/tests/__types__.py +0 -62
  116. validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
  117. validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
  118. validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
  119. validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
  120. validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -20
  121. validmind/unit_metrics/sklearn/classification/F1.py +0 -22
  122. validmind/unit_metrics/sklearn/classification/Precision.py +0 -22
  123. validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -20
  124. validmind/unit_metrics/sklearn/classification/Recall.py +0 -20
  125. validmind/vm_models/test/unit_metric.py +0 -88
  126. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
  127. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
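Most of the hunks below share a single API change: tests now pass the VM model object itself to dataset.y_pred(...) instead of the model's input_id string, and probability-based tests read scores through a dataset.y_prob(...) accessor. A minimal sketch of the difference as it appears in test code (vm_dataset and vm_model are placeholder names, not objects defined in this diff):

# validmind 2.0.1 style: look up cached predictions by the model's input_id
y_pred = vm_dataset.y_pred(vm_model.input_id)

# validmind 2.1.0 style: pass the VM model object directly
y_pred = vm_dataset.y_pred(vm_model)

# probability-based tests (GINITable, PredictionProbabilitiesHistogram, ...) use
# the probability accessor the same way
y_prob = vm_dataset.y_prob(vm_model)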
validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py
@@ -6,7 +6,7 @@ import re
  from dataclasses import dataclass

  import numpy as np
- from sklearn import metrics
+ from sklearn.metrics import mean_absolute_error, mean_squared_error

  from validmind.errors import SkipTestError
  from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
@@ -67,12 +67,12 @@ class RegressionModelsPerformanceComparison(Metric):
  }

  def regression_errors(self, y_true_test, y_pred_test):
- mae_test = metrics.mean_absolute_error(y_true_test, y_pred_test)
+ mae_test = mean_absolute_error(y_true_test, y_pred_test)

  results = {}
  results["Mean Absolute Error (MAE)"] = mae_test

- mse_test = metrics.mean_squared_error(y_true_test, y_pred_test)
+ mse_test = mean_squared_error(y_true_test, y_pred_test)
  results["Mean Squared Error (MSE)"] = mse_test
  results["Root Mean Squared Error (RMSE)"] = np.sqrt(mse_test)

@@ -121,12 +121,14 @@ class RegressionModelsPerformanceComparison(Metric):

  if self.inputs.models is not None:
  all_models.extend(self.inputs.models)
+
  results = {}

  for idx, model in enumerate(all_models):
  result = self.regression_errors(
  y_true_test=self.inputs.dataset.y,
- y_pred_test=self.inputs.dataset.y_pred(model.input_id),
+ y_pred_test=self.inputs.dataset.y_pred(model),
  )
  results["model_" + str(idx)] = result
+
  return self.cache_results(results)
validmind/tests/model_validation/sklearn/RegressionR2Square.py
@@ -70,11 +70,11 @@ class RegressionR2Square(Metric):

  def run(self):
  y_train_true = self.inputs.datasets[0].y
- y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model.input_id)
+ y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
  y_train_true = y_train_true.astype(y_train_pred.dtype)

  y_test_true = self.inputs.datasets[1].y
- y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model.input_id)
+ y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
  y_test_true = y_test_true.astype(y_test_pred.dtype)

  r2s_train = metrics.r2_score(y_train_true, y_train_pred)
validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py
@@ -6,8 +6,10 @@ import warnings
  from dataclasses import dataclass

  import matplotlib.pyplot as plt
+ import numpy as np
  import shap

+ from validmind.errors import UnsupportedModelForSHAPError
  from validmind.logging import get_logger
  from validmind.vm_models import Figure, Metric

@@ -72,6 +74,9 @@ class SHAPGlobalImportance(Metric):
  "visualization",
  ],
  }
+ default_params = {
+ "kernel_explainer_samples": 10,
+ }

  def _generate_shap_plot(self, type_, shap_values, x_test):
  """
@@ -89,9 +94,30 @@ class SHAPGlobalImportance(Metric):

  summary_plot_extra_args = {}
  if type_ == "mean":
+ # Calculate the mean absolute SHAP value for each feature
+ mean_abs_shap = np.abs(shap_values).mean(axis=0)
+
+ # Find the maximum mean absolute SHAP value
+ max_shap_value = np.max(mean_abs_shap)
+
+ # Normalize all SHAP values based on the top feature
+ shap_values = (
+ shap_values / max_shap_value * 100
+ ) # scaling factor to make the top feature 100%
  summary_plot_extra_args = {"plot_type": "bar"}
+ shap.summary_plot(
+ shap_values, x_test, show=False, **summary_plot_extra_args
+ )
+
+ # Customize the plot using matplotlib
+ plt.xlabel("Normalized SHAP Value (Percentage)", fontsize=13)
+ plt.ylabel("Features", fontsize=13)
+ plt.title("Normalized Feature Importance", fontsize=13)
+ else:
+ shap.summary_plot(
+ shap_values, x_test, show=False, **summary_plot_extra_args
+ )

- shap.summary_plot(shap_values, x_test, show=False, **summary_plot_extra_args)
  figure = plt.gcf()
  # avoid displaying on notebooks and clears the canvas for the next plot
  plt.close()
@@ -127,22 +153,46 @@ class SHAPGlobalImportance(Metric):
  model_class == "XGBClassifier"
  or model_class == "RandomForestClassifier"
  or model_class == "CatBoostClassifier"
+ or model_class == "DecisionTreeClassifier"
+ or model_class == "RandomForestRegressor"
+ or model_class == "GradientBoostingRegressor"
  ):
  explainer = shap.TreeExplainer(trained_model)
  elif (
  model_class == "LogisticRegression"
  or model_class == "XGBRegressor"
  or model_class == "LinearRegression"
+ or model_class == "LinearSVC"
  ):
  explainer = shap.LinearExplainer(trained_model, self.inputs.dataset.x)
+ elif model_class == "SVC":
+ # KernelExplainer is slow so we use shap.sample to speed it up
+ explainer = shap.KernelExplainer(
+ trained_model.predict,
+ shap.sample(
+ self.inputs.dataset.x,
+ self.params["kernel_explainer_samples"],
+ ),
+ )
+ else:
+ raise UnsupportedModelForSHAPError(
+ f"Model {model_class} not supported for SHAP importance."
+ )
+
+ # KernelExplainer is slow so we use shap.sample to speed it up
+ if isinstance(explainer, shap.KernelExplainer):
+ shap_sample = shap.sample(
+ self.inputs.dataset.x_df(),
+ self.params["kernel_explainer_samples"],
+ )
  else:
- raise ValueError(f"Model {model_class} not supported for SHAP importance.")
+ shap_sample = self.inputs.dataset.x_df()

- shap_values = explainer.shap_values(self.inputs.dataset.x)
+ shap_values = explainer.shap_values(shap_sample)

  figures = [
- self._generate_shap_plot("mean", shap_values, self.inputs.dataset.x),
- self._generate_shap_plot("summary", shap_values, self.inputs.dataset.x),
+ self._generate_shap_plot("mean", shap_values, shap_sample),
+ self._generate_shap_plot("summary", shap_values, shap_sample),
  ]

  # restore warnings
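The SHAPGlobalImportance changes above add a kernel_explainer_samples parameter (default 10) that subsamples the dataset before running shap.KernelExplainer, and raise UnsupportedModelForSHAPError for unsupported model classes. A hedged sketch of overriding that parameter when running the test, assuming the run_test helper in validmind.tests and hypothetical input_ids:

from validmind.tests import run_test

# Sketch only: "my_model" and "test_dataset" are hypothetical input_ids registered
# via vm.init_model()/vm.init_dataset(); a larger sample slows KernelExplainer down
# but stabilizes the SHAP estimates.
run_test(
    "validmind.model_validation.sklearn.SHAPGlobalImportance",
    inputs={"model": "my_model", "dataset": "test_dataset"},
    params={"kernel_explainer_samples": 50},
)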
validmind/tests/model_validation/sklearn/SilhouettePlot.py
@@ -69,7 +69,7 @@ class SilhouettePlot(Metric):
  }

  def run(self):
- y_pred_train = self.inputs.dataset.y_pred(self.inputs.model.input_id)
+ y_pred_train = self.inputs.dataset.y_pred(self.inputs.model)
  # Calculate the silhouette score
  silhouette_avg = silhouette_score(
  self.inputs.dataset.x,
validmind/tests/model_validation/sklearn/TrainingTestDegradation.py
@@ -121,20 +121,20 @@ class TrainingTestDegradation(ThresholdTest):

  def run(self):
  y_train_true = self.inputs.datasets[0].y
- y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model.input_id)
+ y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
  y_train_true = y_train_true.astype(y_train_pred.dtype)

  y_test_true = self.inputs.datasets[1].y
- y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model.input_id)
+ y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
  y_test_true = y_test_true.astype(y_test_pred.dtype)

  report_train = metrics.classification_report(
- y_train_true, y_train_pred, output_dict=True
+ y_train_true, y_train_pred, output_dict=True, zero_division=0
  )
  report_train["roc_auc"] = multiclass_roc_auc_score(y_train_true, y_train_pred)

  report_test = metrics.classification_report(
- y_test_true, y_test_pred, output_dict=True
+ y_test_true, y_test_pred, output_dict=True, zero_division=0
  )
  report_test["roc_auc"] = multiclass_roc_auc_score(y_test_true, y_test_pred)

@@ -145,7 +145,13 @@ class TrainingTestDegradation(ThresholdTest):
  for metric_name in ["precision", "recall", "f1-score"]:
  train_score = report_train[class_name][metric_name]
  test_score = report_test[class_name][metric_name]
- degradation = (train_score - test_score) / train_score
+
+ # If training score is 0, degradation is assumed to be 100%
+ if train_score == 0:
+ degradation = 1.0
+ else:
+ degradation = (train_score - test_score) / train_score
+
  passed = degradation < self.params["max_threshold"]
  test_results.append(
  ThresholdTestResult(
validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py
@@ -137,11 +137,11 @@ class WeakspotsDiagnosis(ThresholdTest):
  prediction_column = f"{target_column}_pred"

  train_df = self.inputs.datasets[0].df.copy()
- train_class_pred = self.inputs.datasets[0].y_pred(self.inputs.model.input_id)
+ train_class_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
  train_df[prediction_column] = train_class_pred

  test_df = self.inputs.datasets[1].df.copy()
- test_class_pred = self.inputs.datasets[1].y_pred(self.inputs.model.input_id)
+ test_class_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
  test_df[prediction_column] = test_class_pred

  test_results = []
validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py (new file)
@@ -0,0 +1,140 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from dataclasses import dataclass
+
+ import numpy as np
+ import plotly.graph_objects as go
+ from matplotlib import cm
+
+ from validmind.vm_models import Figure, Metric
+
+
+ @dataclass
+ class CumulativePredictionProbabilities(Metric):
+ """
+ Visualizes cumulative probabilities of positive and negative classes for both training and testing in logistic
+ regression models.
+
+ **Purpose**: This metric is utilized to evaluate the distribution of predicted probabilities for positive and
+ negative classes in a logistic regression model. It's not solely intended to measure the model's performance but
+ also provides a visual assessment of the model's behavior by plotting the cumulative probabilities for positive and
+ negative classes across both the training and test datasets.
+
+ **Test Mechanism**: The logistic regression model is evaluated by first computing the predicted probabilities for
+ each instance in both the training and test datasets, which are then added as a new column in these sets. The
+ cumulative probabilities for positive and negative classes are subsequently calculated and sorted in ascending
+ order. Cumulative distributions of these probabilities are created for both positive and negative classes across
+ both training and test datasets. These cumulative probabilities are represented visually in a plot, containing two
+ subplots - one for the training data and the other for the test data, with lines representing cumulative
+ distributions of positive and negative classes.
+
+ **Signs of High Risk**:
+ - Imbalanced distribution of probabilities for either positive or negative classes.
+ - Notable discrepancies or significant differences between the cumulative probability distributions for the
+ training data versus the test data.
+ - Marked discrepancies or large differences between the cumulative probability distributions for positive and
+ negative classes.
+
+ **Strengths**:
+ - It offers not only numerical probabilities but also provides a visual illustration of data, which enhances the
+ ease of understanding and interpreting the model's behavior.
+ - Allows for the comparison of model's behavior across training and testing datasets, providing insights about how
+ well the model is generalized.
+ - It differentiates between positive and negative classes and their respective distribution patterns, which can aid
+ in problem diagnosis.
+
+ **Limitations**:
+ - Exclusive to classification tasks and specifically to logistic regression models.
+ - Graphical results necessitate human interpretation and may not be directly applicable for automated risk
+ detection.
+ - The method does not give a solitary quantifiable measure of model risk, rather it offers a visual representation
+ and broad distributional information.
+ - If the training and test datasets are not representative of the overall data distribution, the metric could
+ provide misleading results.
+ """
+
+ name = "cumulative_prediction_probabilities"
+ required_inputs = ["model", "datasets"]
+ metadata = {
+ "task_types": ["classification"],
+ "tags": ["logistic_regression", "visualization"],
+ }
+ default_params = {"title": "Cumulative Probabilities"}
+
+ @staticmethod
+ def plot_cumulative_prob(dataframes, dataset_titles, target_col, title):
+ figures = []
+
+ # Generate a colormap and convert to Plotly-accepted color format
+ # Adjust 'viridis' to any other matplotlib colormap if desired
+ colormap = cm.get_cmap("viridis")
+
+ for _, (df, dataset_title) in enumerate(zip(dataframes, dataset_titles)):
+ fig = go.Figure()
+
+ # Get unique classes and assign colors
+ classes = sorted(df[target_col].unique())
+ colors = [
+ colormap(i / len(classes))[:3] for i in range(len(classes))
+ ] # RGB
+ color_dict = {
+ cls: f"rgb({int(rgb[0]*255)}, {int(rgb[1]*255)}, {int(rgb[2]*255)})"
+ for cls, rgb in zip(classes, colors)
+ }
+ for class_value in sorted(df[target_col].unique()):
+ # Calculate cumulative distribution for the current class
+ sorted_probs = np.sort(
+ df[df[target_col] == class_value]["probabilities"]
+ )
+ cumulative_probs = np.cumsum(sorted_probs) / np.sum(sorted_probs)
+
+ fig.add_trace(
+ go.Scatter(
+ x=sorted_probs,
+ y=cumulative_probs,
+ mode="lines",
+ name=f"{dataset_title} {target_col} = {class_value}",
+ line=dict(
+ color=color_dict[class_value],
+ ),
+ )
+ )
+ fig.update_layout(
+ title_text=f"{title} - {dataset_title}",
+ xaxis_title="Probability",
+ yaxis_title="Cumulative Distribution",
+ legend_title=target_col,
+ )
+ figures.append(fig)
+ return figures
+
+ def run(self):
+ dataset_titles = [dataset.input_id for dataset in self.inputs.datasets]
+ target_column = self.inputs.datasets[0].target_column
+ title = self.params.get("title", self.default_params["title"])
+
+ dataframes = []
+ metric_value = {"cum_prob": {}}
+ for dataset in self.inputs.datasets:
+ df = dataset.df.copy()
+ y_prob = dataset.y_prob(self.inputs.model)
+ df["probabilities"] = y_prob
+ dataframes.append(df)
+ metric_value["cum_prob"][dataset.input_id] = list(df["probabilities"])
+
+ figures = self.plot_cumulative_prob(
+ dataframes, dataset_titles, target_column, title
+ )
+
+ figures_list = [
+ Figure(
+ for_object=self,
+ key=f"cumulative_prob_{title.replace(' ', '_')}_{i+1}",
+ figure=fig,
+ )
+ for i, fig in enumerate(figures)
+ ]
+
+ return self.cache_results(metric_value=metric_value, figures=figures_list)
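The new CumulativePredictionProbabilities test above accepts a model plus any number of datasets and draws one cumulative-probability figure per dataset, labelled by its input_id. A minimal invocation sketch under the same assumptions as the earlier example (run_test helper, hypothetical input_ids):

from validmind.tests import run_test

# Sketch only: one figure is produced for each dataset listed under "datasets".
run_test(
    "validmind.model_validation.statsmodels.CumulativePredictionProbabilities",
    inputs={"model": "my_model", "datasets": ["train_dataset", "test_dataset"]},
    params={"title": "Cumulative Probabilities"},
)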
validmind/tests/model_validation/statsmodels/GINITable.py
@@ -65,19 +65,8 @@ class GINITable(Metric):
  }

  def run(self):
- model = (
- self.inputs.model[0]
- if isinstance(self.inputs.model, list)
- else self.inputs.model
- )
-
- X_train = self.datasets[0].x
- y_train = self.datasets[0].y

- X_test = self.datasets[1].x
- y_test = self.datasets[1].y
-
- summary_metrics = self.compute_metrics(model, X_train, y_train, X_test, y_test)
+ summary_metrics = self.compute_metrics()

  return self.cache_results(
  {
@@ -85,52 +74,40 @@ class GINITable(Metric):
  }
  )

- def compute_metrics(self, model, X_train, y_train, X_test, y_test):
- """Computes AUC, GINI, and KS for train and test sets."""
+ def compute_metrics(self):
+ """Computes AUC, GINI, and KS for an arbitrary number of datasets."""
+ # Initialize the dictionary to store results
+ metrics_dict = {"Dataset": [], "AUC": [], "GINI": [], "KS": []}

- metrics_dict = {"Dataset": ["Train", "Test"], "AUC": [], "GINI": [], "KS": []}
+ # Iterate over each dataset in the inputs
+ for i, dataset in enumerate(self.inputs.datasets):
+ dataset_label = (
+ dataset.input_id
+ ) # Use input_id as the label for each dataset
+ metrics_dict["Dataset"].append(dataset_label)

- for dataset, X, y in zip(
- ["Train", "Test"], [X_train, X_test], [y_train, y_test]
- ):
- y_scores = model.predict(X)
+ # Retrieve y_true and y_pred for the current dataset
+ y_true = np.ravel(dataset.y) # Flatten y_true to make it one-dimensional
+ y_prob = dataset.y_prob(self.inputs.model)

- print("Predicted scores obtained...")
+ # Compute metrics
+ y_true = np.array(y_true, dtype=float)
+ y_prob = np.array(y_prob, dtype=float)

- # Compute AUC, GINI, and KS
- auc = self.compute_auc(y, y_scores)
- gini = self.compute_gini(y, y_scores)
- ks = self.compute_ks(y, y_scores)
+ fpr, tpr, _ = roc_curve(y_true, y_prob)
+ ks = max(tpr - fpr)
+ auc = roc_auc_score(y_true, y_prob)
+ gini = 2 * auc - 1

  # Add the metrics to the dictionary
  metrics_dict["AUC"].append(auc)
  metrics_dict["GINI"].append(gini)
  metrics_dict["KS"].append(ks)

- # Convert dictionary to DataFrame for nicer display
+ # Create a DataFrame to store and return the results
  metrics_df = pd.DataFrame(metrics_dict)
  return metrics_df

- def compute_auc(self, y_true, y_scores):
- """Computes the Area Under the Curve (AUC)."""
- print("Computing AUC...")
- auc = roc_auc_score(y_true, y_scores)
- return auc
-
- def compute_gini(self, y_true, y_scores):
- """Computes the Gini coefficient."""
- print("Computing GINI...")
- auc = self.compute_auc(y_true, y_scores)
- gini = 2 * auc - 1
- return gini
-
- def compute_ks(self, y_true, y_scores):
- """Computes the Kolmogorov-Smirnov (KS) statistic."""
- print("Computing KS...")
- fpr, tpr, _ = roc_curve(y_true, y_scores)
- ks = np.max(tpr - fpr)
- return ks
-
  def summary(self, metric_value):
  summary_metrics_table = metric_value["metrics_summary"]
  return ResultSummary(
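The refactored GINITable above derives all three statistics directly from predicted probabilities: AUC via roc_auc_score, GINI as 2 * AUC - 1, and KS as the largest gap between the TPR and FPR curves. A small standalone sketch of those relationships on made-up toy scores, using scikit-learn only:

import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

# Toy labels and predicted probabilities, for illustration only
y_true = np.array([0, 0, 1, 1, 0, 1])
y_prob = np.array([0.10, 0.40, 0.35, 0.80, 0.20, 0.70])

auc = roc_auc_score(y_true, y_prob)
fpr, tpr, _ = roc_curve(y_true, y_prob)

gini = 2 * auc - 1      # Gini coefficient derived from AUC
ks = np.max(tpr - fpr)  # Kolmogorov-Smirnov statistic
print(f"AUC={auc:.3f}  GINI={gini:.3f}  KS={ks:.3f}")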
validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py}
@@ -4,16 +4,14 @@

  from dataclasses import dataclass

- import numpy as np
- import pandas as pd
  import plotly.graph_objects as go
- from plotly.subplots import make_subplots
+ from matplotlib import cm

  from validmind.vm_models import Figure, Metric


  @dataclass
- class LogisticRegPredictionHistogram(Metric):
+ class PredictionProbabilitiesHistogram(Metric):
  """
  Generates and visualizes histograms of the Probability of Default predictions for both positive and negative
  classes in training and testing datasets.
@@ -58,7 +56,7 @@ class LogisticRegPredictionHistogram(Metric):
  quantifiable measure or score to assess model performance.
  """

- name = "logistic_reg_prediction_histogram"
+ name = "prediction_probabilities_histogram"
  required_inputs = ["model", "datasets"]
  metadata = {
  "task_types": ["classification"],
@@ -68,95 +66,72 @@ class LogisticRegPredictionHistogram(Metric):
  default_params = {"title": "Histogram of Predictive Probabilities"}

  @staticmethod
- def compute_probabilities(model, X):
- """
- Predict probabilities and add PD as a new column in X
- """
- probabilities = model.predict(X)
- pd_series = probabilities
-
- # If X is a numpy array, convert it to DataFrame
- if isinstance(X, np.ndarray):
- X = pd.DataFrame(X)
-
- X["probabilities"] = pd_series
- return X
-
- @staticmethod
- def plot_prob_histogram(df_train, df_test, pd_col, target_col, title):
- train_0 = df_train[df_train[target_col] == 0][pd_col]
- train_1 = df_train[df_train[target_col] == 1][pd_col]
- test_0 = df_test[df_test[target_col] == 0][pd_col]
- test_1 = df_test[df_test[target_col] == 1][pd_col]
-
- fig = make_subplots(rows=1, cols=2, subplot_titles=("Train Data", "Test Data"))
-
- trace_train_0 = go.Histogram(
- x=train_0, opacity=0.75, name=f"Train {target_col} = 0"
- )
- trace_train_1 = go.Histogram(
- x=train_1, opacity=0.75, name=f"Train {target_col} = 1"
- )
- trace_test_0 = go.Histogram(
- x=test_0, opacity=0.75, name=f"Test {target_col} = 0"
- )
- trace_test_1 = go.Histogram(
- x=test_1, opacity=0.75, name=f"Test {target_col} = 1"
- )
-
- fig.add_trace(trace_train_0, row=1, col=1)
- fig.add_trace(trace_train_1, row=1, col=1)
- fig.add_trace(trace_test_0, row=1, col=2)
- fig.add_trace(trace_test_1, row=1, col=2)
-
- fig.update_layout(barmode="overlay", title_text=title)
-
- return fig
+ def plot_prob_histogram(dataframes, dataset_titles, target_col, title):
+ figures = []
+
+ # Generate a colormap and convert to Plotly-accepted color format
+ # Adjust 'viridis' to any other matplotlib colormap if desired
+ colormap = cm.get_cmap("viridis")
+
+ for i, (df, dataset_title) in enumerate(zip(dataframes, dataset_titles)):
+ fig = go.Figure()
+
+ # Get unique classes and assign colors
+ classes = sorted(df[target_col].unique())
+ colors = [
+ colormap(i / len(classes))[:3] for i in range(len(classes))
+ ] # RGB
+ color_dict = {
+ cls: f"rgb({int(rgb[0]*255)}, {int(rgb[1]*255)}, {int(rgb[2]*255)})"
+ for cls, rgb in zip(classes, colors)
+ }
+
+ # Ensure classes are plotted in the specified order
+ for class_value in sorted(df[target_col].unique()):
+ fig.add_trace(
+ go.Histogram(
+ x=df[df[target_col] == class_value]["probabilities"],
+ opacity=0.75,
+ name=f"{dataset_title} {target_col} = {class_value}",
+ marker=dict(
+ color=color_dict[class_value],
+ ),
+ )
+ )
+ fig.update_layout(
+ barmode="overlay",
+ title_text=f"{title} - {dataset_title}",
+ xaxis_title="Probability",
+ yaxis_title="Frequency",
+ )
+ figures.append(fig)
+ return figures

  def run(self):
- model = (
- self.inputs.model[0]
- if isinstance(self.inputs.model, list)
- else self.inputs.model
+ dataset_titles = [dataset.input_id for dataset in self.inputs.datasets]
+ target_column = self.inputs.datasets[0].target_column
+ title = self.params.get("title", self.default_params["title"])
+
+ dataframes = []
+ metric_value = {"prob_histogram": {}}
+ for _, dataset in enumerate(self.inputs.datasets):
+ df = dataset.df.copy()
+ y_prob = dataset.y_prob(self.inputs.model)
+ df["probabilities"] = y_prob
+ dataframes.append(df)
+ metric_value["prob_histogram"][dataset.input_id] = list(df["probabilities"])
+
+ figures = self.plot_prob_histogram(
+ dataframes, dataset_titles, target_column, title
  )

- target_column = model.train_ds.target_column
- title = self.params["title"]
-
- # Create a copy of training and testing dataframes
- df_train = self.datasets[0].df.copy()
- df_test = self.datasets[1].df.copy()
-
- # Drop target_column to create feature dataframes
- X_train = df_train.drop(columns=[target_column])
- X_test = df_test.drop(columns=[target_column])
-
- # Subset only target_column to create target dataframes
- y_train = df_train[[target_column]]
- y_test = df_test[[target_column]]
-
- X_train = self.compute_probabilities(model, X_train)
- X_test = self.compute_probabilities(model, X_test)
+ figures_list = [
+ Figure(
+ for_object=self,
+ key=f"prob_histogram_{title.replace(' ', '_')}_{i+1}",
+ figure=fig,
+ )
+ for i, fig in enumerate(figures)
+ ]

- df_train = pd.concat([X_train, y_train], axis=1)
- df_test = pd.concat([X_test, y_test], axis=1)
-
- fig = self.plot_prob_histogram(
- df_train, df_test, "probabilities", target_column, title
- )
-
- return self.cache_results(
- metric_value={
- "prob_histogram": {
- "train_probs": list(X_train["probabilities"]),
- "test_probs": list(X_test["probabilities"]),
- },
- },
- figures=[
- Figure(
- for_object=self,
- key="prob_histogram",
- figure=fig,
- )
- ],
- )
+ return self.cache_results(metric_value=metric_value, figures=figures_list)
validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py
@@ -94,8 +94,8 @@ class RegressionModelForecastPlot(Metric):
  train_ds = datasets[0]
  test_ds = datasets[1]

- y_pred = train_ds.y_pred(fitted_model.input_id)
- y_pred_test = test_ds.y_pred(fitted_model.input_id)
+ y_pred = train_ds.y_pred(fitted_model)
+ y_pred_test = test_ds.y_pred(fitted_model)

  # Check that start_date and end_date are within the data range
  all_dates = pd.concat([pd.Series(train_ds.index), pd.Series(test_ds.index)])