validmind 2.0.7__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +3 -3
- validmind/__version__.py +1 -1
- validmind/ai.py +7 -11
- validmind/api_client.py +29 -27
- validmind/client.py +10 -3
- validmind/datasets/credit_risk/__init__.py +11 -0
- validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club.py +394 -0
- validmind/logging.py +9 -2
- validmind/template.py +2 -2
- validmind/test_suites/__init__.py +4 -2
- validmind/tests/__init__.py +97 -50
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +8 -2
- validmind/tests/decorator.py +138 -14
- validmind/tests/model_validation/BertScore.py +1 -1
- validmind/tests/model_validation/BertScoreAggregate.py +1 -1
- validmind/tests/model_validation/BleuScore.py +1 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
- validmind/tests/model_validation/ContextualRecall.py +1 -1
- validmind/tests/model_validation/FeaturesAUC.py +110 -0
- validmind/tests/model_validation/MeteorScore.py +1 -1
- validmind/tests/model_validation/RegardHistogram.py +1 -1
- validmind/tests/model_validation/RegardScore.py +1 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
- validmind/tests/model_validation/RougeMetrics.py +1 -1
- validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
- validmind/tests/model_validation/SelfCheckNLIScore.py +1 -1
- validmind/tests/model_validation/TokenDisparity.py +1 -1
- validmind/tests/model_validation/ToxicityHistogram.py +1 -1
- validmind/tests/model_validation/ToxicityScore.py +1 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +1 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +15 -18
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
- validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +27 -3
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
- validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
- validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
- validmind/tests/test_providers.py +14 -124
- validmind/unit_metrics/__init__.py +76 -69
- validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
- validmind/unit_metrics/classification/sklearn/F1.py +13 -0
- validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
- validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
- validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
- validmind/unit_metrics/composite.py +24 -71
- validmind/unit_metrics/regression/GiniCoefficient.py +20 -26
- validmind/unit_metrics/regression/HuberLoss.py +12 -16
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +18 -24
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +7 -13
- validmind/unit_metrics/regression/MeanBiasDeviation.py +5 -14
- validmind/unit_metrics/regression/QuantileLoss.py +6 -16
- validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +12 -18
- validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +6 -15
- validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +5 -14
- validmind/unit_metrics/regression/sklearn/RSquaredScore.py +6 -15
- validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +11 -14
- validmind/utils.py +18 -45
- validmind/vm_models/__init__.py +0 -2
- validmind/vm_models/dataset.py +255 -16
- validmind/vm_models/test/metric.py +1 -2
- validmind/vm_models/test/result_wrapper.py +12 -13
- validmind/vm_models/test/test.py +2 -1
- validmind/vm_models/test/threshold_test.py +1 -2
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test_suite.py +2 -1
- {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/METADATA +10 -6
- {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/RECORD +97 -96
- validmind/tests/__types__.py +0 -62
- validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
- validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
- validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
- validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
- validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -22
- validmind/unit_metrics/sklearn/classification/F1.py +0 -24
- validmind/unit_metrics/sklearn/classification/Precision.py +0 -24
- validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -22
- validmind/unit_metrics/sklearn/classification/Recall.py +0 -22
- validmind/vm_models/test/unit_metric.py +0 -88
- {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
- {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
- {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/sklearn/RegressionR2Square.py
@@ -70,11 +70,11 @@ class RegressionR2Square(Metric):
 
     def run(self):
         y_train_true = self.inputs.datasets[0].y
-        y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model
+        y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
         y_train_true = y_train_true.astype(y_train_pred.dtype)
 
         y_test_true = self.inputs.datasets[1].y
-        y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model
+        y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
         y_test_true = y_test_true.astype(y_test_pred.dtype)
 
         r2s_train = metrics.r2_score(y_train_true, y_train_pred)
validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py
@@ -6,6 +6,7 @@ import warnings
 from dataclasses import dataclass
 
 import matplotlib.pyplot as plt
+import numpy as np
 import shap
 
 from validmind.errors import UnsupportedModelForSHAPError
@@ -93,9 +94,30 @@ class SHAPGlobalImportance(Metric):
 
         summary_plot_extra_args = {}
         if type_ == "mean":
+            # Calculate the mean absolute SHAP value for each feature
+            mean_abs_shap = np.abs(shap_values).mean(axis=0)
+
+            # Find the maximum mean absolute SHAP value
+            max_shap_value = np.max(mean_abs_shap)
+
+            # Normalize all SHAP values based on the top feature
+            shap_values = (
+                shap_values / max_shap_value * 100
+            )  # scaling factor to make the top feature 100%
             summary_plot_extra_args = {"plot_type": "bar"}
+            shap.summary_plot(
+                shap_values, x_test, show=False, **summary_plot_extra_args
+            )
+
+            # Customize the plot using matplotlib
+            plt.xlabel("Normalized SHAP Value (Percentage)", fontsize=13)
+            plt.ylabel("Features", fontsize=13)
+            plt.title("Normalized Feature Importance", fontsize=13)
+        else:
+            shap.summary_plot(
+                shap_values, x_test, show=False, **summary_plot_extra_args
+            )
 
-        shap.summary_plot(shap_values, x_test, show=False, **summary_plot_extra_args)
         figure = plt.gcf()
         # avoid displaying on notebooks and clears the canvas for the next plot
         plt.close()
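The new `type_ == "mean"` branch rescales the SHAP matrix so the most important feature reads as 100%. A minimal standalone sketch of that normalization, using a small synthetic SHAP matrix in place of real explainer output:

```python
import numpy as np

# Synthetic SHAP values: 5 samples x 3 features (stand-in for explainer output)
shap_values = np.array([
    [0.20, -0.05, 0.10],
    [0.15, 0.02, -0.08],
    [-0.25, 0.04, 0.12],
    [0.18, -0.03, 0.09],
    [0.22, 0.01, -0.11],
])

# Mean absolute SHAP value per feature (column-wise), as in the new branch
mean_abs_shap = np.abs(shap_values).mean(axis=0)

# Scale so the top feature's mean |SHAP| becomes 100%
max_shap_value = np.max(mean_abs_shap)
normalized = shap_values / max_shap_value * 100

# Per-feature importance expressed as a percentage of the top feature
print(np.abs(normalized).mean(axis=0))
```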
@@ -132,6 +154,8 @@ class SHAPGlobalImportance(Metric):
             or model_class == "RandomForestClassifier"
             or model_class == "CatBoostClassifier"
             or model_class == "DecisionTreeClassifier"
+            or model_class == "RandomForestRegressor"
+            or model_class == "GradientBoostingRegressor"
         ):
             explainer = shap.TreeExplainer(trained_model)
         elif (
@@ -158,11 +182,11 @@ class SHAPGlobalImportance(Metric):
         # KernelExplainer is slow so we use shap.sample to speed it up
         if isinstance(explainer, shap.KernelExplainer):
             shap_sample = shap.sample(
-                self.inputs.dataset.
+                self.inputs.dataset.x_df(),
                 self.params["kernel_explainer_samples"],
             )
         else:
-            shap_sample = self.inputs.dataset.x_df()
+            shap_sample = self.inputs.dataset.x_df()
 
         shap_values = explainer.shap_values(shap_sample)
 
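For context on the `shap.sample` call above: `shap.KernelExplainer` evaluates the model on many perturbed rows, so the background data is normally down-sampled first. A hedged sketch of that pattern outside ValidMind, using a scikit-learn toy dataset and pipeline (both assumptions, not part of the package); the `background` sample plays the role of `kernel_explainer_samples`:

```python
import shap
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_breast_cancer(return_X_y=True, as_frame=True)
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000)).fit(X, y)

# KernelExplainer is slow, so down-sample the background data first
background = shap.sample(X, 10)  # analogous to params["kernel_explainer_samples"]
explainer = shap.KernelExplainer(model.predict_proba, background)

# Explain a handful of rows; cost grows with the number of rows and features
shap_values = explainer.shap_values(X.iloc[:5])
```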
validmind/tests/model_validation/sklearn/SilhouettePlot.py
@@ -69,7 +69,7 @@ class SilhouettePlot(Metric):
         }
 
     def run(self):
-        y_pred_train = self.inputs.dataset.y_pred(self.inputs.model
+        y_pred_train = self.inputs.dataset.y_pred(self.inputs.model)
         # Calculate the silhouette score
         silhouette_avg = silhouette_score(
             self.inputs.dataset.x,
validmind/tests/model_validation/sklearn/TrainingTestDegradation.py
@@ -121,11 +121,11 @@ class TrainingTestDegradation(ThresholdTest):
 
     def run(self):
         y_train_true = self.inputs.datasets[0].y
-        y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model
+        y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
         y_train_true = y_train_true.astype(y_train_pred.dtype)
 
         y_test_true = self.inputs.datasets[1].y
-        y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model
+        y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
         y_test_true = y_test_true.astype(y_test_pred.dtype)
 
         report_train = metrics.classification_report(
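The degradation test compares the same classification report on train and test predictions. A rough sketch of that comparison with plain scikit-learn and synthetic data (the test's actual thresholds and summary table are not reproduced here):

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)

report_train = classification_report(y_train, clf.predict(X_train), output_dict=True)
report_test = classification_report(y_test, clf.predict(X_test), output_dict=True)

# Degradation = how much a metric drops from train to test (here, weighted F1)
f1_drop = report_train["weighted avg"]["f1-score"] - report_test["weighted avg"]["f1-score"]
print(f"Weighted F1 degradation: {f1_drop:.3f}")
```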
validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py
@@ -137,11 +137,11 @@ class WeakspotsDiagnosis(ThresholdTest):
         prediction_column = f"{target_column}_pred"
 
         train_df = self.inputs.datasets[0].df.copy()
-        train_class_pred = self.inputs.datasets[0].y_pred(self.inputs.model
+        train_class_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
         train_df[prediction_column] = train_class_pred
 
         test_df = self.inputs.datasets[1].df.copy()
-        test_class_pred = self.inputs.datasets[1].y_pred(self.inputs.model
+        test_class_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
         test_df[prediction_column] = test_class_pred
 
         test_results = []
validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py (new file)
@@ -0,0 +1,140 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from dataclasses import dataclass
+
+import numpy as np
+import plotly.graph_objects as go
+from matplotlib import cm
+
+from validmind.vm_models import Figure, Metric
+
+
+@dataclass
+class CumulativePredictionProbabilities(Metric):
+    """
+    Visualizes cumulative probabilities of positive and negative classes for both training and testing in logistic
+    regression models.
+
+    **Purpose**: This metric is utilized to evaluate the distribution of predicted probabilities for positive and
+    negative classes in a logistic regression model. It's not solely intended to measure the model's performance but
+    also provides a visual assessment of the model's behavior by plotting the cumulative probabilities for positive and
+    negative classes across both the training and test datasets.
+
+    **Test Mechanism**: The logistic regression model is evaluated by first computing the predicted probabilities for
+    each instance in both the training and test datasets, which are then added as a new column in these sets. The
+    cumulative probabilities for positive and negative classes are subsequently calculated and sorted in ascending
+    order. Cumulative distributions of these probabilities are created for both positive and negative classes across
+    both training and test datasets. These cumulative probabilities are represented visually in a plot, containing two
+    subplots - one for the training data and the other for the test data, with lines representing cumulative
+    distributions of positive and negative classes.
+
+    **Signs of High Risk**:
+    - Imbalanced distribution of probabilities for either positive or negative classes.
+    - Notable discrepancies or significant differences between the cumulative probability distributions for the
+    training data versus the test data.
+    - Marked discrepancies or large differences between the cumulative probability distributions for positive and
+    negative classes.
+
+    **Strengths**:
+    - It offers not only numerical probabilities but also provides a visual illustration of data, which enhances the
+    ease of understanding and interpreting the model's behavior.
+    - Allows for the comparison of model's behavior across training and testing datasets, providing insights about how
+    well the model is generalized.
+    - It differentiates between positive and negative classes and their respective distribution patterns, which can aid
+    in problem diagnosis.
+
+    **Limitations**:
+    - Exclusive to classification tasks and specifically to logistic regression models.
+    - Graphical results necessitate human interpretation and may not be directly applicable for automated risk
+    detection.
+    - The method does not give a solitary quantifiable measure of model risk, rather it offers a visual representation
+    and broad distributional information.
+    - If the training and test datasets are not representative of the overall data distribution, the metric could
+    provide misleading results.
+    """
+
+    name = "cumulative_prediction_probabilities"
+    required_inputs = ["model", "datasets"]
+    metadata = {
+        "task_types": ["classification"],
+        "tags": ["logistic_regression", "visualization"],
+    }
+    default_params = {"title": "Cumulative Probabilities"}
+
+    @staticmethod
+    def plot_cumulative_prob(dataframes, dataset_titles, target_col, title):
+        figures = []
+
+        # Generate a colormap and convert to Plotly-accepted color format
+        # Adjust 'viridis' to any other matplotlib colormap if desired
+        colormap = cm.get_cmap("viridis")
+
+        for _, (df, dataset_title) in enumerate(zip(dataframes, dataset_titles)):
+            fig = go.Figure()
+
+            # Get unique classes and assign colors
+            classes = sorted(df[target_col].unique())
+            colors = [
+                colormap(i / len(classes))[:3] for i in range(len(classes))
+            ]  # RGB
+            color_dict = {
+                cls: f"rgb({int(rgb[0]*255)}, {int(rgb[1]*255)}, {int(rgb[2]*255)})"
+                for cls, rgb in zip(classes, colors)
+            }
+            for class_value in sorted(df[target_col].unique()):
+                # Calculate cumulative distribution for the current class
+                sorted_probs = np.sort(
+                    df[df[target_col] == class_value]["probabilities"]
+                )
+                cumulative_probs = np.cumsum(sorted_probs) / np.sum(sorted_probs)
+
+                fig.add_trace(
+                    go.Scatter(
+                        x=sorted_probs,
+                        y=cumulative_probs,
+                        mode="lines",
+                        name=f"{dataset_title} {target_col} = {class_value}",
+                        line=dict(
+                            color=color_dict[class_value],
+                        ),
+                    )
+                )
+            fig.update_layout(
+                title_text=f"{title} - {dataset_title}",
+                xaxis_title="Probability",
+                yaxis_title="Cumulative Distribution",
+                legend_title=target_col,
+            )
+            figures.append(fig)
+        return figures
+
+    def run(self):
+        dataset_titles = [dataset.input_id for dataset in self.inputs.datasets]
+        target_column = self.inputs.datasets[0].target_column
+        title = self.params.get("title", self.default_params["title"])
+
+        dataframes = []
+        metric_value = {"cum_prob": {}}
+        for dataset in self.inputs.datasets:
+            df = dataset.df.copy()
+            y_prob = dataset.y_prob(self.inputs.model)
+            df["probabilities"] = y_prob
+            dataframes.append(df)
+            metric_value["cum_prob"][dataset.input_id] = list(df["probabilities"])
+
+        figures = self.plot_cumulative_prob(
+            dataframes, dataset_titles, target_column, title
+        )
+
+        figures_list = [
+            Figure(
+                for_object=self,
+                key=f"cumulative_prob_{title.replace(' ', '_')}_{i+1}",
+                figure=fig,
+            )
+            for i, fig in enumerate(figures)
+        ]
+
+        return self.cache_results(metric_value=metric_value, figures=figures_list)
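The per-class curves in this new test come from sorting each class's predicted probabilities and accumulating them. A self-contained sketch of that computation, using random probabilities in place of real model output:

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "target": rng.integers(0, 2, size=200),
    "probabilities": rng.uniform(0, 1, size=200),
})

for class_value in sorted(df["target"].unique()):
    # Sort the class's probabilities, then accumulate and normalize to [0, 1]
    sorted_probs = np.sort(df.loc[df["target"] == class_value, "probabilities"])
    cumulative_probs = np.cumsum(sorted_probs) / np.sum(sorted_probs)
    # (sorted_probs, cumulative_probs) are the x/y pairs plotted per class above
    print(class_value, cumulative_probs[-1])  # the last value is always 1.0
```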
validmind/tests/model_validation/statsmodels/GINITable.py
@@ -65,19 +65,8 @@ class GINITable(Metric):
     }
 
     def run(self):
-        model = (
-            self.inputs.model[0]
-            if isinstance(self.inputs.model, list)
-            else self.inputs.model
-        )
-
-        X_train = self.datasets[0].x
-        y_train = self.datasets[0].y
 
-
-        y_test = self.datasets[1].y
-
-        summary_metrics = self.compute_metrics(model, X_train, y_train, X_test, y_test)
+        summary_metrics = self.compute_metrics()
 
         return self.cache_results(
             {
@@ -85,52 +74,40 @@ class GINITable(Metric):
             }
         )
 
-    def compute_metrics(self
-        """Computes AUC, GINI, and KS for
+    def compute_metrics(self):
+        """Computes AUC, GINI, and KS for an arbitrary number of datasets."""
+        # Initialize the dictionary to store results
+        metrics_dict = {"Dataset": [], "AUC": [], "GINI": [], "KS": []}
 
-
+        # Iterate over each dataset in the inputs
+        for i, dataset in enumerate(self.inputs.datasets):
+            dataset_label = (
+                dataset.input_id
+            )  # Use input_id as the label for each dataset
+            metrics_dict["Dataset"].append(dataset_label)
 
-
-
-
-        y_scores = model.predict(X)
+            # Retrieve y_true and y_pred for the current dataset
+            y_true = np.ravel(dataset.y)  # Flatten y_true to make it one-dimensional
+            y_prob = dataset.y_prob(self.inputs.model)
 
-
+            # Compute metrics
+            y_true = np.array(y_true, dtype=float)
+            y_prob = np.array(y_prob, dtype=float)
 
-
-
-
-
+            fpr, tpr, _ = roc_curve(y_true, y_prob)
+            ks = max(tpr - fpr)
+            auc = roc_auc_score(y_true, y_prob)
+            gini = 2 * auc - 1
 
             # Add the metrics to the dictionary
             metrics_dict["AUC"].append(auc)
            metrics_dict["GINI"].append(gini)
             metrics_dict["KS"].append(ks)
 
-        #
+        # Create a DataFrame to store and return the results
         metrics_df = pd.DataFrame(metrics_dict)
         return metrics_df
 
-    def compute_auc(self, y_true, y_scores):
-        """Computes the Area Under the Curve (AUC)."""
-        print("Computing AUC...")
-        auc = roc_auc_score(y_true, y_scores)
-        return auc
-
-    def compute_gini(self, y_true, y_scores):
-        """Computes the Gini coefficient."""
-        print("Computing GINI...")
-        auc = self.compute_auc(y_true, y_scores)
-        gini = 2 * auc - 1
-        return gini
-
-    def compute_ks(self, y_true, y_scores):
-        """Computes the Kolmogorov-Smirnov (KS) statistic."""
-        print("Computing KS...")
-        fpr, tpr, _ = roc_curve(y_true, y_scores)
-        ks = np.max(tpr - fpr)
-        return ks
-
     def summary(self, metric_value):
         summary_metrics_table = metric_value["metrics_summary"]
         return ResultSummary(
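The rewritten `compute_metrics` derives all three numbers from each dataset's ROC curve. The same arithmetic as a standalone scikit-learn snippet, with hard-coded labels and probabilities standing in for a dataset's `y` and `y_prob`:

```python
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

y_true = np.array([0, 0, 1, 1, 0, 1, 1, 0, 1, 0], dtype=float)
y_prob = np.array([0.1, 0.3, 0.8, 0.7, 0.4, 0.9, 0.6, 0.2, 0.75, 0.35], dtype=float)

fpr, tpr, _ = roc_curve(y_true, y_prob)
ks = max(tpr - fpr)                  # Kolmogorov-Smirnov statistic
auc = roc_auc_score(y_true, y_prob)  # area under the ROC curve
gini = 2 * auc - 1                   # Gini coefficient derived from AUC

print(f"AUC={auc:.3f}  GINI={gini:.3f}  KS={ks:.3f}")
```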
validmind/tests/model_validation/statsmodels/LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py
@@ -4,16 +4,14 @@
 
 from dataclasses import dataclass
 
-import numpy as np
-import pandas as pd
 import plotly.graph_objects as go
-from
+from matplotlib import cm
 
 from validmind.vm_models import Figure, Metric
 
 
 @dataclass
-class
+class PredictionProbabilitiesHistogram(Metric):
     """
     Generates and visualizes histograms of the Probability of Default predictions for both positive and negative
     classes in training and testing datasets.
@@ -58,7 +56,7 @@ class LogisticRegPredictionHistogram(Metric):
     quantifiable measure or score to assess model performance.
     """
 
-    name = "
+    name = "prediction_probabilities_histogram"
     required_inputs = ["model", "datasets"]
     metadata = {
         "task_types": ["classification"],
@@ -68,95 +66,72 @@ class LogisticRegPredictionHistogram(Metric):
     default_params = {"title": "Histogram of Predictive Probabilities"}
 
     @staticmethod
-    def
- [old lines 72-111 of the previous implementation are not preserved in this diff view]
-        fig.update_layout(barmode="overlay", title_text=title)
-
-        return fig
+    def plot_prob_histogram(dataframes, dataset_titles, target_col, title):
+        figures = []
+
+        # Generate a colormap and convert to Plotly-accepted color format
+        # Adjust 'viridis' to any other matplotlib colormap if desired
+        colormap = cm.get_cmap("viridis")
+
+        for i, (df, dataset_title) in enumerate(zip(dataframes, dataset_titles)):
+            fig = go.Figure()
+
+            # Get unique classes and assign colors
+            classes = sorted(df[target_col].unique())
+            colors = [
+                colormap(i / len(classes))[:3] for i in range(len(classes))
+            ]  # RGB
+            color_dict = {
+                cls: f"rgb({int(rgb[0]*255)}, {int(rgb[1]*255)}, {int(rgb[2]*255)})"
+                for cls, rgb in zip(classes, colors)
+            }
+
+            # Ensure classes are plotted in the specified order
+            for class_value in sorted(df[target_col].unique()):
+                fig.add_trace(
+                    go.Histogram(
+                        x=df[df[target_col] == class_value]["probabilities"],
+                        opacity=0.75,
+                        name=f"{dataset_title} {target_col} = {class_value}",
+                        marker=dict(
+                            color=color_dict[class_value],
+                        ),
+                    )
+                )
+            fig.update_layout(
+                barmode="overlay",
+                title_text=f"{title} - {dataset_title}",
+                xaxis_title="Probability",
+                yaxis_title="Frequency",
+            )
+            figures.append(fig)
+        return figures
 
     def run(self):
-
-
-
-
+        dataset_titles = [dataset.input_id for dataset in self.inputs.datasets]
+        target_column = self.inputs.datasets[0].target_column
+        title = self.params.get("title", self.default_params["title"])
+
+        dataframes = []
+        metric_value = {"prob_histogram": {}}
+        for _, dataset in enumerate(self.inputs.datasets):
+            df = dataset.df.copy()
+            y_prob = dataset.y_prob(self.inputs.model)
+            df["probabilities"] = y_prob
+            dataframes.append(df)
+            metric_value["prob_histogram"][dataset.input_id] = list(df["probabilities"])
+
+        figures = self.plot_prob_histogram(
+            dataframes, dataset_titles, target_column, title
         )
 
-
-
-
-
-
-
-
-
-        X_train = df_train.drop(columns=[target_column])
-        X_test = df_test.drop(columns=[target_column])
-
-        # Subset only target_column to create target dataframes
-        y_train = df_train[[target_column]]
-        y_test = df_test[[target_column]]
-
-        X_train = self.compute_probabilities(model, X_train)
-        X_test = self.compute_probabilities(model, X_test)
+        figures_list = [
+            Figure(
+                for_object=self,
+                key=f"prob_histogram_{title.replace(' ', '_')}_{i+1}",
+                figure=fig,
+            )
+            for i, fig in enumerate(figures)
+        ]
 
-
-        df_test = pd.concat([X_test, y_test], axis=1)
-
-        fig = self.plot_prob_histogram(
-            df_train, df_test, "probabilities", target_column, title
-        )
-
-        return self.cache_results(
-            metric_value={
-                "prob_histogram": {
-                    "train_probs": list(X_train["probabilities"]),
-                    "test_probs": list(X_test["probabilities"]),
-                },
-            },
-            figures=[
-                Figure(
-                    for_object=self,
-                    key="prob_histogram",
-                    figure=fig,
-                )
-            ],
-        )
+        return self.cache_results(metric_value=metric_value, figures=figures_list)
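Both new plotting helpers map a matplotlib colormap onto Plotly-style `rgb(...)` strings, one color per class. That conversion in isolation (the printed values are illustrative):

```python
from matplotlib import cm

classes = [0, 1]
colormap = cm.get_cmap("viridis")  # matplotlib >= 3.9 spells this matplotlib.colormaps["viridis"]

# Sample the colormap at evenly spaced points and keep only the RGB channels
colors = [colormap(i / len(classes))[:3] for i in range(len(classes))]
color_dict = {
    cls: f"rgb({int(rgb[0]*255)}, {int(rgb[1]*255)}, {int(rgb[2]*255)})"
    for cls, rgb in zip(classes, colors)
}
print(color_dict)  # e.g. {0: 'rgb(68, 1, 84)', 1: 'rgb(32, 144, 140)'}
```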
validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py
@@ -94,8 +94,8 @@ class RegressionModelForecastPlot(Metric):
         train_ds = datasets[0]
         test_ds = datasets[1]
 
-        y_pred = train_ds.y_pred(fitted_model
-        y_pred_test = test_ds.y_pred(fitted_model
+        y_pred = train_ds.y_pred(fitted_model)
+        y_pred_test = test_ds.y_pred(fitted_model)
 
         # Check that start_date and end_date are within the data range
         all_dates = pd.concat([pd.Series(train_ds.index), pd.Series(test_ds.index)])

validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py
@@ -103,8 +103,8 @@ class RegressionModelForecastPlotLevels(Metric):
         train_ds = datasets[0]
         test_ds = datasets[1]
 
-        y_pred = train_ds.y_pred(fitted_model
-        y_pred_test = test_ds.y_pred(fitted_model
+        y_pred = train_ds.y_pred(fitted_model)
+        y_pred_test = test_ds.y_pred(fitted_model)
 
         all_dates = pd.concat([pd.Series(train_ds.index), pd.Series(test_ds.index)])
 

validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py
@@ -108,7 +108,7 @@ class RegressionModelInsampleComparison(Metric):
         for i, model in enumerate(models):
             X_columns = dataset.get_features_columns()
             y_true = dataset.y
-            y_pred = dataset.y_pred(model
+            y_pred = dataset.y_pred(model)
 
             # Extract R-squared and Adjusted R-squared
             r2 = r2_score(y_true, y_pred)

validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py
@@ -102,7 +102,7 @@ class RegressionModelOutsampleComparison(Metric):
             y_test = dataset.y
 
             # Predict the test data
-            y_pred = dataset.y_pred(fitted_model
+            y_pred = dataset.y_pred(fitted_model)
 
             # Calculate the residuals
             residuals = y_test - y_pred

validmind/tests/model_validation/statsmodels/RegressionModelSummary.py
@@ -60,7 +60,7 @@ class RegressionModelSummary(Metric):
         X_columns = self.inputs.dataset.get_features_columns()
 
         y_true = self.inputs.dataset.y
-        y_pred = self.inputs.dataset.y_pred(self.inputs.model
+        y_pred = self.inputs.dataset.y_pred(self.inputs.model)
 
         r2 = r2_score(y_true, y_pred)
         adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(X_columns))

validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py
@@ -82,7 +82,7 @@ class RegressionModelsPerformance(Metric):
         for model, dataset in zip(models, datasets):
             X_columns = dataset.get_features_columns()
             y_true = dataset.y
-            y_pred = dataset.y_pred(model
+            y_pred = dataset.y_pred(model)
 
             # Extract R-squared and Adjusted R-squared
             r2 = r2_score(y_true, y_pred)
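Several of these hunks feed `y_pred` into R² and adjusted R². `adj_r2_score` is a ValidMind helper; assuming it follows the standard adjusted-R² definition, the adjustment works as in this sketch (the `adjusted_r2` function here is hypothetical, not the package's implementation):

```python
from sklearn.metrics import r2_score

def adjusted_r2(y_true, y_pred, n_obs, n_features):
    """Standard adjusted R^2: penalizes R^2 for the number of predictors."""
    r2 = r2_score(y_true, y_pred)
    return 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_features - 1)

y_true = [3.0, 2.5, 4.1, 5.0, 3.7]
y_pred = [2.8, 2.7, 3.9, 5.2, 3.5]
print(adjusted_r2(y_true, y_pred, n_obs=len(y_true), n_features=2))
```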