validmind 2.0.7__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in their public registry.
- validmind/__init__.py +3 -3
- validmind/__version__.py +1 -1
- validmind/ai.py +7 -11
- validmind/api_client.py +29 -27
- validmind/client.py +10 -3
- validmind/datasets/credit_risk/__init__.py +11 -0
- validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club.py +394 -0
- validmind/logging.py +9 -2
- validmind/template.py +2 -2
- validmind/test_suites/__init__.py +4 -2
- validmind/tests/__init__.py +97 -50
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +8 -2
- validmind/tests/decorator.py +138 -14
- validmind/tests/model_validation/BertScore.py +1 -1
- validmind/tests/model_validation/BertScoreAggregate.py +1 -1
- validmind/tests/model_validation/BleuScore.py +1 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
- validmind/tests/model_validation/ContextualRecall.py +1 -1
- validmind/tests/model_validation/FeaturesAUC.py +110 -0
- validmind/tests/model_validation/MeteorScore.py +1 -1
- validmind/tests/model_validation/RegardHistogram.py +1 -1
- validmind/tests/model_validation/RegardScore.py +1 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
- validmind/tests/model_validation/RougeMetrics.py +1 -1
- validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
- validmind/tests/model_validation/SelfCheckNLIScore.py +1 -1
- validmind/tests/model_validation/TokenDisparity.py +1 -1
- validmind/tests/model_validation/ToxicityHistogram.py +1 -1
- validmind/tests/model_validation/ToxicityScore.py +1 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +1 -1
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +15 -18
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
- validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +27 -3
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
- validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
- validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
- validmind/tests/test_providers.py +14 -124
- validmind/unit_metrics/__init__.py +76 -69
- validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
- validmind/unit_metrics/classification/sklearn/F1.py +13 -0
- validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
- validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
- validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
- validmind/unit_metrics/composite.py +24 -71
- validmind/unit_metrics/regression/GiniCoefficient.py +20 -26
- validmind/unit_metrics/regression/HuberLoss.py +12 -16
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +18 -24
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +7 -13
- validmind/unit_metrics/regression/MeanBiasDeviation.py +5 -14
- validmind/unit_metrics/regression/QuantileLoss.py +6 -16
- validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +12 -18
- validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +6 -15
- validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +5 -14
- validmind/unit_metrics/regression/sklearn/RSquaredScore.py +6 -15
- validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +11 -14
- validmind/utils.py +18 -45
- validmind/vm_models/__init__.py +0 -2
- validmind/vm_models/dataset.py +255 -16
- validmind/vm_models/test/metric.py +1 -2
- validmind/vm_models/test/result_wrapper.py +12 -13
- validmind/vm_models/test/test.py +2 -1
- validmind/vm_models/test/threshold_test.py +1 -2
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test_suite.py +2 -1
- {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/METADATA +10 -6
- {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/RECORD +97 -96
- validmind/tests/__types__.py +0 -62
- validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
- validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
- validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
- validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
- validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -22
- validmind/unit_metrics/sklearn/classification/F1.py +0 -24
- validmind/unit_metrics/sklearn/classification/Precision.py +0 -24
- validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -22
- validmind/unit_metrics/sklearn/classification/Recall.py +0 -22
- validmind/vm_models/test/unit_metric.py +0 -88
- {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
- {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
- {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py (removed)

@@ -1,172 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-from dataclasses import dataclass
-
-import numpy as np
-import pandas as pd
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-
-from validmind.vm_models import Figure, Metric
-
-
-@dataclass
-class LogisticRegCumulativeProb(Metric):
-    """
-    Visualizes cumulative probabilities of positive and negative classes for both training and testing in logistic
-    regression models.
-
-    **Purpose**: This metric is utilized to evaluate the distribution of predicted probabilities for positive and
-    negative classes in a logistic regression model. It's not solely intended to measure the model's performance but
-    also provides a visual assessment of the model's behavior by plotting the cumulative probabilities for positive and
-    negative classes across both the training and test datasets.
-
-    **Test Mechanism**: The logistic regression model is evaluated by first computing the predicted probabilities for
-    each instance in both the training and test datasets, which are then added as a new column in these sets. The
-    cumulative probabilities for positive and negative classes are subsequently calculated and sorted in ascending
-    order. Cumulative distributions of these probabilities are created for both positive and negative classes across
-    both training and test datasets. These cumulative probabilities are represented visually in a plot, containing two
-    subplots - one for the training data and the other for the test data, with lines representing cumulative
-    distributions of positive and negative classes.
-
-    **Signs of High Risk**:
-    - Imbalanced distribution of probabilities for either positive or negative classes.
-    - Notable discrepancies or significant differences between the cumulative probability distributions for the
-    training data versus the test data.
-    - Marked discrepancies or large differences between the cumulative probability distributions for positive and
-    negative classes.
-
-    **Strengths**:
-    - It offers not only numerical probabilities but also provides a visual illustration of data, which enhances the
-    ease of understanding and interpreting the model's behavior.
-    - Allows for the comparison of model's behavior across training and testing datasets, providing insights about how
-    well the model is generalized.
-    - It differentiates between positive and negative classes and their respective distribution patterns, which can aid
-    in problem diagnosis.
-
-    **Limitations**:
-    - Exclusive to classification tasks and specifically to logistic regression models.
-    - Graphical results necessitate human interpretation and may not be directly applicable for automated risk
-    detection.
-    - The method does not give a solitary quantifiable measure of model risk, rather it offers a visual representation
-    and broad distributional information.
-    - If the training and test datasets are not representative of the overall data distribution, the metric could
-    provide misleading results.
-    """
-
-    name = "logistic_reg_cumulative_prob"
-    required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["classification"],
-        "tags": ["logistic_regression", "visualization"],
-    }
-    default_params = {"title": "Cumulative Probabilities"}
-
-    @staticmethod
-    def compute_probabilities(model, X):
-        """
-        Predict probabilities and add them as a new column in X
-        """
-        probabilities = model.predict(X)
-        X["probabilities"] = probabilities
-        return X
-
-    @staticmethod
-    def plot_cumulative_prob(df_train, df_test, prob_col, target_col, title):
-        # Separate probabilities based on target column
-        train_0 = np.sort(df_train[df_train[target_col] == 0][prob_col])
-        train_1 = np.sort(df_train[df_train[target_col] == 1][prob_col])
-        test_0 = np.sort(df_test[df_test[target_col] == 0][prob_col])
-        test_1 = np.sort(df_test[df_test[target_col] == 1][prob_col])
-
-        # Calculate cumulative distributions
-        cumulative_train_0 = np.cumsum(train_0) / np.sum(train_0)
-        cumulative_train_1 = np.cumsum(train_1) / np.sum(train_1)
-        cumulative_test_0 = np.cumsum(test_0) / np.sum(test_0)
-        cumulative_test_1 = np.cumsum(test_1) / np.sum(test_1)
-
-        # Create subplot
-        fig = make_subplots(rows=1, cols=2, subplot_titles=("Train Data", "Test Data"))
-
-        # Create line plots for training data
-        trace_train_0 = go.Scatter(
-            x=train_0,
-            y=cumulative_train_0,
-            mode="lines",
-            name=f"Train {target_col} = 0",
-        )
-        trace_train_1 = go.Scatter(
-            x=train_1,
-            y=cumulative_train_1,
-            mode="lines",
-            name=f"Train {target_col} = 1",
-        )
-
-        # Create line plots for testing data
-        trace_test_0 = go.Scatter(
-            x=test_0, y=cumulative_test_0, mode="lines", name=f"Test {target_col} = 0"
-        )
-        trace_test_1 = go.Scatter(
-            x=test_1, y=cumulative_test_1, mode="lines", name=f"Test {target_col} = 1"
-        )
-
-        # Add traces to the subplots
-        fig.add_trace(trace_train_0, row=1, col=1)
-        fig.add_trace(trace_train_1, row=1, col=1)
-        fig.add_trace(trace_test_0, row=1, col=2)
-        fig.add_trace(trace_test_1, row=1, col=2)
-
-        # Update layout
-        fig.update_layout(title_text=title)
-
-        return fig
-
-    def run(self):
-        model = (
-            self.inputs.model[0]
-            if isinstance(self.inputs.model, list)
-            else self.inputs.model
-        )
-
-        target_column = self.datasets[0].target_column
-        title = self.params["title"]
-
-        # Create a copy of training and testing dataframes
-        df_train = self.datasets[0].df.copy()
-        df_test = self.datasets[1].df.copy()
-
-        # Drop target_column to create feature dataframes
-        X_train = df_train.drop(columns=[target_column])
-        X_test = df_test.drop(columns=[target_column])
-
-        # Subset only target_column to create target dataframes
-        y_train = df_train[[target_column]]
-        y_test = df_test[[target_column]]
-
-        X_train = self.compute_probabilities(model, X_train)
-        X_test = self.compute_probabilities(model, X_test)
-
-        df_train = pd.concat([X_train, y_train], axis=1)
-        df_test = pd.concat([X_test, y_test], axis=1)
-
-        fig = self.plot_cumulative_prob(
-            df_train, df_test, "probabilities", target_column, title
-        )
-
-        return self.cache_results(
-            metric_value={
-                "cum_prob": {
-                    "train_probs": list(X_train["probabilities"]),
-                    "test_probs": list(X_test["probabilities"]),
-                },
-            },
-            figures=[
-                Figure(
-                    for_object=self,
-                    key="cum_prob",
-                    figure=fig,
-                )
-            ],
-        )
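For orientation, here is a standalone sketch of the cumulative-distribution step this removed test performs (the probability values are hypothetical, not from the diff). Note that dividing the cumulative sum by `np.sum` yields the cumulative share of total predicted probability mass rather than an empirical CDF of counts. Per the file list above, `CumulativePredictionProbabilities.py` (+140) appears to be this test's successor in 2.1.0.

```python
import numpy as np

# Toy predicted probabilities for one class (hypothetical values)
probs = np.array([0.7, 0.1, 0.2])

sorted_probs = np.sort(probs)  # [0.1, 0.2, 0.7]

# Same normalization as plot_cumulative_prob above: cumulative probability
# mass, so the curve always ends at 1.0
cumulative = np.cumsum(sorted_probs) / np.sum(sorted_probs)
print(cumulative)  # [0.1 0.3 1. ]
```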
validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py (removed)

@@ -1,181 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-from dataclasses import dataclass
-
-import numpy as np
-import pandas as pd
-import plotly.graph_objects as go
-
-from validmind.vm_models import Figure, Metric
-
-
-@dataclass
-class ScorecardBucketHistogram(Metric):
-    """
-    Evaluates and visualizes distribution of risk categories in a classification model's scores, useful in credit risk
-    assessment.
-
-    **Purpose**: The 'Scorecard Bucket Histogram' is employed as a metric to evaluate the performance of a
-    classification model, specifically in credit risk assessment. It categorizes model scores into different rating
-    classes, and visualizes the distribution of scores or probabilities within each class. It essentially measures how
-    different risk categories (classes) are distributed in the model scores and provides insight into the model's
-    classification ability. This makes it particularly useful in credit scoring and risk modeling where understanding
-    the probability of default is critical.
-
-    **Test Mechanism**: The test works by computing the probabilities for each record in the test and train dataset
-    using the model's predict function. Subsequently, it calculates the scores using a formula incorporating target
-    score, target odds, and points to double odds (PDO). The scores are then bucketed into predefined rating classes
-    (such as 'A', 'B', 'C', 'D') and plotted in a histogram for both the train and test datasets. The target score,
-    target odds, points to double the odds (PDO), and rating classes are customizable parameters, providing flexibility
-    in test metrics based on differing model or industry norms.
-
-    **Signs of High Risk**:
-
-    - Disproportionate scores within rating classes
-    - Excessive overlap between classes
-    - Inconsistent distribution of scores between the training and testing datasets
-
-    If the model is accurately classifying and risk is being evenly distributed, we would anticipate smooth and
-    relatively balanced histograms within classes.
-
-    **Strengths**:
-
-    - Provides a quick visual snapshot of score distribution
-    - Breaks down complex predictions into simple, understandable classes, making it easily interpretable for both
-    technical and non-technical audiences
-    - Caters to customization of parameters
-    - Gives ownership of the class definitions to the user
-    - Useful in the field of credit risk, providing a clear understanding of which class or 'bucket' a potential
-    borrower belongs to
-
-    **Limitations**:
-
-    - Relies on manual setting of classes and other parameters (like target score, target odds, and PDO), potentially
-    leading to arbitrary classifications and potential bias if not judiciously performed
-    - Effectiveness can be limited with non-tabular data
-    - Doesn't provide a numerical value easily compared across different models or runs as the output is primarily
-    visual
-    - Might not present a complete view of model performance and should be used in conjunction with other metrics
-    """
-
-    name = "scorecard_bucket_histogram"
-    required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["classification"],
-        "tags": ["tabular_data", "visualization", "credit_risk"],
-    }
-    default_params = {
-        "title": "Distribution of Scores by Rating Classes",
-        "target_score": 600,
-        "target_odds": 50,
-        "pdo": 20,
-        "rating_classes": ["A", "B", "C", "D"],
-    }
-
-    @staticmethod
-    def compute_probabilities(model, X):
-        """
-        Predict probabilities and add them as a new column in X
-        """
-        probabilities = model.predict(X)
-        X["probabilities"] = probabilities
-        return X
-
-    @staticmethod
-    def compute_scores(X, target_score, target_odds, pdo):
-        X_copy = X.copy()
-        factor = pdo / np.log(2)
-        offset = target_score - (factor * np.log(target_odds))
-
-        X_copy["score"] = offset + factor * np.log(
-            X_copy["probabilities"] / (1 - X_copy["probabilities"])
-        )
-
-        return X_copy
-
-    @staticmethod
-    def plot_score_bucket_histogram(df, score_col, title, rating_classes):
-        df["bucket"] = pd.cut(
-            df[score_col], bins=len(rating_classes), labels=rating_classes, right=False
-        )
-
-        fig = go.Figure()
-
-        color_scale = [[0.0, "rgba(178, 24, 43, 1)"], [1.0, "rgba(33, 102, 172, 1)"]]
-
-        for bucket in rating_classes:
-            df_bucket = df[df["bucket"] == bucket]
-            bucket_values = df_bucket[score_col]
-            fig.add_trace(
-                go.Histogram(
-                    x=bucket_values,
-                    name=bucket,
-                    opacity=0.6,
-                )
-            )
-
-        fig.update_layout(
-            title_text=title,
-            xaxis_title="",
-            yaxis_title="Frequency",
-            barmode="overlay",
-            coloraxis=dict(colorscale=color_scale, colorbar=dict(title="")),
-        )
-
-        return fig
-
-    def run(self):
-        title = self.params["title"]
-        target_score = self.params["target_score"]
-        target_odds = self.params["target_odds"]
-        pdo = self.params["pdo"]
-        rating_classes = self.params["rating_classes"]
-
-        X_train = self.inputs.datasets[0].x.copy()
-        X_test = self.inputs.datasets[1].x.copy()
-
-        X_train_probs = self.compute_probabilities(self.inputs.model, X_train)
-        X_test_probs = self.compute_probabilities(self.inputs.model, X_test)
-
-        df_train_scores = self.compute_scores(
-            X_train_probs, target_score, target_odds, pdo
-        )
-        df_test_scores = self.compute_scores(
-            X_test_probs, target_score, target_odds, pdo
-        )
-
-        fig_train = self.plot_score_bucket_histogram(
-            df_train_scores,
-            "score",
-            title + " - Train Data",
-            rating_classes,
-        )
-        fig_test = self.plot_score_bucket_histogram(
-            df_test_scores,
-            "score",
-            title + " - Test Data",
-            rating_classes,
-        )
-
-        return self.cache_results(
-            metric_value={
-                "score_distribution": {
-                    "train_scores": list(df_train_scores["score"]),
-                    "test_scores": list(df_test_scores["score"]),
-                },
-            },
-            figures=[
-                Figure(
-                    for_object=self,
-                    key="score_distribution_train",
-                    figure=fig_train,
-                ),
-                Figure(
-                    for_object=self,
-                    key="score_distribution_test",
-                    figure=fig_test,
-                ),
-            ],
-        )
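The score scaling in `compute_scores` above is the standard points-based scorecard transformation: `factor = pdo / ln(2)` is the number of points added per doubling of the odds, and `offset` anchors the scale so that the target odds map to the target score. A worked check of the arithmetic with the test's default parameters (`target_score=600`, `target_odds=50`, `pdo=20`):

```python
import numpy as np

pdo, target_score, target_odds = 20, 600, 50

factor = pdo / np.log(2)                              # ~28.85 points per doubling of odds
offset = target_score - factor * np.log(target_odds)  # ~487.12

def score(p):
    # Same log-odds mapping as compute_scores above
    return offset + factor * np.log(p / (1 - p))

print(round(score(50 / 51), 2))    # odds = 50  -> 600.0 (the target score)
print(round(score(100 / 101), 2))  # odds = 100 -> 620.0 (one doubling = +pdo)
```

Note that, as written, the score increases with the probability returned by `model.predict`, so records with higher predicted probability land in higher-score buckets.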
validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py (removed)

@@ -1,175 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-from dataclasses import dataclass
-
-import numpy as np
-import pandas as pd
-import plotly.graph_objects as go
-
-from validmind.vm_models import Figure, Metric
-
-
-@dataclass
-class ScorecardProbabilitiesHistogram(Metric):
-    """
-    Evaluates risk classification of a model by visualizing the distribution of default probability across score
-    buckets.
-
-    **Purpose**: The Scorecard Probabilities Histogram, a specific metric used within the credit risk domain, is
-    designed to evaluate and visualize risk classification of a model. It aims at examining the distribution of the
-    probability of default across varied score buckets, with the score buckets being categories that entities (e.g.,
-    loan applicants) are classed under based on their predicted default risks. The key idea is to ensure that the model
-    accurately classifies entities into appropriate risk categories (score buckets) and aptly represents their default
-    probabilities.
-
-    **Test Mechanism**: The mechanism behind the Scorecard Probabilities Histogram includes several steps. It starts
-    with the calculation of default probabilities by the 'compute_probabilities' method, where the resulting
-    probability is added as a fresh column to the input dataset. Following that, scores are computed using these
-    probabilities, a target score, target odds, and a Points to Double the odds (pdo) factor by the 'compute_scores'
-    method. These scores are then bucketed via the 'compute_buckets' method. A histogram is then plotted for each score
-    bucket, with default probabilities as the x-axis and their frequency as the y-axis - implemented within the
-    'plot_probabilities_histogram' method. This entire process is executed distinctly for both training and testing
-    datasets.
-
-    **Signs of High Risk**:
-    - A significant overlap of different score buckets in the histogram indicates that the model is not efficiently
-    distinguishing between various risk categories.
-    - If very high or low probabilities are commonplace across all buckets, the model's predictions could be skewed.
-
-    **Strengths**:
-    - The Scorecard Probabilities Histogram allows for the visualization and analysis of the predicted default risk
-    distribution across different risk classes, thereby facilitating a visual inspection of the model's performance and
-    calibration for various risk categories.
-    - It provides a means to visualize how these classifications are distributed on the training and testing datasets
-    separately, contributing to a better comprehension of model generalization.
-
-    **Limitations**:
-    - The Scorecard Probabilities Histogram assumes linear and equally spaced risk categories, which might not always
-    hold true.
-    - If there are too few or too many score buckets, the visualization may not convey sufficient information.
-    - While it effectively illustrates the distribution of probabilities, it does not provide adequate numerical
-    metrics or threshold to definitively evaluate the model's performance. A more accurate evaluation necessitates its
-    usage in conjunction with other metrics and tools including the confusion matrix, AUC-ROC, Precision, Recall, and
-    so forth.
-    """
-
-    name = "scorecard_probabilities_histogram"
-    required_inputs = ["model"]
-    metadata = {
-        "task_types": ["classification"],
-        "tags": ["tabular_data", "visualization", "credit_risk"],
-    }
-    default_params = {
-        "title": "Probability of Default by Score Bucket",
-        "target_score": 600,
-        "target_odds": 50,
-        "pdo": 20,
-        "score_buckets": ["A", "B", "C", "D"],
-    }
-
-    @staticmethod
-    def compute_probabilities(model, X):
-        """
-        Predict probabilities and add them as a new column in X
-        """
-        probabilities = model.predict(X)
-        X["probabilities"] = probabilities
-        return X
-
-    @staticmethod
-    def compute_scores(X, target_score, target_odds, pdo):
-        X_copy = X.copy()
-        factor = pdo / np.log(2)
-        offset = target_score - (factor * np.log(target_odds))
-
-        X_copy["score"] = offset + factor * np.log(
-            X_copy["probabilities"] / (1 - X_copy["probabilities"])
-        )
-
-        return X_copy
-
-    @staticmethod
-    def compute_buckets(X, score_buckets):
-        X["bucket"] = pd.qcut(X["score"], q=len(score_buckets), labels=score_buckets)
-        return X
-
-    @staticmethod
-    def plot_probabilities_histogram(df, title, score_buckets):
-        fig = go.Figure()
-
-        for bucket in score_buckets:
-            df_bucket = df[df["bucket"] == bucket]
-            bucket_values = df_bucket["probabilities"]
-            fig.add_trace(
-                go.Histogram(
-                    x=bucket_values,
-                    name=bucket,
-                    opacity=0.6,
-                )
-            )
-
-        fig.update_layout(
-            title_text=title,
-            xaxis_title="Probability",
-            yaxis_title="Frequency",
-            barmode="overlay",
-        )
-
-        return fig
-
-    def run(self):
-        title = self.params["title"]
-        target_score = self.params["target_score"]
-        target_odds = self.params["target_odds"]
-        pdo = self.params["pdo"]
-        score_buckets = self.params["score_buckets"]
-
-        X_train = self.inputs.datasets[0].x.copy()
-        X_test = self.inputs.datasets[1].x.copy()
-
-        X_train_probs = self.compute_probabilities(self.inputs.model, X_train)
-        X_test_probs = self.compute_probabilities(self.inputs.model, X_test)
-
-        df_train_scores = self.compute_scores(
-            X_train_probs, target_score, target_odds, pdo
-        )
-        df_test_scores = self.compute_scores(
-            X_test_probs, target_score, target_odds, pdo
-        )
-
-        df_train_buckets = self.compute_buckets(df_train_scores, score_buckets)
-        df_test_buckets = self.compute_buckets(df_test_scores, score_buckets)
-
-        fig_train = self.plot_probabilities_histogram(
-            df_train_buckets,
-            title + " - Train Data",
-            score_buckets,
-        )
-        fig_test = self.plot_probabilities_histogram(
-            df_test_buckets,
-            title + " - Test Data",
-            score_buckets,
-        )
-
-        return self.cache_results(
-            metric_value={
-                "probability_distribution": {
-                    "train_probs": list(df_train_buckets["probabilities"]),
-                    "test_probs": list(df_test_buckets["probabilities"]),
-                },
-            },
-            figures=[
-                Figure(
-                    for_object=self,
-                    key="probability_distribution_train",
-                    figure=fig_train,
-                ),
-                Figure(
-                    for_object=self,
-                    key="probability_distribution_test",
-                    figure=fig_test,
-                ),
-            ],
-        )
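The two removed scorecard tests bucket records differently: `ScorecardBucketHistogram` uses `pd.cut`, which splits the score range into equal-width bins, while this test's `compute_buckets` uses `pd.qcut`, which splits on quantiles so each bucket holds roughly the same number of records. A minimal illustration with toy scores (hypothetical values, not from the diff):

```python
import pandas as pd

scores = pd.Series([300, 310, 320, 330, 340, 700])

# Equal-width bins (pd.cut): the range 300-700 is halved at 500, so the
# single high score owns an entire bin
print(pd.cut(scores, bins=2, labels=["low", "high"]).tolist())
# ['low', 'low', 'low', 'low', 'low', 'high']

# Equal-frequency bins (pd.qcut): split at the median (325), three per bucket
print(pd.qcut(scores, q=2, labels=["low", "high"]).tolist())
# ['low', 'low', 'low', 'high', 'high', 'high']
```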
validmind/unit_metrics/sklearn/classification/Accuracy.py (removed)

@@ -1,22 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-from dataclasses import dataclass
-
-from sklearn.metrics import accuracy_score
-
-from validmind.vm_models import UnitMetric
-
-
-@dataclass
-class Accuracy(UnitMetric):
-    required_inputs = ["dataset", "model"]
-
-    def run(self):
-        y_true = self.inputs.dataset.y
-        y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
-
-        value = accuracy_score(y_true, y_pred, **self.params)
-
-        return self.cache_results(metric_value=value)
validmind/unit_metrics/sklearn/classification/F1.py (removed)

@@ -1,24 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-from dataclasses import dataclass
-
-from sklearn.metrics import f1_score
-
-from validmind.vm_models import UnitMetric
-
-
-@dataclass
-class F1(UnitMetric):
-    required_inputs = ["dataset", "model"]
-
-    def run(self):
-        y_true = self.inputs.dataset.y
-        y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
-
-        value = f1_score(y_true, y_pred, **self.params)
-
-        return self.cache_results(
-            metric_value=value,
-        )
validmind/unit_metrics/sklearn/classification/Precision.py (removed)

@@ -1,24 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-from dataclasses import dataclass
-
-from sklearn.metrics import precision_score
-
-from validmind.vm_models import UnitMetric
-
-
-@dataclass
-class Precision(UnitMetric):
-    required_inputs = ["dataset", "model"]
-
-    def run(self):
-        y_true = self.inputs.dataset.y
-        y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
-
-        value = precision_score(y_true, y_pred, **self.params)
-
-        return self.cache_results(
-            metric_value=value,
-        )
validmind/unit_metrics/sklearn/classification/ROC_AUC.py (removed)

@@ -1,22 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-from dataclasses import dataclass
-
-from sklearn.metrics import roc_auc_score
-
-from validmind.vm_models import UnitMetric
-
-
-@dataclass
-class ROC_AUC(UnitMetric):
-    required_inputs = ["dataset", "model"]
-
-    def run(self):
-        y_true = self.inputs.dataset.y
-        y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
-
-        value = roc_auc_score(y_true, y_pred, **self.params)
-
-        return self.cache_results(metric_value=value)
validmind/unit_metrics/sklearn/classification/Recall.py (removed)

@@ -1,22 +0,0 @@
-# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
-# See the LICENSE file in the root of this repository for details.
-# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
-from dataclasses import dataclass
-
-from sklearn.metrics import recall_score
-
-from validmind.vm_models import UnitMetric
-
-
-@dataclass
-class Recall(UnitMetric):
-    required_inputs = ["dataset", "model"]
-
-    def run(self):
-        y_true = self.inputs.dataset.y
-        y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
-
-        value = recall_score(y_true, y_pred, **self.params)
-
-        return self.cache_results(metric_value=value)
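The five removed classes above are thin wrappers around `sklearn.metrics`; per the file list, they reappear in 2.1.0 under `validmind/unit_metrics/classification/sklearn/` at roughly half the line count. A standalone sketch (with hypothetical toy labels) of what each wrapper computed:

```python
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]  # hard class predictions, as all five wrappers used

print(accuracy_score(y_true, y_pred))   # 0.8
print(f1_score(y_true, y_pred))         # 0.8
print(precision_score(y_true, y_pred))  # 1.0
print(recall_score(y_true, y_pred))     # 0.666...
# Note: the removed ROC_AUC wrapper passed class labels rather than scores to
# roc_auc_score, which collapses the ROC curve to a single operating point.
print(roc_auc_score(y_true, y_pred))    # 0.833...
```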