validmind 2.0.7__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. validmind/__init__.py +3 -3
  2. validmind/__version__.py +1 -1
  3. validmind/ai.py +7 -11
  4. validmind/api_client.py +29 -27
  5. validmind/client.py +10 -3
  6. validmind/datasets/credit_risk/__init__.py +11 -0
  7. validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  8. validmind/datasets/credit_risk/lending_club.py +394 -0
  9. validmind/logging.py +9 -2
  10. validmind/template.py +2 -2
  11. validmind/test_suites/__init__.py +4 -2
  12. validmind/tests/__init__.py +97 -50
  13. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  14. validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
  15. validmind/tests/data_validation/ScatterPlot.py +8 -2
  16. validmind/tests/decorator.py +138 -14
  17. validmind/tests/model_validation/BertScore.py +1 -1
  18. validmind/tests/model_validation/BertScoreAggregate.py +1 -1
  19. validmind/tests/model_validation/BleuScore.py +1 -1
  20. validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
  21. validmind/tests/model_validation/ContextualRecall.py +1 -1
  22. validmind/tests/model_validation/FeaturesAUC.py +110 -0
  23. validmind/tests/model_validation/MeteorScore.py +1 -1
  24. validmind/tests/model_validation/RegardHistogram.py +1 -1
  25. validmind/tests/model_validation/RegardScore.py +1 -1
  26. validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
  27. validmind/tests/model_validation/RougeMetrics.py +1 -1
  28. validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
  29. validmind/tests/model_validation/SelfCheckNLIScore.py +1 -1
  30. validmind/tests/model_validation/TokenDisparity.py +1 -1
  31. validmind/tests/model_validation/ToxicityHistogram.py +1 -1
  32. validmind/tests/model_validation/ToxicityScore.py +1 -1
  33. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  34. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
  35. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +1 -1
  36. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  37. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +15 -18
  38. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
  39. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  40. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
  41. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
  42. validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
  43. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
  44. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
  45. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
  46. validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
  47. validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
  48. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
  49. validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
  50. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +33 -3
  51. validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
  52. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
  53. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
  54. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
  55. validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
  56. validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
  57. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
  58. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
  59. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  60. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  61. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  62. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
  63. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
  64. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
  65. validmind/tests/test_providers.py +14 -124
  66. validmind/unit_metrics/__init__.py +76 -69
  67. validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
  68. validmind/unit_metrics/classification/sklearn/F1.py +13 -0
  69. validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
  70. validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
  71. validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
  72. validmind/unit_metrics/composite.py +24 -71
  73. validmind/unit_metrics/regression/GiniCoefficient.py +20 -26
  74. validmind/unit_metrics/regression/HuberLoss.py +12 -16
  75. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +18 -24
  76. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +7 -13
  77. validmind/unit_metrics/regression/MeanBiasDeviation.py +5 -14
  78. validmind/unit_metrics/regression/QuantileLoss.py +6 -16
  79. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +12 -18
  80. validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +6 -15
  81. validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +5 -14
  82. validmind/unit_metrics/regression/sklearn/RSquaredScore.py +6 -15
  83. validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +11 -14
  84. validmind/utils.py +18 -45
  85. validmind/vm_models/__init__.py +0 -2
  86. validmind/vm_models/dataset.py +255 -16
  87. validmind/vm_models/test/metric.py +1 -2
  88. validmind/vm_models/test/result_wrapper.py +12 -13
  89. validmind/vm_models/test/test.py +2 -1
  90. validmind/vm_models/test/threshold_test.py +1 -2
  91. validmind/vm_models/test_suite/summary.py +3 -3
  92. validmind/vm_models/test_suite/test_suite.py +2 -1
  93. {validmind-2.0.7.dist-info → validmind-2.1.1.dist-info}/METADATA +10 -6
  94. {validmind-2.0.7.dist-info → validmind-2.1.1.dist-info}/RECORD +97 -96
  95. validmind/tests/__types__.py +0 -62
  96. validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
  97. validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
  98. validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
  99. validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
  100. validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -22
  101. validmind/unit_metrics/sklearn/classification/F1.py +0 -24
  102. validmind/unit_metrics/sklearn/classification/Precision.py +0 -24
  103. validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -22
  104. validmind/unit_metrics/sklearn/classification/Recall.py +0 -22
  105. validmind/vm_models/test/unit_metric.py +0 -88
  106. {validmind-2.0.7.dist-info → validmind-2.1.1.dist-info}/LICENSE +0 -0
  107. {validmind-2.0.7.dist-info → validmind-2.1.1.dist-info}/WHEEL +0 -0
  108. {validmind-2.0.7.dist-info → validmind-2.1.1.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py (deleted)
@@ -1,172 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import numpy as np
- import pandas as pd
- import plotly.graph_objects as go
- from plotly.subplots import make_subplots
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class LogisticRegCumulativeProb(Metric):
-     """
-     Visualizes cumulative probabilities of positive and negative classes for both training and testing in logistic
-     regression models.
-
-     **Purpose**: This metric is utilized to evaluate the distribution of predicted probabilities for positive and
-     negative classes in a logistic regression model. It's not solely intended to measure the model's performance but
-     also provides a visual assessment of the model's behavior by plotting the cumulative probabilities for positive and
-     negative classes across both the training and test datasets.
-
-     **Test Mechanism**: The logistic regression model is evaluated by first computing the predicted probabilities for
-     each instance in both the training and test datasets, which are then added as a new column in these sets. The
-     cumulative probabilities for positive and negative classes are subsequently calculated and sorted in ascending
-     order. Cumulative distributions of these probabilities are created for both positive and negative classes across
-     both training and test datasets. These cumulative probabilities are represented visually in a plot, containing two
-     subplots - one for the training data and the other for the test data, with lines representing cumulative
-     distributions of positive and negative classes.
-
-     **Signs of High Risk**:
-     - Imbalanced distribution of probabilities for either positive or negative classes.
-     - Notable discrepancies or significant differences between the cumulative probability distributions for the
-     training data versus the test data.
-     - Marked discrepancies or large differences between the cumulative probability distributions for positive and
-     negative classes.
-
-     **Strengths**:
-     - It offers not only numerical probabilities but also provides a visual illustration of data, which enhances the
-     ease of understanding and interpreting the model's behavior.
-     - Allows for the comparison of model's behavior across training and testing datasets, providing insights about how
-     well the model is generalized.
-     - It differentiates between positive and negative classes and their respective distribution patterns, which can aid
-     in problem diagnosis.
-
-     **Limitations**:
-     - Exclusive to classification tasks and specifically to logistic regression models.
-     - Graphical results necessitate human interpretation and may not be directly applicable for automated risk
-     detection.
-     - The method does not give a solitary quantifiable measure of model risk, rather it offers a visual representation
-     and broad distributional information.
-     - If the training and test datasets are not representative of the overall data distribution, the metric could
-     provide misleading results.
-     """
-
-     name = "logistic_reg_cumulative_prob"
-     required_inputs = ["model", "datasets"]
-     metadata = {
-         "task_types": ["classification"],
-         "tags": ["logistic_regression", "visualization"],
-     }
-     default_params = {"title": "Cumulative Probabilities"}
-
-     @staticmethod
-     def compute_probabilities(model, X):
-         """
-         Predict probabilities and add them as a new column in X
-         """
-         probabilities = model.predict(X)
-         X["probabilities"] = probabilities
-         return X
-
-     @staticmethod
-     def plot_cumulative_prob(df_train, df_test, prob_col, target_col, title):
-         # Separate probabilities based on target column
-         train_0 = np.sort(df_train[df_train[target_col] == 0][prob_col])
-         train_1 = np.sort(df_train[df_train[target_col] == 1][prob_col])
-         test_0 = np.sort(df_test[df_test[target_col] == 0][prob_col])
-         test_1 = np.sort(df_test[df_test[target_col] == 1][prob_col])
-
-         # Calculate cumulative distributions
-         cumulative_train_0 = np.cumsum(train_0) / np.sum(train_0)
-         cumulative_train_1 = np.cumsum(train_1) / np.sum(train_1)
-         cumulative_test_0 = np.cumsum(test_0) / np.sum(test_0)
-         cumulative_test_1 = np.cumsum(test_1) / np.sum(test_1)
-
-         # Create subplot
-         fig = make_subplots(rows=1, cols=2, subplot_titles=("Train Data", "Test Data"))
-
-         # Create line plots for training data
-         trace_train_0 = go.Scatter(
-             x=train_0,
-             y=cumulative_train_0,
-             mode="lines",
-             name=f"Train {target_col} = 0",
-         )
-         trace_train_1 = go.Scatter(
-             x=train_1,
-             y=cumulative_train_1,
-             mode="lines",
-             name=f"Train {target_col} = 1",
-         )
-
-         # Create line plots for testing data
-         trace_test_0 = go.Scatter(
-             x=test_0, y=cumulative_test_0, mode="lines", name=f"Test {target_col} = 0"
-         )
-         trace_test_1 = go.Scatter(
-             x=test_1, y=cumulative_test_1, mode="lines", name=f"Test {target_col} = 1"
-         )
-
-         # Add traces to the subplots
-         fig.add_trace(trace_train_0, row=1, col=1)
-         fig.add_trace(trace_train_1, row=1, col=1)
-         fig.add_trace(trace_test_0, row=1, col=2)
-         fig.add_trace(trace_test_1, row=1, col=2)
-
-         # Update layout
-         fig.update_layout(title_text=title)
-
-         return fig
-
-     def run(self):
-         model = (
-             self.inputs.model[0]
-             if isinstance(self.inputs.model, list)
-             else self.inputs.model
-         )
-
-         target_column = self.datasets[0].target_column
-         title = self.params["title"]
-
-         # Create a copy of training and testing dataframes
-         df_train = self.datasets[0].df.copy()
-         df_test = self.datasets[1].df.copy()
-
-         # Drop target_column to create feature dataframes
-         X_train = df_train.drop(columns=[target_column])
-         X_test = df_test.drop(columns=[target_column])
-
-         # Subset only target_column to create target dataframes
-         y_train = df_train[[target_column]]
-         y_test = df_test[[target_column]]
-
-         X_train = self.compute_probabilities(model, X_train)
-         X_test = self.compute_probabilities(model, X_test)
-
-         df_train = pd.concat([X_train, y_train], axis=1)
-         df_test = pd.concat([X_test, y_test], axis=1)
-
-         fig = self.plot_cumulative_prob(
-             df_train, df_test, "probabilities", target_column, title
-         )
-
-         return self.cache_results(
-             metric_value={
-                 "cum_prob": {
-                     "train_probs": list(X_train["probabilities"]),
-                     "test_probs": list(X_test["probabilities"]),
-                 },
-             },
-             figures=[
-                 Figure(
-                     for_object=self,
-                     key="cum_prob",
-                     figure=fig,
-                 )
-             ],
-         )
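For reference, each curve in the removed LogisticRegCumulativeProb test is simply a sorted probability vector plotted against its normalised cumulative sum (np.sort followed by np.cumsum / np.sum, as in plot_cumulative_prob above). A minimal, self-contained sketch of that computation on toy numbers (the values and variable names are illustrative, not taken from the package):

import numpy as np

# predicted probabilities for one class on a handful of rows (toy data)
probs = np.array([0.9, 0.1, 0.4, 0.8, 0.35, 0.05])

sorted_probs = np.sort(probs)                                 # x-axis of the curve (ascending)
cumulative = np.cumsum(sorted_probs) / np.sum(sorted_probs)   # y-axis: normalised cumulative sum, ends at 1.0

print(sorted_probs)
print(cumulative)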
validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py (deleted)
@@ -1,181 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import numpy as np
- import pandas as pd
- import plotly.graph_objects as go
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class ScorecardBucketHistogram(Metric):
-     """
-     Evaluates and visualizes distribution of risk categories in a classification model's scores, useful in credit risk
-     assessment.
-
-     **Purpose**: The 'Scorecard Bucket Histogram' is employed as a metric to evaluate the performance of a
-     classification model, specifically in credit risk assessment. It categorizes model scores into different rating
-     classes, and visualizes the distribution of scores or probabilities within each class. It essentially measures how
-     different risk categories (classes) are distributed in the model scores and provides insight into the model's
-     classification ability. This makes it particularly useful in credit scoring and risk modeling where understanding
-     the probability of default is critical.
-
-     **Test Mechanism**: The test works by computing the probabilities for each record in the test and train dataset
-     using the model's predict function. Subsequently, it calculates the scores using a formula incorporating target
-     score, target odds, and points to double odds (PDO). The scores are then bucketed into predefined rating classes
-     (such as 'A', 'B', 'C', 'D') and plotted in a histogram for both the train and test datasets. The target score,
-     target odds, points to double the odds (PDO), and rating classes are customizable parameters, providing flexibility
-     in test metrics based on differing model or industry norms.
-
-     **Signs of High Risk**:
-
-     - Disproportionate scores within rating classes
-     - Excessive overlap between classes
-     - Inconsistent distribution of scores between the training and testing datasets
-
-     If the model is accurately classifying and risk is being evenly distributed, we would anticipate smooth and
-     relatively balanced histograms within classes.
-
-     **Strengths**:
-
-     - Provides a quick visual snapshot of score distribution
-     - Breaks down complex predictions into simple, understandable classes, making it easily interpretable for both
-     technical and non-technical audiences
-     - Caters to customization of parameters
-     - Gives ownership of the class definitions to the user
-     - Useful in the field of credit risk, providing a clear understanding of which class or 'bucket' a potential
-     borrower belongs to
-
-     **Limitations**:
-
-     - Relies on manual setting of classes and other parameters (like target score, target odds, and PDO), potentially
-     leading to arbitrary classifications and potential bias if not judiciously performed
-     - Effectiveness can be limited with non-tabular data
-     - Doesn't provide a numerical value easily compared across different models or runs as the output is primarily
-     visual
-     - Might not present a complete view of model performance and should be used in conjunction with other metrics
-     """
-
-     name = "scorecard_bucket_histogram"
-     required_inputs = ["model", "datasets"]
-     metadata = {
-         "task_types": ["classification"],
-         "tags": ["tabular_data", "visualization", "credit_risk"],
-     }
-     default_params = {
-         "title": "Distribution of Scores by Rating Classes",
-         "target_score": 600,
-         "target_odds": 50,
-         "pdo": 20,
-         "rating_classes": ["A", "B", "C", "D"],
-     }
-
-     @staticmethod
-     def compute_probabilities(model, X):
-         """
-         Predict probabilities and add them as a new column in X
-         """
-         probabilities = model.predict(X)
-         X["probabilities"] = probabilities
-         return X
-
-     @staticmethod
-     def compute_scores(X, target_score, target_odds, pdo):
-         X_copy = X.copy()
-         factor = pdo / np.log(2)
-         offset = target_score - (factor * np.log(target_odds))
-
-         X_copy["score"] = offset + factor * np.log(
-             X_copy["probabilities"] / (1 - X_copy["probabilities"])
-         )
-
-         return X_copy
-
-     @staticmethod
-     def plot_score_bucket_histogram(df, score_col, title, rating_classes):
-         df["bucket"] = pd.cut(
-             df[score_col], bins=len(rating_classes), labels=rating_classes, right=False
-         )
-
-         fig = go.Figure()
-
-         color_scale = [[0.0, "rgba(178, 24, 43, 1)"], [1.0, "rgba(33, 102, 172, 1)"]]
-
-         for bucket in rating_classes:
-             df_bucket = df[df["bucket"] == bucket]
-             bucket_values = df_bucket[score_col]
-             fig.add_trace(
-                 go.Histogram(
-                     x=bucket_values,
-                     name=bucket,
-                     opacity=0.6,
-                 )
-             )
-
-         fig.update_layout(
-             title_text=title,
-             xaxis_title="",
-             yaxis_title="Frequency",
-             barmode="overlay",
-             coloraxis=dict(colorscale=color_scale, colorbar=dict(title="")),
-         )
-
-         return fig
-
-     def run(self):
-         title = self.params["title"]
-         target_score = self.params["target_score"]
-         target_odds = self.params["target_odds"]
-         pdo = self.params["pdo"]
-         rating_classes = self.params["rating_classes"]
-
-         X_train = self.inputs.datasets[0].x.copy()
-         X_test = self.inputs.datasets[1].x.copy()
-
-         X_train_probs = self.compute_probabilities(self.inputs.model, X_train)
-         X_test_probs = self.compute_probabilities(self.inputs.model, X_test)
-
-         df_train_scores = self.compute_scores(
-             X_train_probs, target_score, target_odds, pdo
-         )
-         df_test_scores = self.compute_scores(
-             X_test_probs, target_score, target_odds, pdo
-         )
-
-         fig_train = self.plot_score_bucket_histogram(
-             df_train_scores,
-             "score",
-             title + " - Train Data",
-             rating_classes,
-         )
-         fig_test = self.plot_score_bucket_histogram(
-             df_test_scores,
-             "score",
-             title + " - Test Data",
-             rating_classes,
-         )
-
-         return self.cache_results(
-             metric_value={
-                 "score_distribution": {
-                     "train_scores": list(df_train_scores["score"]),
-                     "test_scores": list(df_test_scores["score"]),
-                 },
-             },
-             figures=[
-                 Figure(
-                     for_object=self,
-                     key="score_distribution_train",
-                     figure=fig_train,
-                 ),
-                 Figure(
-                     for_object=self,
-                     key="score_distribution_test",
-                     figure=fig_test,
-                 ),
-             ],
-         )
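Both removed scorecard tests (this one and ScorecardProbabilitiesHistogram below) share the same points-to-double-the-odds (PDO) scaling inside compute_scores. As a quick sanity check of that formula, here is the same arithmetic applied to a single predicted probability using the defaults from default_params; this is a standalone illustration, not package code:

import numpy as np

target_score, target_odds, pdo = 600, 50, 20

factor = pdo / np.log(2)                               # log-odds scaling factor, ≈ 28.85
offset = target_score - factor * np.log(target_odds)   # ≈ 487.12

p_default = 0.02                                       # model-predicted probability of default (toy value)
score = offset + factor * np.log(p_default / (1 - p_default))
print(round(score, 1))                                 # ≈ 374.8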
validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py (deleted)
@@ -1,175 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- import numpy as np
- import pandas as pd
- import plotly.graph_objects as go
-
- from validmind.vm_models import Figure, Metric
-
-
- @dataclass
- class ScorecardProbabilitiesHistogram(Metric):
-     """
-     Evaluates risk classification of a model by visualizing the distribution of default probability across score
-     buckets.
-
-     **Purpose**: The Scorecard Probabilities Histogram, a specific metric used within the credit risk domain, is
-     designed to evaluate and visualize risk classification of a model. It aims at examining the distribution of the
-     probability of default across varied score buckets, with the score buckets being categories that entities (e.g.,
-     loan applicants) are classed under based on their predicted default risks. The key idea is to ensure that the model
-     accurately classifies entities into appropriate risk categories (score buckets) and aptly represents their default
-     probabilities.
-
-     **Test Mechanism**: The mechanism behind the Scorecard Probabilities Histogram includes several steps. It starts
-     with the calculation of default probabilities by the 'compute_probabilities' method, where the resulting
-     probability is added as a fresh column to the input dataset. Following that, scores are computed using these
-     probabilities, a target score, target odds, and a Points to Double the odds (pdo) factor by the 'compute_scores'
-     method. These scores are then bucketed via the 'compute_buckets' method. A histogram is then plotted for each score
-     bucket, with default probabilities as the x-axis and their frequency as the y-axis - implemented within the
-     'plot_probabilities_histogram' method. This entire process is executed distinctly for both training and testing
-     datasets.
-
-     **Signs of High Risk**:
-     - A significant overlap of different score buckets in the histogram indicates that the model is not efficiently
-     distinguishing between various risk categories.
-     - If very high or low probabilities are commonplace across all buckets, the model's predictions could be skewed.
-
-     **Strengths**:
-     - The Scorecard Probabilities Histogram allows for the visualization and analysis of the predicted default risk
-     distribution across different risk classes, thereby facilitating a visual inspection of the model's performance and
-     calibration for various risk categories.
-     - It provides a means to visualize how these classifications are distributed on the training and testing datasets
-     separately, contributing to a better comprehension of model generalization.
-
-     **Limitations**:
-     - The Scorecard Probabilities Histogram assumes linear and equally spaced risk categories, which might not always
-     hold true.
-     - If there are too few or too many score buckets, the visualization may not convey sufficient information.
-     - While it effectively illustrates the distribution of probabilities, it does not provide adequate numerical
-     metrics or threshold to definitively evaluate the model's performance. A more accurate evaluation necessitates its
-     usage in conjunction with other metrics and tools including the confusion matrix, AUC-ROC, Precision, Recall, and
-     so forth.
-     """
-
-     name = "scorecard_probabilities_histogram"
-     required_inputs = ["model"]
-     metadata = {
-         "task_types": ["classification"],
-         "tags": ["tabular_data", "visualization", "credit_risk"],
-     }
-     default_params = {
-         "title": "Probability of Default by Score Bucket",
-         "target_score": 600,
-         "target_odds": 50,
-         "pdo": 20,
-         "score_buckets": ["A", "B", "C", "D"],
-     }
-
-     @staticmethod
-     def compute_probabilities(model, X):
-         """
-         Predict probabilities and add them as a new column in X
-         """
-         probabilities = model.predict(X)
-         X["probabilities"] = probabilities
-         return X
-
-     @staticmethod
-     def compute_scores(X, target_score, target_odds, pdo):
-         X_copy = X.copy()
-         factor = pdo / np.log(2)
-         offset = target_score - (factor * np.log(target_odds))
-
-         X_copy["score"] = offset + factor * np.log(
-             X_copy["probabilities"] / (1 - X_copy["probabilities"])
-         )
-
-         return X_copy
-
-     @staticmethod
-     def compute_buckets(X, score_buckets):
-         X["bucket"] = pd.qcut(X["score"], q=len(score_buckets), labels=score_buckets)
-         return X
-
-     @staticmethod
-     def plot_probabilities_histogram(df, title, score_buckets):
-         fig = go.Figure()
-
-         for bucket in score_buckets:
-             df_bucket = df[df["bucket"] == bucket]
-             bucket_values = df_bucket["probabilities"]
-             fig.add_trace(
-                 go.Histogram(
-                     x=bucket_values,
-                     name=bucket,
-                     opacity=0.6,
-                 )
-             )
-
-         fig.update_layout(
-             title_text=title,
-             xaxis_title="Probability",
-             yaxis_title="Frequency",
-             barmode="overlay",
-         )
-
-         return fig
-
-     def run(self):
-         title = self.params["title"]
-         target_score = self.params["target_score"]
-         target_odds = self.params["target_odds"]
-         pdo = self.params["pdo"]
-         score_buckets = self.params["score_buckets"]
-
-         X_train = self.inputs.datasets[0].x.copy()
-         X_test = self.inputs.datasets[1].x.copy()
-
-         X_train_probs = self.compute_probabilities(self.inputs.model, X_train)
-         X_test_probs = self.compute_probabilities(self.inputs.model, X_test)
-
-         df_train_scores = self.compute_scores(
-             X_train_probs, target_score, target_odds, pdo
-         )
-         df_test_scores = self.compute_scores(
-             X_test_probs, target_score, target_odds, pdo
-         )
-
-         df_train_buckets = self.compute_buckets(df_train_scores, score_buckets)
-         df_test_buckets = self.compute_buckets(df_test_scores, score_buckets)
-
-         fig_train = self.plot_probabilities_histogram(
-             df_train_buckets,
-             title + " - Train Data",
-             score_buckets,
-         )
-         fig_test = self.plot_probabilities_histogram(
-             df_test_buckets,
-             title + " - Test Data",
-             score_buckets,
-         )
-
-         return self.cache_results(
-             metric_value={
-                 "probability_distribution": {
-                     "train_probs": list(df_train_buckets["probabilities"]),
-                     "test_probs": list(df_test_buckets["probabilities"]),
-                 },
-             },
-             figures=[
-                 Figure(
-                     for_object=self,
-                     key="probability_distribution_train",
-                     figure=fig_train,
-                 ),
-                 Figure(
-                     for_object=self,
-                     key="probability_distribution_test",
-                     figure=fig_test,
-                 ),
-             ],
-         )
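The bucketing step in the test above (compute_buckets) uses pd.qcut, which yields equally populated quantile buckets, whereas ScorecardBucketHistogram further up uses equal-width pd.cut. A small standalone sketch of the qcut behaviour on toy scores (data and names are illustrative, not from the package):

import numpy as np
import pandas as pd

# 1,000 synthetic scores roughly centred where the default scaling puts them
scores = pd.DataFrame({"score": np.random.default_rng(0).normal(480, 40, 1000)})
scores["bucket"] = pd.qcut(scores["score"], q=4, labels=["A", "B", "C", "D"])

print(scores["bucket"].value_counts())  # ~250 rows per bucket by construction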
validmind/unit_metrics/sklearn/classification/Accuracy.py (deleted)
@@ -1,22 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- from sklearn.metrics import accuracy_score
-
- from validmind.vm_models import UnitMetric
-
-
- @dataclass
- class Accuracy(UnitMetric):
-     required_inputs = ["dataset", "model"]
-
-     def run(self):
-         y_true = self.inputs.dataset.y
-         y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
-
-         value = accuracy_score(y_true, y_pred, **self.params)
-
-         return self.cache_results(metric_value=value)
validmind/unit_metrics/sklearn/classification/F1.py (deleted)
@@ -1,24 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- from sklearn.metrics import f1_score
-
- from validmind.vm_models import UnitMetric
-
-
- @dataclass
- class F1(UnitMetric):
-     required_inputs = ["dataset", "model"]
-
-     def run(self):
-         y_true = self.inputs.dataset.y
-         y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
-
-         value = f1_score(y_true, y_pred, **self.params)
-
-         return self.cache_results(
-             metric_value=value,
-         )
validmind/unit_metrics/sklearn/classification/Precision.py (deleted)
@@ -1,24 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- from sklearn.metrics import precision_score
-
- from validmind.vm_models import UnitMetric
-
-
- @dataclass
- class Precision(UnitMetric):
-     required_inputs = ["dataset", "model"]
-
-     def run(self):
-         y_true = self.inputs.dataset.y
-         y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
-
-         value = precision_score(y_true, y_pred, **self.params)
-
-         return self.cache_results(
-             metric_value=value,
-         )
validmind/unit_metrics/sklearn/classification/ROC_AUC.py (deleted)
@@ -1,22 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- from sklearn.metrics import roc_auc_score
-
- from validmind.vm_models import UnitMetric
-
-
- @dataclass
- class ROC_AUC(UnitMetric):
-     required_inputs = ["dataset", "model"]
-
-     def run(self):
-         y_true = self.inputs.dataset.y
-         y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
-
-         value = roc_auc_score(y_true, y_pred, **self.params)
-
-         return self.cache_results(metric_value=value)
validmind/unit_metrics/sklearn/classification/Recall.py (deleted)
@@ -1,22 +0,0 @@
- # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
- # See the LICENSE file in the root of this repository for details.
- # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-
- from dataclasses import dataclass
-
- from sklearn.metrics import recall_score
-
- from validmind.vm_models import UnitMetric
-
-
- @dataclass
- class Recall(UnitMetric):
-     required_inputs = ["dataset", "model"]
-
-     def run(self):
-         y_true = self.inputs.dataset.y
-         y_pred = self.inputs.dataset.y_pred(model_id=self.inputs.model.input_id)
-
-         value = recall_score(y_true, y_pred, **self.params)
-
-         return self.cache_results(metric_value=value)
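The five hunks above remove the sklearn classification unit metrics from validmind/unit_metrics/sklearn/classification/; per the file list, counterparts are added under validmind/unit_metrics/classification/sklearn/ in 2.1.1. Each removed wrapper was a thin layer over the corresponding scikit-learn function. A self-contained sketch of the same quantities computed directly on toy arrays (the arrays, and the use of probability scores for ROC AUC, are illustrative choices, not taken from the package):

from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
)

y_true = [0, 1, 1, 0, 1, 0]
y_pred = [0, 1, 0, 0, 1, 1]               # hard class predictions
y_score = [0.2, 0.9, 0.4, 0.1, 0.8, 0.6]  # probability-like scores for ROC AUC

print("accuracy :", accuracy_score(y_true, y_pred))
print("precision:", precision_score(y_true, y_pred))
print("recall   :", recall_score(y_true, y_pred))
print("f1       :", f1_score(y_true, y_pred))
print("roc_auc  :", roc_auc_score(y_true, y_score))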