validmind 2.7.5__py3-none-any.whl → 2.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. validmind/__version__.py +1 -1
  2. validmind/datasets/credit_risk/lending_club.py +354 -88
  3. validmind/tests/data_validation/HighPearsonCorrelation.py +12 -2
  4. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +218 -0
  5. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +153 -0
  6. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +144 -0
  7. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +146 -0
  8. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +191 -0
  9. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +176 -0
  10. validmind/tests/ongoing_monitoring/FeatureDrift.py +120 -121
  11. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +18 -23
  12. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +86 -45
  13. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +202 -0
  14. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +97 -0
  15. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +149 -0
  16. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +210 -0
  17. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +207 -0
  18. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +91 -14
  19. validmind/vm_models/dataset/dataset.py +0 -4
  20. {validmind-2.7.5.dist-info → validmind-2.7.6.dist-info}/METADATA +2 -2
  21. {validmind-2.7.5.dist-info → validmind-2.7.6.dist-info}/RECORD +24 -13
  22. {validmind-2.7.5.dist-info → validmind-2.7.6.dist-info}/LICENSE +0 -0
  23. {validmind-2.7.5.dist-info → validmind-2.7.6.dist-info}/WHEEL +0 -0
  24. {validmind-2.7.5.dist-info → validmind-2.7.6.dist-info}/entry_points.txt +0 -0
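The bulk of this release is the new `validmind/tests/ongoing_monitoring/` suite. The sketch below is not part of the diff; it is a minimal illustration of how one of these tests might be invoked through ValidMind's standard `run_test` entry point, assuming the reference/monitoring datasets and the model have already been registered with `vm.init_dataset()` / `vm.init_model()` (the variable names here are placeholders).

```python
from validmind.tests import run_test

# Placeholders: a reference dataset, a monitoring dataset, and a model previously
# initialized with vm.init_dataset() / vm.init_model().
result = run_test(
    "validmind.ongoing_monitoring.ConfusionMatrixDrift",
    inputs={"datasets": [vm_reference_ds, vm_monitoring_ds], "model": vm_model},
    params={"drift_pct_threshold": 20},
)
```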
validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py (new file)
@@ -0,0 +1,191 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+import pandas as pd
+from sklearn.metrics import confusion_matrix
+from typing import List
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+@tags(
+    "sklearn", "binary_classification", "multiclass_classification", "model_performance"
+)
+@tasks("classification", "text_classification")
+def ConfusionMatrixDrift(
+    datasets: List[VMDataset], model: VMModel, drift_pct_threshold=20
+):
+    """
+    Compares confusion matrix metrics between reference and monitoring datasets.
+
+    ### Purpose
+
+    The Confusion Matrix Drift test is designed to evaluate changes in the model's error patterns
+    over time. By comparing confusion matrix elements between reference and monitoring datasets, this
+    test helps identify whether the model maintains consistent prediction behavior in production. This
+    is crucial for understanding if the model's error patterns have shifted and whether specific types
+    of misclassifications have become more prevalent.
+
+    ### Test Mechanism
+
+    This test proceeds by generating confusion matrices for both reference and monitoring datasets.
+    For binary classification, it tracks True Positives, True Negatives, False Positives, and False
+    Negatives as percentages of total predictions. For multiclass problems, it analyzes per-class
+    metrics including true positives and error rates. The test quantifies drift as percentage changes
+    in these metrics between datasets, providing detailed insight into shifting prediction patterns.
+
+    ### Signs of High Risk
+
+    - Large drifts in confusion matrix elements exceeding threshold
+    - Systematic changes in false positive or false negative rates
+    - Inconsistent changes across different classes
+    - Significant shifts in error patterns for specific classes
+    - Unexpected improvements in certain metrics
+    - Divergent trends between different types of errors
+
+    ### Strengths
+
+    - Provides detailed analysis of prediction behavior
+    - Identifies specific types of prediction changes
+    - Enables early detection of systematic errors
+    - Includes comprehensive error pattern analysis
+    - Supports both binary and multiclass problems
+    - Maintains interpretable percentage-based metrics
+
+    ### Limitations
+
+    - May be sensitive to class distribution changes
+    - Cannot identify root causes of prediction drift
+    - Requires sufficient samples for reliable comparison
+    - Limited to hard predictions (not probabilities)
+    - May not capture subtle changes in decision boundaries
+    - Complex interpretation for multiclass problems
+    """
+    # Get predictions and true values for reference dataset
+    y_pred_ref = datasets[0].y_pred(model)
+    y_true_ref = datasets[0].y.astype(y_pred_ref.dtype)
+
+    # Get predictions and true values for monitoring dataset
+    y_pred_mon = datasets[1].y_pred(model)
+    y_true_mon = datasets[1].y.astype(y_pred_mon.dtype)
+
+    # Get unique labels from reference dataset
+    labels = np.unique(y_true_ref)
+    labels = sorted(labels.tolist())
+
+    # Calculate confusion matrices
+    cm_ref = confusion_matrix(y_true_ref, y_pred_ref, labels=labels)
+    cm_mon = confusion_matrix(y_true_mon, y_pred_mon, labels=labels)
+
+    # Get total counts
+    total_ref = len(y_true_ref)
+    total_mon = len(y_true_mon)
+
+    # Create sample counts table
+    counts_data = {
+        "Dataset": ["Reference", "Monitoring"],
+        "Total": [total_ref, total_mon],
+    }
+
+    # Add per-class counts
+    for label in labels:
+        label_str = f"Class_{label}"
+        counts_data[label_str] = [
+            np.sum(y_true_ref == label),
+            np.sum(y_true_mon == label),
+        ]
+
+    counts_df = pd.DataFrame(counts_data)
+
+    # Create confusion matrix metrics
+    metrics = []
+
+    if len(labels) == 2:
+        # Binary classification
+        tn_ref, fp_ref, fn_ref, tp_ref = cm_ref.ravel()
+        tn_mon, fp_mon, fn_mon, tp_mon = cm_mon.ravel()
+
+        confusion_elements = [
+            ("True Negatives (%)", tn_ref / total_ref * 100, tn_mon / total_mon * 100),
+            ("False Positives (%)", fp_ref / total_ref * 100, fp_mon / total_mon * 100),
+            ("False Negatives (%)", fn_ref / total_ref * 100, fn_mon / total_mon * 100),
+            ("True Positives (%)", tp_ref / total_ref * 100, tp_mon / total_mon * 100),
+        ]
+
+        for name, ref_val, mon_val in confusion_elements:
+            metrics.append(
+                {
+                    "Metric": name,
+                    "Reference": round(ref_val, 2),
+                    "Monitoring": round(mon_val, 2),
+                }
+            )
+
+    else:
+        # Multiclass - calculate per-class metrics
+        for i, label in enumerate(labels):
+            # True Positives for this class
+            tp_ref = cm_ref[i, i]
+            tp_mon = cm_mon[i, i]
+
+            # False Positives (sum of column minus TP)
+            fp_ref = cm_ref[:, i].sum() - tp_ref
+            fp_mon = cm_mon[:, i].sum() - tp_mon
+
+            # False Negatives (sum of row minus TP)
+            fn_ref = cm_ref[i, :].sum() - tp_ref
+            fn_mon = cm_mon[i, :].sum() - tp_mon
+
+            class_metrics = [
+                (
+                    f"True Positives_{label} (%)",
+                    tp_ref / total_ref * 100,
+                    tp_mon / total_mon * 100,
+                ),
+                (
+                    f"False Positives_{label} (%)",
+                    fp_ref / total_ref * 100,
+                    fp_mon / total_mon * 100,
+                ),
+                (
+                    f"False Negatives_{label} (%)",
+                    fn_ref / total_ref * 100,
+                    fn_mon / total_mon * 100,
+                ),
+            ]
+
+            for name, ref_val, mon_val in class_metrics:
+                metrics.append(
+                    {
+                        "Metric": name,
+                        "Reference": round(ref_val, 2),
+                        "Monitoring": round(mon_val, 2),
+                    }
+                )
+
+    # Create metrics DataFrame
+    metrics_df = pd.DataFrame(metrics)
+
+    # Calculate drift percentage with direction
+    metrics_df["Drift (%)"] = (
+        (metrics_df["Monitoring"] - metrics_df["Reference"])
+        / metrics_df["Reference"].abs()
+        * 100
+    ).round(2)
+
+    # Add Pass/Fail column based on absolute drift
+    metrics_df["Pass/Fail"] = (
+        metrics_df["Drift (%)"]
+        .abs()
+        .apply(lambda x: "Pass" if x < drift_pct_threshold else "Fail")
+    )
+
+    # Calculate overall pass/fail
+    pass_fail_bool = (metrics_df["Pass/Fail"] == "Pass").all()
+
+    return (
+        {"Confusion Matrix Metrics": metrics_df, "Sample Counts": counts_df},
+        pass_fail_bool,
+    )
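To make the drift metric above concrete: each confusion-matrix element is expressed as a percentage of total predictions, and drift is the signed relative change of the monitoring value against the reference value, with the Pass/Fail decision taken on its absolute value versus `drift_pct_threshold`. A toy calculation with illustrative numbers (not from the package):

```python
# False Positives: 5.0% of reference predictions vs. 6.5% of monitoring predictions.
reference, monitoring = 5.0, 6.5

drift_pct = (monitoring - reference) / abs(reference) * 100  # +30.0
status = "Pass" if abs(drift_pct) < 20 else "Fail"  # "Fail" at the default 20% threshold
```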
validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py (new file)
@@ -0,0 +1,176 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from typing import List
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+
+@tags("visualization", "credit_risk")
+@tasks("classification")
+def CumulativePredictionProbabilitiesDrift(
+    datasets: List[VMDataset],
+    model: VMModel,
+):
+    """
+    Compares cumulative prediction probability distributions between reference and monitoring datasets.
+
+    ### Purpose
+
+    The Cumulative Prediction Probabilities Drift test is designed to evaluate changes in the model's
+    probability predictions over time. By comparing cumulative distribution functions of predicted
+    probabilities between reference and monitoring datasets, this test helps identify whether the
+    model's probability assignments remain stable in production. This is crucial for understanding if
+    the model's risk assessment behavior has shifted and whether its probability calibration remains
+    consistent.
+
+    ### Test Mechanism
+
+    This test proceeds by generating cumulative distribution functions (CDFs) of predicted probabilities
+    for both reference and monitoring datasets. For each class, it plots the cumulative proportion of
+    predictions against probability values, enabling direct comparison of probability distributions.
+    The test visualizes both the CDFs and their differences, providing insight into how probability
+    assignments have shifted across the entire probability range.
+
+    ### Signs of High Risk
+
+    - Large gaps between reference and monitoring CDFs
+    - Systematic shifts in probability assignments
+    - Concentration of differences in specific probability ranges
+    - Changes in the shape of probability distributions
+    - Unexpected patterns in cumulative differences
+    - Significant shifts in probability thresholds
+
+    ### Strengths
+
+    - Provides comprehensive view of probability changes
+    - Identifies specific probability ranges with drift
+    - Enables visualization of distribution differences
+    - Supports analysis across multiple classes
+    - Maintains interpretable probability scale
+    - Captures subtle changes in probability assignments
+
+    ### Limitations
+
+    - Does not provide single drift metric
+    - May be complex to interpret for multiple classes
+    - Cannot suggest probability recalibration
+    - Requires visual inspection for assessment
+    - Sensitive to sample size differences
+    - May not capture class-specific calibration issues
+    """
+    # Get predictions and true values
+    y_prob_ref = datasets[0].y_prob(model)
+    df_ref = datasets[0].df.copy()
+    df_ref["probabilities"] = y_prob_ref
+
+    y_prob_mon = datasets[1].y_prob(model)
+    df_mon = datasets[1].df.copy()
+    df_mon["probabilities"] = y_prob_mon
+
+    # Get unique classes
+    classes = sorted(df_ref[datasets[0].target_column].unique())
+
+    # Define colors
+    ref_color = "rgba(31, 119, 180, 0.8)"  # Blue with 0.8 opacity
+    mon_color = "rgba(255, 127, 14, 0.8)"  # Orange with 0.8 opacity
+    diff_color = "rgba(148, 103, 189, 0.8)"  # Purple with 0.8 opacity
+
+    figures = []
+    for class_value in classes:
+        # Create figure with secondary y-axis
+        fig = make_subplots(
+            rows=2,
+            cols=1,
+            subplot_titles=[
+                f"Cumulative Distributions - Class {class_value}",
+                "Difference (Monitoring - Reference)",
+            ],
+            vertical_spacing=0.15,
+            shared_xaxes=True,
+        )
+
+        # Get probabilities for current class
+        ref_probs = df_ref[df_ref[datasets[0].target_column] == class_value][
+            "probabilities"
+        ]
+        mon_probs = df_mon[df_mon[datasets[1].target_column] == class_value][
+            "probabilities"
+        ]
+
+        # Calculate cumulative distributions
+        ref_sorted = np.sort(ref_probs)
+        ref_cumsum = np.arange(len(ref_sorted)) / float(len(ref_sorted))
+
+        mon_sorted = np.sort(mon_probs)
+        mon_cumsum = np.arange(len(mon_sorted)) / float(len(mon_sorted))
+
+        # Reference dataset cumulative curve
+        fig.add_trace(
+            go.Scatter(
+                x=ref_sorted,
+                y=ref_cumsum,
+                mode="lines",
+                name="Reference",
+                line=dict(color=ref_color, width=2),
+            ),
+            row=1,
+            col=1,
+        )
+
+        # Monitoring dataset cumulative curve
+        fig.add_trace(
+            go.Scatter(
+                x=mon_sorted,
+                y=mon_cumsum,
+                mode="lines",
+                name="Monitoring",
+                line=dict(color=mon_color, width=2),
+            ),
+            row=1,
+            col=1,
+        )
+
+        # Calculate and plot difference
+        # Interpolate monitoring values to match reference x-points
+        mon_interp = np.interp(ref_sorted, mon_sorted, mon_cumsum)
+        difference = mon_interp - ref_cumsum
+
+        fig.add_trace(
+            go.Scatter(
+                x=ref_sorted,
+                y=difference,
+                mode="lines",
+                name="Difference",
+                line=dict(color=diff_color, width=2),
+            ),
+            row=2,
+            col=1,
+        )
+
+        # Add horizontal line at y=0 for difference plot
+        fig.add_hline(y=0, line=dict(color="grey", dash="dash"), row=2, col=1)
+
+        # Update layout
+        fig.update_layout(
+            height=600,
+            width=800,
+            showlegend=True,
+            legend=dict(yanchor="middle", y=0.9, xanchor="left", x=1.05),
+        )
+
+        # Update axes
+        fig.update_xaxes(title_text="Probability", range=[0, 1], row=2, col=1)
+        fig.update_xaxes(range=[0, 1], row=1, col=1)
+        fig.update_yaxes(
+            title_text="Cumulative Distribution", range=[0, 1], row=1, col=1
+        )
+        fig.update_yaxes(title_text="Difference", row=2, col=1)
+
+        figures.append(fig)
+
+    return tuple(figures)
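The plotting code above rests on two NumPy steps: sorting each class's predicted probabilities to form an empirical CDF, and interpolating the monitoring CDF onto the reference probability grid so the two curves can be subtracted point by point. A standalone sketch with synthetic scores (illustrative only, not part of the diff):

```python
import numpy as np

rng = np.random.default_rng(0)
ref_probs = rng.beta(2, 5, size=1000)  # synthetic reference scores
mon_probs = rng.beta(2, 4, size=800)   # synthetic monitoring scores, slightly shifted

ref_sorted = np.sort(ref_probs)
ref_cdf = np.arange(len(ref_sorted)) / float(len(ref_sorted))
mon_sorted = np.sort(mon_probs)
mon_cdf = np.arange(len(mon_sorted)) / float(len(mon_sorted))

# Evaluate the monitoring CDF at the reference probability values, then subtract.
difference = np.interp(ref_sorted, mon_sorted, mon_cdf) - ref_cdf
print(difference.min(), difference.max())  # largest gaps between the two curves
```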
validmind/tests/ongoing_monitoring/FeatureDrift.py
@@ -2,18 +2,99 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
-
+import plotly.graph_objects as go
 from validmind import tags, tasks
 
 
+def calculate_psi_score(actual, expected):
+    """Calculate PSI score for a single bucket."""
+    return (actual - expected) * np.log((actual + 1e-6) / (expected + 1e-6))
+
+
+def calculate_feature_distributions(
+    reference_data, monitoring_data, feature_columns, bins
+):
+    """Calculate population distributions for each feature."""
+    # Calculate quantiles from reference data
+    quantiles = reference_data[feature_columns].quantile(
+        bins, method="single", interpolation="nearest"
+    )
+
+    distributions = {}
+    for dataset_name, data in [
+        ("reference", reference_data),
+        ("monitoring", monitoring_data),
+    ]:
+        for feature in feature_columns:
+            for bin_idx, threshold in enumerate(quantiles[feature]):
+                if bin_idx == 0:
+                    mask = data[feature] < threshold
+                else:
+                    prev_threshold = quantiles[feature][bins[bin_idx - 1]]
+                    mask = (data[feature] >= prev_threshold) & (
+                        data[feature] < threshold
+                    )
+
+                count = mask.sum()
+                proportion = count / len(data)
+                distributions[(dataset_name, feature, bins[bin_idx])] = proportion
+
+    return distributions
+
+
+def create_distribution_plot(feature_name, reference_dist, monitoring_dist, bins):
+    """Create population distribution plot for a feature."""
+    fig = go.Figure()
+
+    # Add reference distribution
+    fig.add_trace(
+        go.Bar(
+            x=list(range(len(bins))),
+            y=reference_dist,
+            name="Reference",
+            marker_color="blue",
+            marker_line_color="black",
+            marker_line_width=1,
+            opacity=0.75,
+        )
+    )
+
+    # Add monitoring distribution
+    fig.add_trace(
+        go.Bar(
+            x=list(range(len(bins))),
+            y=monitoring_dist,
+            name="Monitoring",
+            marker_color="green",
+            marker_line_color="black",
+            marker_line_width=1,
+            opacity=0.75,
+        )
+    )
+
+    fig.update_layout(
+        title=f"Population Distribution: {feature_name}",
+        xaxis_title="Bin",
+        yaxis_title="Population %",
+        barmode="group",
+        template="plotly_white",
+        showlegend=True,
+        width=800,
+        height=400,
+    )
+
+    return fig
+
+
 @tags("visualization")
 @tasks("monitoring")
 def FeatureDrift(
-    datasets, bins=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], feature_columns=None
+    datasets,
+    bins=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
+    feature_columns=None,
+    psi_threshold=0.2,
 ):
     """
     Evaluates changes in feature distribution over time to identify potential model drift.
@@ -57,130 +138,48 @@ def FeatureDrift(
     - PSI score interpretation can be overly simplistic for complex datasets.
     """
 
-    # Feature columns for both datasets should be the same if not given
-    default_feature_columns = datasets[0].feature_columns
-    feature_columns = feature_columns or default_feature_columns
+    # Get feature columns
+    feature_columns = feature_columns or datasets[0].feature_columns
 
-    x_train_df = datasets[0].x_df()
-    x_test_df = datasets[1].x_df()
+    # Get data
+    reference_data = datasets[0].df
+    monitoring_data = datasets[1].df
 
-    quantiles_train = x_train_df[feature_columns].quantile(
-        bins, method="single", interpolation="nearest"
-    )
-    PSI_QUANTILES = quantiles_train.to_dict()
-
-    PSI_BUCKET_FRAC, col, n = get_psi_buckets(
-        x_test_df, x_train_df, feature_columns, bins, PSI_QUANTILES
+    # Calculate distributions
+    distributions = calculate_feature_distributions(
+        reference_data, monitoring_data, feature_columns, bins
     )
 
-    def nest(d: dict) -> dict:
-        result = {}
-        for key, value in d.items():
-            target = result
-            for k in key[:-1]:  # traverse all keys but the last
-                target = target.setdefault(k, {})
-            target[key[-1]] = value
-        return result
-
-    PSI_BUCKET_FRAC = nest(PSI_BUCKET_FRAC)
-
-    PSI_SCORES = {}
-    for col in feature_columns:
+    # Calculate PSI scores
+    psi_scores = {}
+    for feature in feature_columns:
         psi = 0
-        for n in bins:
-            actual = PSI_BUCKET_FRAC["test"][col][n]
-            expected = PSI_BUCKET_FRAC["train"][col][n]
-            psi_of_bucket = (actual - expected) * np.log(
-                (actual + 1e-6) / (expected + 1e-6)
-            )
-            psi += psi_of_bucket
-        PSI_SCORES[col] = psi
-
-    psi_df = pd.DataFrame(list(PSI_SCORES.items()), columns=["Features", "PSI Score"])
+        for bin_val in bins:
+            reference_prop = distributions[("reference", feature, bin_val)]
+            monitoring_prop = distributions[("monitoring", feature, bin_val)]
+            psi += calculate_psi_score(monitoring_prop, reference_prop)
+        psi_scores[feature] = psi
+
+    # Create PSI score dataframe
+    psi_df = pd.DataFrame(list(psi_scores.items()), columns=["Feature", "PSI Score"])
+
+    # Add Pass/Fail column
+    psi_df["Pass/Fail"] = psi_df["PSI Score"].apply(
+        lambda x: "Pass" if x < psi_threshold else "Fail"
+    )
 
+    # Sort by PSI Score
     psi_df.sort_values(by=["PSI Score"], inplace=True, ascending=False)
 
-    psi_table = [
-        {"Features": values["Features"], "PSI Score": values["PSI Score"]}
-        for i, values in enumerate(psi_df.to_dict(orient="records"))
-    ]
-
-    save_fig = plot_hist(PSI_BUCKET_FRAC, bins)
-
-    final_psi = pd.DataFrame(psi_table)
-
-    return (final_psi, *save_fig)
-
-
-def get_psi_buckets(x_test_df, x_train_df, feature_columns, bins, PSI_QUANTILES):
-    DATA = {"test": x_test_df, "train": x_train_df}
-    PSI_BUCKET_FRAC = {}
-    for table in DATA.keys():
-        total_count = DATA[table].shape[0]
-        for col in feature_columns:
-            count_sum = 0
-            for n in bins:
-                if n == 0:
-                    bucket_count = (DATA[table][col] < PSI_QUANTILES[col][n]).sum()
-                elif n < 9:
-                    bucket_count = (
-                        total_count
-                        - count_sum
-                        - ((DATA[table][col] >= PSI_QUANTILES[col][n]).sum())
-                    )
-                elif n == 9:
-                    bucket_count = total_count - count_sum
-                count_sum += bucket_count
-                PSI_BUCKET_FRAC[table, col, n] = bucket_count / total_count
-    return PSI_BUCKET_FRAC, col, n
-
-
-def plot_hist(PSI_BUCKET_FRAC, bins):
-    bin_table_psi = pd.DataFrame(PSI_BUCKET_FRAC)
-    save_fig = []
-    for i in range(len(bin_table_psi)):
+    # Create distribution plots
+    figures = []
+    for feature in feature_columns:
+        reference_dist = [distributions[("reference", feature, b)] for b in bins]
+        monitoring_dist = [distributions[("monitoring", feature, b)] for b in bins]
+        fig = create_distribution_plot(feature, reference_dist, monitoring_dist, bins)
+        figures.append(fig)
 
-        x = pd.DataFrame(
-            bin_table_psi.iloc[i]["test"].items(),
-            columns=["Bin", "Population % Reference"],
-        )
-        y = pd.DataFrame(
-            bin_table_psi.iloc[i]["train"].items(),
-            columns=["Bin", "Population % Monitoring"],
-        )
-        xy = x.merge(y, on="Bin")
-        xy.index = xy["Bin"]
-        xy = xy.drop(columns="Bin", axis=1)
-        feature_name = bin_table_psi.index[i]
-
-        n = len(bins)
-        r = np.arange(n)
-        width = 0.25
-
-        fig = plt.figure()
-
-        plt.bar(
-            r,
-            xy["Population % Reference"],
-            color="b",
-            width=width,
-            edgecolor="black",
-            label="Reference {0}".format(feature_name),
-        )
-        plt.bar(
-            r + width,
-            xy["Population % Monitoring"],
-            color="g",
-            width=width,
-            edgecolor="black",
-            label="Monitoring {0}".format(feature_name),
-        )
+    # Calculate overall pass/fail
+    pass_fail_bool = (psi_df["Pass/Fail"] == "Pass").all()
 
-        plt.xlabel("Bin")
-        plt.ylabel("Population %")
-        plt.title("Histogram of Population Differences {0}".format(feature_name))
-        plt.legend()
-        plt.tight_layout()
-        plt.close()
-        save_fig.append(fig)
-    return save_fig
+    return ({"PSI Scores": psi_df}, *figures, pass_fail_bool)
validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py
@@ -53,30 +53,25 @@ def PredictionAcrossEachFeature(datasets, model):
     observed during the training of the model.
     """
 
-    df_reference = datasets[0]._df
-    df_monitoring = datasets[1]._df
+    y_prob_reference = datasets[0].y_prob(model)
+    y_prob_monitoring = datasets[1].y_prob(model)
 
     figures_to_save = []
-    for column in df_reference:
-        prediction_prob_column = f"{model.input_id}_probabilities"
-        prediction_column = f"{model.input_id}_prediction"
-        if column == prediction_prob_column or column == prediction_column:
-            pass
-        else:
-            fig, axs = plt.subplots(1, 2, figsize=(20, 10), sharey="row")
-
-            ax1, ax2 = axs
-
-            ax1.scatter(df_reference[column], df_reference[prediction_prob_column])
-            ax2.scatter(df_monitoring[column], df_monitoring[prediction_prob_column])
-
-            ax1.set_title("Reference")
-            ax1.set_xlabel(column)
-            ax1.set_ylabel("Prediction Value")
-
-            ax2.set_title("Monitoring")
-            ax2.set_xlabel(column)
-            figures_to_save.append(fig)
-            plt.close()
+    for column in datasets[0].feature_columns:
+        fig, axs = plt.subplots(1, 2, figsize=(20, 10), sharey="row")
+
+        ax1, ax2 = axs
+
+        ax1.scatter(datasets[0].df[column], y_prob_reference)
+        ax2.scatter(datasets[1].df[column], y_prob_monitoring)
+
+        ax1.set_title("Reference")
+        ax1.set_xlabel(column)
+        ax1.set_ylabel("Prediction Value")
+
+        ax2.set_title("Monitoring")
+        ax2.set_xlabel(column)
+        figures_to_save.append(fig)
+        plt.close()
 
     return tuple(figures_to_save)