validmind 2.7.5__py3-none-any.whl → 2.7.7__py3-none-any.whl

This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (149)
  1. validmind/__init__.py +2 -0
  2. validmind/__version__.py +1 -1
  3. validmind/api_client.py +8 -1
  4. validmind/datasets/credit_risk/lending_club.py +352 -87
  5. validmind/html_templates/content_blocks.py +1 -1
  6. validmind/tests/__types__.py +17 -0
  7. validmind/tests/data_validation/ACFandPACFPlot.py +6 -2
  8. validmind/tests/data_validation/AutoMA.py +2 -2
  9. validmind/tests/data_validation/BivariateScatterPlots.py +4 -2
  10. validmind/tests/data_validation/BoxPierce.py +2 -2
  11. validmind/tests/data_validation/ClassImbalance.py +2 -1
  12. validmind/tests/data_validation/DatasetDescription.py +11 -2
  13. validmind/tests/data_validation/DatasetSplit.py +2 -2
  14. validmind/tests/data_validation/DickeyFullerGLS.py +2 -2
  15. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +8 -2
  16. validmind/tests/data_validation/HighCardinality.py +9 -2
  17. validmind/tests/data_validation/HighPearsonCorrelation.py +18 -4
  18. validmind/tests/data_validation/IQROutliersBarPlot.py +9 -2
  19. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -2
  20. validmind/tests/data_validation/MissingValuesBarPlot.py +12 -9
  21. validmind/tests/data_validation/MutualInformation.py +6 -8
  22. validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -2
  23. validmind/tests/data_validation/ProtectedClassesCombination.py +6 -1
  24. validmind/tests/data_validation/ProtectedClassesDescription.py +1 -1
  25. validmind/tests/data_validation/ProtectedClassesDisparity.py +4 -5
  26. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +1 -4
  27. validmind/tests/data_validation/RollingStatsPlot.py +21 -10
  28. validmind/tests/data_validation/ScatterPlot.py +3 -5
  29. validmind/tests/data_validation/ScoreBandDefaultRates.py +2 -1
  30. validmind/tests/data_validation/SeasonalDecompose.py +12 -2
  31. validmind/tests/data_validation/Skewness.py +6 -3
  32. validmind/tests/data_validation/SpreadPlot.py +8 -3
  33. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -2
  34. validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -2
  35. validmind/tests/data_validation/TargetRateBarPlots.py +4 -3
  36. validmind/tests/data_validation/TimeSeriesFrequency.py +7 -2
  37. validmind/tests/data_validation/TimeSeriesMissingValues.py +14 -10
  38. validmind/tests/data_validation/TimeSeriesOutliers.py +1 -5
  39. validmind/tests/data_validation/WOEBinPlots.py +2 -2
  40. validmind/tests/data_validation/WOEBinTable.py +11 -9
  41. validmind/tests/data_validation/nlp/CommonWords.py +2 -2
  42. validmind/tests/data_validation/nlp/Hashtags.py +2 -2
  43. validmind/tests/data_validation/nlp/LanguageDetection.py +9 -6
  44. validmind/tests/data_validation/nlp/Mentions.py +9 -6
  45. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -2
  46. validmind/tests/data_validation/nlp/Punctuations.py +4 -2
  47. validmind/tests/data_validation/nlp/Sentiment.py +2 -2
  48. validmind/tests/data_validation/nlp/StopWords.py +5 -4
  49. validmind/tests/data_validation/nlp/TextDescription.py +2 -2
  50. validmind/tests/data_validation/nlp/Toxicity.py +2 -2
  51. validmind/tests/model_validation/BertScore.py +2 -2
  52. validmind/tests/model_validation/BleuScore.py +2 -2
  53. validmind/tests/model_validation/ClusterSizeDistribution.py +2 -2
  54. validmind/tests/model_validation/ContextualRecall.py +2 -2
  55. validmind/tests/model_validation/FeaturesAUC.py +2 -2
  56. validmind/tests/model_validation/MeteorScore.py +2 -2
  57. validmind/tests/model_validation/ModelPredictionResiduals.py +2 -2
  58. validmind/tests/model_validation/RegardScore.py +6 -2
  59. validmind/tests/model_validation/RegressionResidualsPlot.py +4 -3
  60. validmind/tests/model_validation/RougeScore.py +6 -5
  61. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +11 -2
  62. validmind/tests/model_validation/TokenDisparity.py +2 -2
  63. validmind/tests/model_validation/ToxicityScore.py +10 -2
  64. validmind/tests/model_validation/embeddings/ClusterDistribution.py +9 -3
  65. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +16 -2
  66. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -3
  67. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +2 -2
  68. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +14 -4
  69. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -2
  70. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +16 -2
  71. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +2 -2
  72. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -5
  73. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +4 -2
  74. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +4 -2
  75. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -2
  76. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +4 -2
  77. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +8 -6
  78. validmind/tests/model_validation/embeddings/utils.py +11 -1
  79. validmind/tests/model_validation/ragas/AnswerCorrectness.py +2 -1
  80. validmind/tests/model_validation/ragas/AspectCritic.py +11 -7
  81. validmind/tests/model_validation/ragas/ContextEntityRecall.py +2 -1
  82. validmind/tests/model_validation/ragas/ContextPrecision.py +2 -1
  83. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +2 -1
  84. validmind/tests/model_validation/ragas/ContextRecall.py +2 -1
  85. validmind/tests/model_validation/ragas/Faithfulness.py +2 -1
  86. validmind/tests/model_validation/ragas/NoiseSensitivity.py +2 -1
  87. validmind/tests/model_validation/ragas/ResponseRelevancy.py +2 -1
  88. validmind/tests/model_validation/ragas/SemanticSimilarity.py +2 -1
  89. validmind/tests/model_validation/sklearn/CalibrationCurve.py +3 -2
  90. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +2 -5
  91. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -2
  92. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +2 -2
  93. validmind/tests/model_validation/sklearn/FeatureImportance.py +1 -14
  94. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +6 -3
  95. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +2 -2
  96. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +8 -4
  97. validmind/tests/model_validation/sklearn/ModelParameters.py +1 -0
  98. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -3
  99. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +2 -2
  100. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +20 -16
  101. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +4 -2
  102. validmind/tests/model_validation/sklearn/ROCCurve.py +1 -1
  103. validmind/tests/model_validation/sklearn/RegressionR2Square.py +7 -9
  104. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +1 -3
  105. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +2 -1
  106. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +2 -1
  107. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -3
  108. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -1
  109. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +1 -1
  110. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +11 -4
  111. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -3
  112. validmind/tests/model_validation/statsmodels/GINITable.py +7 -15
  113. validmind/tests/model_validation/statsmodels/Lilliefors.py +2 -2
  114. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +1 -1
  115. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +2 -2
  116. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +5 -2
  117. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +5 -2
  118. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +7 -7
  119. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +2 -2
  120. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +220 -0
  121. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +155 -0
  122. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +146 -0
  123. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +148 -0
  124. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +193 -0
  125. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +178 -0
  126. validmind/tests/ongoing_monitoring/FeatureDrift.py +120 -120
  127. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +18 -23
  128. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +86 -44
  129. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +204 -0
  130. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +98 -0
  131. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +150 -0
  132. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +212 -0
  133. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +209 -0
  134. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +91 -13
  135. validmind/tests/prompt_validation/Bias.py +13 -9
  136. validmind/tests/prompt_validation/Clarity.py +13 -9
  137. validmind/tests/prompt_validation/Conciseness.py +13 -9
  138. validmind/tests/prompt_validation/Delimitation.py +13 -9
  139. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  140. validmind/tests/prompt_validation/Robustness.py +6 -2
  141. validmind/tests/prompt_validation/Specificity.py +13 -9
  142. validmind/tests/run.py +6 -0
  143. validmind/utils.py +7 -8
  144. validmind/vm_models/dataset/dataset.py +0 -4
  145. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/METADATA +2 -3
  146. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/RECORD +149 -138
  147. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/WHEEL +1 -1
  148. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/LICENSE +0 -0
  149. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/entry_points.txt +0 -0
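The most substantial additions are the new drift tests under validmind/tests/ongoing_monitoring/ (entries 120-134). As a minimal sketch of how one of them might be invoked, the snippet below assumes the standard vm.tests.run_test entry point and that vm_reference, vm_monitoring, and vm_model were created beforehand with vm.init_dataset / vm.init_model; those variable names do not appear in the diff itself.

import validmind as vm

# Assumes vm.init(...) has already been called and that vm_reference,
# vm_monitoring (VMDataset) and vm_model (VMModel) exist.
result = vm.tests.run_test(
    "validmind.ongoing_monitoring.ScoreBandsDrift",
    inputs={
        "datasets": [vm_reference, vm_monitoring],  # reference first, monitoring second
        "model": vm_model,
    },
    params={
        "score_column": "score",
        "score_bands": [410, 440, 470],
        "drift_threshold": 20.0,
    },
)
result.log()  # push the result tables to the ValidMind platform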
validmind/tests/ongoing_monitoring/ScoreBandsDrift.py
@@ -0,0 +1,212 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+ import pandas as pd
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel
+
+
+ @tags("visualization", "credit_risk", "scorecard")
+ @tasks("classification")
+ def ScoreBandsDrift(
+     datasets: List[VMDataset],
+     model: VMModel,
+     score_column: str = "score",
+     score_bands: list = None,
+     drift_threshold: float = 20.0,
+ ):
+     """
+     Analyzes drift in population distribution and default rates across score bands.
+
+     ### Purpose
+
+     The Score Bands Drift test is designed to evaluate changes in score-based risk segmentation
+     over time. By comparing population distribution and default rates across score bands between
+     reference and monitoring datasets, this test helps identify whether the model's risk
+     stratification remains stable in production. This is crucial for understanding if the model's
+     scoring behavior maintains its intended risk separation and whether specific score ranges
+     have experienced significant shifts.
+
+     ### Test Mechanism
+
+     This test proceeds by segmenting scores into predefined bands and analyzing three key metrics
+     across these bands: population distribution, predicted default rates, and observed default
+     rates. For each band, it computes these metrics for both reference and monitoring datasets
+     and quantifies drift as percentage changes. The test provides both detailed band-by-band
+     comparisons and overall stability assessment, with special attention to bands showing
+     significant drift.
+
+     ### Signs of High Risk
+
+     - Large shifts in population distribution across bands
+     - Significant changes in default rates within bands
+     - Inconsistent drift patterns between adjacent bands
+     - Divergence between predicted and observed rates
+     - Systematic shifts in risk concentration
+     - Empty or sparse score bands in monitoring data
+
+     ### Strengths
+
+     - Provides comprehensive view of score-based drift
+     - Identifies specific score ranges with instability
+     - Enables comparison of multiple risk metrics
+     - Includes both distribution and performance drift
+     - Supports business-relevant score segmentation
+     - Maintains interpretable drift thresholds
+
+     ### Limitations
+
+     - Sensitive to choice of score band boundaries
+     - Requires sufficient samples in each band
+     - Cannot suggest optimal band adjustments
+     - May not capture within-band distribution changes
+     - Limited to predefined scoring metrics
+     - Complex interpretation with multiple drift signals
+     """
+     # Validate score column
+     if score_column not in datasets[0].df.columns:
+         raise ValueError(
+             f"Score column '{score_column}' not found in reference dataset"
+         )
+     if score_column not in datasets[1].df.columns:
+         raise ValueError(
+             f"Score column '{score_column}' not found in monitoring dataset"
+         )
+
+     # Default score bands if none provided
+     if score_bands is None:
+         score_bands = [410, 440, 470]
+
+     # Create band labels
+     band_labels = [
+         f"{score_bands[i]}-{score_bands[i+1]}" for i in range(len(score_bands) - 1)
+     ]
+     band_labels.insert(0, f"<{score_bands[0]}")
+     band_labels.append(f">{score_bands[-1]}")
+
+     # Process reference and monitoring datasets
+     def process_dataset(dataset, model):
+         df = dataset.df.copy()
+         df["score_band"] = pd.cut(
+             df[score_column],
+             bins=[-np.inf] + score_bands + [np.inf],
+             labels=band_labels,
+         )
+         y_pred = dataset.y_pred(model)
+
+         results = {}
+         total_population = len(df)
+
+         # Store min and max scores
+         min_score = df[score_column].min()
+         max_score = df[score_column].max()
+
+         for band in band_labels:
+             band_mask = df["score_band"] == band
+             population = band_mask.sum()
+
+             results[band] = {
+                 "Population (%)": population / total_population * 100,
+                 "Predicted Default Rate (%)": (
+                     y_pred[band_mask].sum() / population * 100 if population > 0 else 0
+                 ),
+                 "Observed Default Rate (%)": (
+                     df[band_mask][dataset.target_column].sum() / population * 100
+                     if population > 0
+                     else 0
+                 ),
+             }
+
+         results["min_score"] = min_score
+         results["max_score"] = max_score
+         return results
+
+     # Get metrics for both datasets
+     ref_results = process_dataset(datasets[0], model)
+     mon_results = process_dataset(datasets[1], model)
+
+     # Create the three comparison tables
+     tables = {}
+     all_passed = True
+
+     metrics = [
+         ("Population Distribution (%)", "Population (%)"),
+         ("Predicted Default Rates (%)", "Predicted Default Rate (%)"),
+         ("Observed Default Rates (%)", "Observed Default Rate (%)"),
+     ]
+
+     for table_name, metric in metrics:
+         rows = []
+         metric_passed = True
+
+         for band in band_labels:
+             ref_val = ref_results[band][metric]
+             mon_val = mon_results[band][metric]
+
+             # Calculate drift - using absolute difference when reference is 0
+             drift = (
+                 abs(mon_val - ref_val)
+                 if ref_val == 0
+                 else ((mon_val - ref_val) / abs(ref_val)) * 100
+             )
+             passed = abs(drift) < drift_threshold
+             metric_passed &= passed
+
+             rows.append(
+                 {
+                     "Score Band": band,
+                     "Reference": round(ref_val, 4),
+                     "Monitoring": round(mon_val, 4),
+                     "Drift (%)": round(drift, 2),
+                     "Pass/Fail": "Pass" if passed else "Fail",
+                 }
+             )
+
+         # Add total row for all metrics
+         if metric == "Population (%)":
+             ref_total = 100.0
+             mon_total = 100.0
+             drift_total = 0.0
+             passed_total = True
+         else:
+             ref_total = sum(
+                 ref_results[band][metric] * (ref_results[band]["Population (%)"] / 100)
+                 for band in band_labels
+             )
+             mon_total = sum(
+                 mon_results[band][metric] * (mon_results[band]["Population (%)"] / 100)
+                 for band in band_labels
+             )
+             # Apply same drift calculation to totals
+             drift_total = (
+                 abs(mon_total - ref_total)
+                 if ref_total == 0
+                 else ((mon_total - ref_total) / abs(ref_total)) * 100
+             )
+             passed_total = abs(drift_total) < drift_threshold
+
+         # Format total row with score ranges
+         total_label = (
+             f"Total ({ref_results['min_score']:.0f}-{ref_results['max_score']:.0f})"
+         )
+
+         rows.append(
+             {
+                 "Score Band": total_label,
+                 "Reference": round(ref_total, 4),
+                 "Monitoring": round(mon_total, 4),
+                 "Drift (%)": round(drift_total, 2),
+                 "Pass/Fail": "Pass" if passed_total else "Fail",
+             }
+         )
+
+         metric_passed &= passed_total
+         tables[table_name] = pd.DataFrame(rows)
+         all_passed &= metric_passed
+
+     return tables, all_passed
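The drift formula above is a relative percentage change, falling back to an absolute difference when the reference value is zero. A small standalone sketch of the arithmetic, using hypothetical band values that do not come from the package:

# Hypothetical reference/monitoring values for one band, for illustration only.
ref_val, mon_val, drift_threshold = 25.0, 31.0, 20.0

drift = (
    abs(mon_val - ref_val)
    if ref_val == 0
    else ((mon_val - ref_val) / abs(ref_val)) * 100
)
print(round(drift, 2))  # 24.0, i.e. a 24% change relative to the reference band
print("Pass" if abs(drift) < drift_threshold else "Fail")  # Fail, since 24 >= 20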
validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py
@@ -0,0 +1,209 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from typing import List
+
+ import numpy as np
+ import pandas as pd
+ import plotly.graph_objects as go
+ from plotly.subplots import make_subplots
+ from scipy import stats
+
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset
+
+
+ @tags("visualization", "credit_risk", "logistic_regression")
+ @tasks("classification")
+ def ScorecardHistogramDrift(
+     datasets: List[VMDataset],
+     score_column: str = "score",
+     title: str = "Scorecard Histogram Drift",
+     drift_pct_threshold: float = 20.0,
+ ):
+     """
+     Compares score distributions between reference and monitoring datasets for each class.
+
+     ### Purpose
+
+     The Scorecard Histogram Drift test is designed to evaluate changes in the model's scoring
+     patterns over time. By comparing score distributions between reference and monitoring datasets
+     for each class, this test helps identify whether the model's scoring behavior remains stable
+     in production. This is crucial for understanding if the model's risk assessment maintains
+     consistent patterns and whether specific score ranges have experienced significant shifts
+     in their distribution.
+
+     ### Test Mechanism
+
+     This test proceeds by generating histograms of scores for each class in both reference and
+     monitoring datasets. It analyzes distribution characteristics through multiple statistical
+     moments: mean, variance, skewness, and kurtosis. The test quantifies drift as percentage
+     changes in these moments between datasets, providing both visual and numerical assessments
+     of distribution stability. Special attention is paid to class-specific distribution changes.
+
+     ### Signs of High Risk
+
+     - Significant shifts in score distribution shapes
+     - Large drifts in distribution moments exceeding threshold
+     - Changes in the relative positioning of class distributions
+     - Appearance of new modes or peaks in monitoring data
+     - Unexpected changes in score spread or concentration
+     - Systematic shifts in class-specific scoring patterns
+
+     ### Strengths
+
+     - Provides class-specific distribution analysis
+     - Identifies detailed changes in scoring patterns
+     - Enables visual comparison of distributions
+     - Includes comprehensive moment analysis
+     - Supports multiple class evaluation
+     - Maintains interpretable score scale
+
+     ### Limitations
+
+     - Sensitive to binning choices in visualization
+     - Requires sufficient samples per class
+     - Cannot suggest score adjustments
+     - May not capture subtle distribution changes
+     - Complex interpretation with multiple classes
+     - Limited to univariate score analysis
+     """
+     # Verify score column exists
+     if score_column not in datasets[0].df.columns:
+         raise ValueError(
+             f"Score column '{score_column}' not found in reference dataset"
+         )
+     if score_column not in datasets[1].df.columns:
+         raise ValueError(
+             f"Score column '{score_column}' not found in monitoring dataset"
+         )
+
+     # Get reference and monitoring data
+     df_ref = datasets[0].df
+     df_mon = datasets[1].df
+
+     # Get unique classes
+     classes = sorted(df_ref[datasets[0].target_column].unique())
+
+     # Create subplots with more horizontal space for legends
+     fig = make_subplots(
+         rows=len(classes),
+         cols=1,
+         subplot_titles=[f"Class {cls}" for cls in classes],
+         horizontal_spacing=0.15,
+     )
+
+     # Define colors
+     ref_color = "rgba(31, 119, 180, 0.8)"  # Blue with 0.8 opacity
+     mon_color = "rgba(255, 127, 14, 0.8)"  # Orange with 0.8 opacity
+
+     # Dictionary to store tables for each class
+     tables = {}
+     all_passed = True  # Track overall pass/fail
+
+     # Add histograms and create tables for each class
+     for i, class_value in enumerate(classes, start=1):
+         # Get scores for current class
+         ref_scores = df_ref[df_ref[datasets[0].target_column] == class_value][
+             score_column
+         ]
+         mon_scores = df_mon[df_mon[datasets[1].target_column] == class_value][
+             score_column
+         ]
+
+         # Calculate distribution moments
+         ref_stats = {
+             "Mean": np.mean(ref_scores),
+             "Variance": np.var(ref_scores),
+             "Skewness": stats.skew(ref_scores),
+             "Kurtosis": stats.kurtosis(ref_scores),
+         }
+
+         mon_stats = {
+             "Mean": np.mean(mon_scores),
+             "Variance": np.var(mon_scores),
+             "Skewness": stats.skew(mon_scores),
+             "Kurtosis": stats.kurtosis(mon_scores),
+         }
+
+         # Create table for this class
+         table_data = []
+         class_passed = True  # Track pass/fail for this class
+
+         for stat_name in ["Mean", "Variance", "Skewness", "Kurtosis"]:
+             ref_val = ref_stats[stat_name]
+             mon_val = mon_stats[stat_name]
+             drift = (
+                 ((mon_val - ref_val) / abs(ref_val)) * 100 if ref_val != 0 else np.inf
+             )
+             passed = abs(drift) < drift_pct_threshold
+             class_passed &= passed  # Update class pass/fail
+
+             table_data.append(
+                 {
+                     "Statistic": stat_name,
+                     "Reference": round(ref_val, 4),
+                     "Monitoring": round(mon_val, 4),
+                     "Drift (%)": round(drift, 2),
+                     "Pass/Fail": "Pass" if passed else "Fail",
+                 }
+             )
+
+         tables[f"Class {class_value}"] = pd.DataFrame(table_data)
+         all_passed &= class_passed  # Update overall pass/fail
+
+         # Reference dataset histogram
+         fig.add_trace(
+             go.Histogram(
+                 x=ref_scores,
+                 name=f"Reference - Class {class_value}",
+                 marker_color=ref_color,
+                 showlegend=True,
+                 legendrank=i * 2 - 1,
+             ),
+             row=i,
+             col=1,
+         )
+
+         # Monitoring dataset histogram
+         fig.add_trace(
+             go.Histogram(
+                 x=mon_scores,
+                 name=f"Monitoring - Class {class_value}",
+                 marker_color=mon_color,
+                 showlegend=True,
+                 legendrank=i * 2,
+             ),
+             row=i,
+             col=1,
+         )
+
+     # Update layout
+     fig.update_layout(
+         title_text=title,
+         barmode="overlay",
+         height=300 * len(classes),
+         width=1000,
+         showlegend=True,
+     )
+
+     # Update axes labels and add separate legends for each subplot
+     for i in range(len(classes)):
+         fig.update_xaxes(title_text="Score", row=i + 1, col=1)
+         fig.update_yaxes(title_text="Frequency", row=i + 1, col=1)
+
+         # Add separate legend for each subplot
+         fig.update_layout(
+             **{
+                 f'legend{i+1 if i > 0 else ""}': dict(
+                     yanchor="middle",
+                     y=1 - (i / len(classes)) - (0.5 / len(classes)),
+                     xanchor="left",
+                     x=1.05,
+                     tracegroupgap=5,
+                 )
+             }
+         )
+
+     return fig, tables, all_passed
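The per-class tables above reduce each score distribution to four moments and flag any moment whose relative change exceeds the threshold. A self-contained sketch of that comparison on synthetic scores (the numbers below are chosen for illustration and are not from the package):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
ref_scores = rng.normal(450, 30, 5_000)   # synthetic reference scores
mon_scores = rng.normal(465, 36, 5_000)   # synthetic monitoring scores with drift

drift_pct_threshold = 20.0
for name, fn in [("Mean", np.mean), ("Variance", np.var),
                 ("Skewness", stats.skew), ("Kurtosis", stats.kurtosis)]:
    ref_val, mon_val = fn(ref_scores), fn(mon_scores)
    # Same relative-change rule as the test: percentage change vs. the reference moment.
    drift = ((mon_val - ref_val) / abs(ref_val)) * 100 if ref_val != 0 else np.inf
    print(name, round(drift, 2), "Pass" if abs(drift) < drift_pct_threshold else "Fail")

Note that when a reference moment is close to zero (for example, the skewness of a nearly symmetric distribution), the percentage drift can be very large even for benign changes, which is one reason the docstring flags interpretation caveats.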
validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py
@@ -2,15 +2,17 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
- import matplotlib.pyplot as plt
- import seaborn as sns
+ import pandas as pd
+ import plotly.figure_factory as ff
+ import plotly.graph_objects as go
+ from scipy.stats import kurtosis, skew
 
  from validmind import tags, tasks
 
 
  @tags("visualization")
  @tasks("monitoring")
- def TargetPredictionDistributionPlot(datasets, model):
+ def TargetPredictionDistributionPlot(datasets, model, drift_pct_threshold=20):
      """
      Assesses differences in prediction distributions between a reference dataset and a monitoring dataset to identify
      potential data drift.
@@ -45,23 +47,99 @@ def TargetPredictionDistributionPlot(datasets, model):
      - Less effective if the differences in distributions are subtle and not easily visible.
      """
 
+     # Get predictions
      pred_ref = datasets[0].y_prob_df(model)
      pred_ref.columns = ["Reference Prediction"]
      pred_monitor = datasets[1].y_prob_df(model)
      pred_monitor.columns = ["Monitoring Prediction"]
 
-     fig = plt.figure()
-     plot = sns.kdeplot(
-         pred_ref["Reference Prediction"], fill=True, label="Reference Prediction"
+     # Calculate distribution moments
+     moments = pd.DataFrame(
+         {
+             "Statistic": ["Mean", "Std", "Skewness", "Kurtosis"],
+             "Reference": [
+                 pred_ref["Reference Prediction"].mean(),
+                 pred_ref["Reference Prediction"].std(),
+                 skew(pred_ref["Reference Prediction"]),
+                 kurtosis(pred_ref["Reference Prediction"]),
+             ],
+             "Monitoring": [
+                 pred_monitor["Monitoring Prediction"].mean(),
+                 pred_monitor["Monitoring Prediction"].std(),
+                 skew(pred_monitor["Monitoring Prediction"]),
+                 kurtosis(pred_monitor["Monitoring Prediction"]),
+             ],
+         }
      )
-     plot = sns.kdeplot(
-         pred_monitor["Monitoring Prediction"], fill=True, label="Monitor Prediction"
+
+     # Calculate drift percentage with direction
+     moments["Drift (%)"] = (
+         (moments["Monitoring"] - moments["Reference"])
+         / moments["Reference"].abs()
+         * 100
+     ).round(2)
+
+     # Add Pass/Fail column based on absolute drift
+     moments["Pass/Fail"] = (
+         moments["Drift (%)"]
+         .abs()
+         .apply(lambda x: "Pass" if x < drift_pct_threshold else "Fail")
+     )
+
+     # Set Statistic as index but keep it as a column
+     moments = moments.set_index("Statistic", drop=False)
+
+     # Create KDE for both distributions
+     ref_kde = ff.create_distplot(
+         [pred_ref["Reference Prediction"].values],
+         ["Reference"],
+         show_hist=False,
+         show_rug=False,
+     )
+     monitor_kde = ff.create_distplot(
+         [pred_monitor["Monitoring Prediction"].values],
+         ["Monitoring"],
+         show_hist=False,
+         show_rug=False,
      )
-     plot.set(
-         xlabel="Prediction", title="Distribution of Reference & Monitor Predictions"
+
+     # Create new figure
+     fig = go.Figure()
+
+     # Add reference distribution
+     fig.add_trace(
+         go.Scatter(
+             x=ref_kde.data[0].x,
+             y=ref_kde.data[0].y,
+             fill="tozeroy",
+             name="Reference Prediction",
+             line=dict(color="blue", width=2),
+             opacity=0.6,
+         )
+     )
+
+     # Add monitoring distribution
+     fig.add_trace(
+         go.Scatter(
+             x=monitor_kde.data[0].x,
+             y=monitor_kde.data[0].y,
+             fill="tozeroy",
+             name="Monitor Prediction",
+             line=dict(color="red", width=2),
+             opacity=0.6,
+         )
+     )
+
+     # Update layout
+     fig.update_layout(
+         title="Distribution of Reference & Monitor Predictions",
+         xaxis_title="Prediction",
+         yaxis_title="Density",
+         showlegend=True,
+         template="plotly_white",
+         hovermode="x unified",
      )
-     plot.legend()
 
-     plt.close()
+     pass_fail_bool = (moments["Pass/Fail"] == "Pass").all()
 
-     return fig
+     return ({"Distribution Moments": moments}, fig, pass_fail_bool)
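The rewrite replaces the seaborn KDE plot with Plotly traces built from ff.create_distplot. A stripped-down sketch of that pattern on synthetic prediction probabilities (variable names and data are hypothetical, not taken from the package):

import numpy as np
import plotly.figure_factory as ff
import plotly.graph_objects as go

rng = np.random.default_rng(1)
ref_probs = rng.beta(2, 5, 1_000)   # synthetic reference predictions
mon_probs = rng.beta(2, 4, 1_000)   # synthetic monitoring predictions

fig = go.Figure()
for label, values, color in [("Reference", ref_probs, "blue"),
                             ("Monitoring", mon_probs, "red")]:
    # create_distplot is only used to compute the KDE curve; its first trace
    # holds the x/y arrays that we re-plot as a filled Scatter trace.
    kde = ff.create_distplot([values], [label], show_hist=False, show_rug=False)
    fig.add_trace(
        go.Scatter(x=kde.data[0].x, y=kde.data[0].y, fill="tozeroy",
                   name=label, line=dict(color=color, width=2), opacity=0.6)
    )
fig.update_layout(xaxis_title="Prediction", yaxis_title="Density",
                  template="plotly_white")
fig.show()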
validmind/tests/prompt_validation/Bias.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import MissingRequiredTestInputError
 
  from .ai_powered_test import (
@@ -107,11 +107,15 @@ def Bias(model, min_threshold=7):
 
      passed = score > min_threshold
 
-     return [
-         {
-             "Score": score,
-             "Explanation": explanation,
-             "Threshold": min_threshold,
-             "Pass/Fail": "Pass" if passed else "Fail",
-         }
-     ], passed
+     return (
+         [
+             {
+                 "Score": score,
+                 "Explanation": explanation,
+                 "Threshold": min_threshold,
+                 "Pass/Fail": "Pass" if passed else "Fail",
+             }
+         ],
+         passed,
+         RawData(response=response),
+     )
validmind/tests/prompt_validation/Clarity.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import MissingRequiredTestInputError
 
  from .ai_powered_test import (
@@ -96,11 +96,15 @@ def Clarity(model, min_threshold=7):
 
      passed = score > min_threshold
 
-     return [
-         {
-             "Score": score,
-             "Explanation": explanation,
-             "Threshold": min_threshold,
-             "Pass/Fail": "Pass" if passed else "Fail",
-         }
-     ], passed
+     return (
+         [
+             {
+                 "Score": score,
+                 "Explanation": explanation,
+                 "Threshold": min_threshold,
+                 "Pass/Fail": "Pass" if passed else "Fail",
+             }
+         ],
+         passed,
+         RawData(response=response),
+     )
validmind/tests/prompt_validation/Conciseness.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import MissingRequiredTestInputError
 
  from .ai_powered_test import (
@@ -103,11 +103,15 @@ def Conciseness(model, min_threshold=7):
 
      passed = score > min_threshold
 
-     return [
-         {
-             "Score": score,
-             "Threshold": min_threshold,
-             "Explanation": explanation,
-             "Pass/Fail": "Pass" if passed else "Fail",
-         }
-     ], passed
+     return (
+         [
+             {
+                 "Score": score,
+                 "Threshold": min_threshold,
+                 "Explanation": explanation,
+                 "Pass/Fail": "Pass" if passed else "Fail",
+             }
+         ],
+         passed,
+         RawData(response=response),
+     )
validmind/tests/prompt_validation/Delimitation.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import MissingRequiredTestInputError
 
  from .ai_powered_test import (
@@ -89,11 +89,15 @@ def Delimitation(model, min_threshold=7):
 
      passed = score > min_threshold
 
-     return [
-         {
-             "Score": score,
-             "Threshold": min_threshold,
-             "Explanation": explanation,
-             "Pass/Fail": "Pass" if passed else "Fail",
-         }
-     ], passed
+     return (
+         [
+             {
+                 "Score": score,
+                 "Threshold": min_threshold,
+                 "Explanation": explanation,
+                 "Pass/Fail": "Pass" if passed else "Fail",
+             }
+         ],
+         passed,
+         RawData(response=response),
+     )
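Across these prompt-validation tests the only functional change is the third return element, RawData(response=response), which keeps the raw LLM response alongside the summary table and the pass/fail flag. A hedged sketch of a test following the same return shape; the test name, tags, task label, and scoring stub below are hypothetical and only mirror the pattern shown in the diffs above:

from validmind import RawData, tags, tasks

@tags("example")
@tasks("text_qa")
def MyCustomCheck(model, min_threshold=7):
    # Hypothetical scoring step; a real test would call the model / grader here.
    score, explanation, response = 8, "Prompt is clear and specific.", "raw LLM text"
    passed = score > min_threshold
    return (
        [{"Score": score, "Threshold": min_threshold,
          "Explanation": explanation, "Pass/Fail": "Pass" if passed else "Fail"}],
        passed,
        RawData(response=response),  # preserved for later inspection, as in 2.7.7
    )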