validmind 2.5.15__py3-none-any.whl → 2.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +54 -112
  3. validmind/ai/test_result_description/config.yaml +29 -0
  4. validmind/ai/test_result_description/context.py +73 -0
  5. validmind/ai/test_result_description/image_processing.py +124 -0
  6. validmind/ai/test_result_description/system.jinja +39 -0
  7. validmind/ai/test_result_description/user.jinja +25 -0
  8. validmind/datasets/credit_risk/__init__.py +1 -0
  9. validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
  10. validmind/datasets/credit_risk/lending_club_bias.py +142 -0
  11. validmind/tests/__types__.py +19 -10
  12. validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +20 -24
  13. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +4 -1
  14. validmind/tests/{model_validation/statsmodels → data_validation}/JarqueBera.py +22 -30
  15. validmind/tests/{model_validation/statsmodels → data_validation}/LJungBox.py +23 -27
  16. validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
  17. validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
  18. validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
  19. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
  20. validmind/tests/{model_validation/statsmodels → data_validation}/RunsTest.py +17 -20
  21. validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +20 -22
  22. validmind/tests/data_validation/nlp/Hashtags.py +15 -20
  23. validmind/tests/data_validation/nlp/TextDescription.py +3 -1
  24. validmind/tests/model_validation/ContextualRecall.py +3 -0
  25. validmind/tests/model_validation/ragas/AspectCritique.py +5 -6
  26. validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
  27. validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
  28. validmind/tests/model_validation/sklearn/FeatureImportance.py +3 -3
  29. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +1 -1
  30. validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -2
  31. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +59 -0
  32. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +40 -20
  33. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -1
  34. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +1 -1
  35. validmind/utils.py +4 -0
  36. validmind/vm_models/test/metric.py +1 -0
  37. validmind/vm_models/test/result_wrapper.py +50 -26
  38. validmind/vm_models/test/threshold_test.py +1 -0
  39. {validmind-2.5.15.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
  40. {validmind-2.5.15.dist-info → validmind-2.5.18.dist-info}/RECORD +43 -30
  41. {validmind-2.5.15.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
  42. {validmind-2.5.15.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
  43. {validmind-2.5.15.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
@@ -33,7 +33,6 @@ TestID = Literal[
33
33
  "validmind.model_validation.ClusterSizeDistribution",
34
34
  "validmind.model_validation.TokenDisparity",
35
35
  "validmind.model_validation.ToxicityScore",
36
- "validmind.model_validation.ModelMetadata",
37
36
  "validmind.model_validation.TimeSeriesR2SquareBySegments",
38
37
  "validmind.model_validation.embeddings.CosineSimilarityComparison",
39
38
  "validmind.model_validation.embeddings.EmbeddingsVisualization2D",
@@ -53,12 +52,13 @@ TestID = Literal[
53
52
  "validmind.model_validation.ragas.ContextEntityRecall",
54
53
  "validmind.model_validation.ragas.Faithfulness",
55
54
  "validmind.model_validation.ragas.AspectCritique",
55
+ "validmind.model_validation.ragas.NoiseSensitivity",
56
56
  "validmind.model_validation.ragas.AnswerSimilarity",
57
57
  "validmind.model_validation.ragas.AnswerCorrectness",
58
58
  "validmind.model_validation.ragas.ContextRecall",
59
59
  "validmind.model_validation.ragas.ContextPrecision",
60
60
  "validmind.model_validation.ragas.AnswerRelevance",
61
- "validmind.model_validation.sklearn.RegressionModelsPerformanceComparison",
61
+ "validmind.model_validation.ragas.ContextUtilization",
62
62
  "validmind.model_validation.sklearn.AdjustedMutualInformation",
63
63
  "validmind.model_validation.sklearn.SilhouettePlot",
64
64
  "validmind.model_validation.sklearn.RobustnessDiagnosis",
@@ -77,35 +77,35 @@ TestID = Literal[
77
77
  "validmind.model_validation.sklearn.ClassifierPerformance",
78
78
  "validmind.model_validation.sklearn.VMeasure",
79
79
  "validmind.model_validation.sklearn.MinimumF1Score",
80
+ "validmind.model_validation.sklearn.RegressionPerformance",
80
81
  "validmind.model_validation.sklearn.ROCCurve",
81
82
  "validmind.model_validation.sklearn.RegressionR2Square",
82
83
  "validmind.model_validation.sklearn.RegressionErrors",
83
84
  "validmind.model_validation.sklearn.ClusterPerformance",
84
- "validmind.model_validation.sklearn.FeatureImportance",
85
85
  "validmind.model_validation.sklearn.TrainingTestDegradation",
86
+ "validmind.model_validation.sklearn.RegressionErrorsComparison",
87
+ "validmind.model_validation.sklearn.FeatureImportance",
86
88
  "validmind.model_validation.sklearn.HyperParametersTuning",
87
89
  "validmind.model_validation.sklearn.KMeansClustersOptimization",
88
90
  "validmind.model_validation.sklearn.ModelsPerformanceComparison",
89
91
  "validmind.model_validation.sklearn.WeakspotsDiagnosis",
92
+ "validmind.model_validation.sklearn.RegressionR2SquareComparison",
90
93
  "validmind.model_validation.sklearn.PopulationStabilityIndex",
91
94
  "validmind.model_validation.sklearn.MinimumAccuracy",
92
- "validmind.model_validation.statsmodels.RegressionModelCoeffs",
93
- "validmind.model_validation.statsmodels.BoxPierce",
94
- "validmind.model_validation.statsmodels.RegressionCoeffsPlot",
95
+ "validmind.model_validation.statsmodels.RegressionModelSensitivityPlot",
96
+ "validmind.model_validation.statsmodels.RegressionModelForecastPlotLevels",
95
97
  "validmind.model_validation.statsmodels.ScorecardHistogram",
96
- "validmind.model_validation.statsmodels.LJungBox",
97
- "validmind.model_validation.statsmodels.JarqueBera",
98
98
  "validmind.model_validation.statsmodels.KolmogorovSmirnov",
99
- "validmind.model_validation.statsmodels.ShapiroWilk",
100
99
  "validmind.model_validation.statsmodels.CumulativePredictionProbabilities",
101
100
  "validmind.model_validation.statsmodels.RegressionFeatureSignificance",
102
101
  "validmind.model_validation.statsmodels.RegressionModelSummary",
102
+ "validmind.model_validation.statsmodels.RegressionCoeffs",
103
103
  "validmind.model_validation.statsmodels.Lilliefors",
104
- "validmind.model_validation.statsmodels.RunsTest",
105
104
  "validmind.model_validation.statsmodels.RegressionPermutationFeatureImportance",
106
105
  "validmind.model_validation.statsmodels.PredictionProbabilitiesHistogram",
107
106
  "validmind.model_validation.statsmodels.AutoARIMA",
108
107
  "validmind.model_validation.statsmodels.GINITable",
108
+ "validmind.model_validation.statsmodels.RegressionModelForecastPlot",
109
109
  "validmind.model_validation.statsmodels.DurbinWatsonTest",
110
110
  "validmind.ongoing_monitoring.PredictionCorrelation",
111
111
  "validmind.ongoing_monitoring.PredictionAcrossEachFeature",
@@ -113,9 +113,11 @@ TestID = Literal[
113
113
  "validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
114
114
  "validmind.data_validation.IQROutliersTable",
115
115
  "validmind.data_validation.Skewness",
116
+ "validmind.data_validation.BoxPierce",
116
117
  "validmind.data_validation.Duplicates",
117
118
  "validmind.data_validation.MissingValuesBarPlot",
118
119
  "validmind.data_validation.DatasetDescription",
120
+ "validmind.data_validation.ProtectedClassesCombination",
119
121
  "validmind.data_validation.ZivotAndrewsArch",
120
122
  "validmind.data_validation.ScatterPlot",
121
123
  "validmind.data_validation.TimeSeriesOutliers",
@@ -123,7 +125,9 @@ TestID = Literal[
123
125
  "validmind.data_validation.AutoStationarity",
124
126
  "validmind.data_validation.DescriptiveStatistics",
125
127
  "validmind.data_validation.TimeSeriesDescription",
128
+ "validmind.data_validation.LJungBox",
126
129
  "validmind.data_validation.TargetRateBarPlots",
130
+ "validmind.data_validation.JarqueBera",
127
131
  "validmind.data_validation.PearsonCorrelationMatrix",
128
132
  "validmind.data_validation.FeatureTargetCorrelationPlot",
129
133
  "validmind.data_validation.TabularNumericalHistograms",
@@ -133,9 +137,11 @@ TestID = Literal[
133
137
  "validmind.data_validation.MissingValues",
134
138
  "validmind.data_validation.PhillipsPerronArch",
135
139
  "validmind.data_validation.RollingStatsPlot",
140
+ "validmind.data_validation.ProtectedClassesDisparity",
136
141
  "validmind.data_validation.TabularDescriptionTables",
137
142
  "validmind.data_validation.AutoMA",
138
143
  "validmind.data_validation.UniqueRows",
144
+ "validmind.data_validation.ShapiroWilk",
139
145
  "validmind.data_validation.TooManyZeroValues",
140
146
  "validmind.data_validation.HighPearsonCorrelation",
141
147
  "validmind.data_validation.ACFandPACFPlot",
@@ -146,10 +152,12 @@ TestID = Literal[
146
152
  "validmind.data_validation.TimeSeriesLinePlot",
147
153
  "validmind.data_validation.KPSS",
148
154
  "validmind.data_validation.AutoSeasonality",
155
+ "validmind.data_validation.ProtectedClassesDescription",
149
156
  "validmind.data_validation.BivariateScatterPlots",
150
157
  "validmind.data_validation.EngleGrangerCoint",
151
158
  "validmind.data_validation.TimeSeriesMissingValues",
152
159
  "validmind.data_validation.TimeSeriesHistogram",
160
+ "validmind.data_validation.RunsTest",
153
161
  "validmind.data_validation.LaggedCorrelationHeatmap",
154
162
  "validmind.data_validation.SeasonalDecompose",
155
163
  "validmind.data_validation.WOEBinPlots",
@@ -159,6 +167,7 @@ TestID = Literal[
159
167
  "validmind.data_validation.TimeSeriesDescriptiveStatistics",
160
168
  "validmind.data_validation.AutoAR",
161
169
  "validmind.data_validation.TabularDateTimeHistograms",
170
+ "validmind.data_validation.ProtectedClassesThresholdOptimizer",
162
171
  "validmind.data_validation.ADF",
163
172
  "validmind.data_validation.nlp.Toxicity",
164
173
  "validmind.data_validation.nlp.PolarityAndSubjectivity",
@@ -2,12 +2,15 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ import pandas as pd
5
6
  from statsmodels.stats.diagnostic import acorr_ljungbox
6
7
 
7
- from validmind.vm_models import Metric
8
+ from validmind import tags, tasks
8
9
 
9
10
 
10
- class BoxPierce(Metric):
11
+ @tasks("regression")
12
+ @tags("time_series_data", "forecasting", "statistical_test", "statsmodels")
13
+ def BoxPierce(dataset):
11
14
  """
12
15
  Detects autocorrelation in time-series data through the Box-Pierce test to validate model performance.
13
16
 
@@ -51,25 +54,18 @@ class BoxPierce(Metric):
51
54
  - Applicability is limited to time-series data, which limits its overall utility.
52
55
  """
53
56
 
54
- name = "box_pierce"
55
- required_inputs = ["dataset"]
56
- tasks = ["regression"]
57
- tags = ["time_series_data", "forecasting", "statistical_test", "statsmodels"]
58
-
59
- def run(self):
60
- """
61
- Calculates Box-Pierce test for each of the dataset features
62
- """
63
- x_train = self.inputs.dataset.df
64
-
65
- box_pierce_values = {}
66
- for col in x_train.columns:
67
- bp_results = acorr_ljungbox(
68
- x_train[col].values, boxpierce=True, return_df=True
69
- )
70
- box_pierce_values[col] = {
71
- "stat": bp_results.iloc[0]["lb_stat"],
72
- "pvalue": bp_results.iloc[0]["lb_pvalue"],
73
- }
74
-
75
- return self.cache_results(box_pierce_values)
57
+ df = dataset.df
58
+
59
+ box_pierce_values = {}
60
+ for col in df.columns:
61
+ bp_results = acorr_ljungbox(df[col].values, boxpierce=True, return_df=True)
62
+ box_pierce_values[col] = {
63
+ "stat": bp_results.iloc[0]["lb_stat"],
64
+ "pvalue": bp_results.iloc[0]["lb_pvalue"],
65
+ }
66
+
67
+ box_pierce_df = pd.DataFrame.from_dict(box_pierce_values, orient="index")
68
+ box_pierce_df.reset_index(inplace=True)
69
+ box_pierce_df.columns = ["column", "stat", "pvalue"]
70
+
71
+ return box_pierce_df
@@ -7,6 +7,7 @@ import pandas as pd
7
7
  from scipy.stats import chi2_contingency
8
8
 
9
9
  from validmind import tags, tasks
10
+ from validmind.errors import SkipTestError
10
11
 
11
12
 
12
13
  @tags("tabular_data", "categorical_data", "statistical_test")
@@ -55,9 +56,11 @@ def ChiSquaredFeaturesTable(dataset, p_threshold=0.05):
55
56
  """
56
57
 
57
58
  target_column = dataset.target_column
58
-
59
59
  features = dataset.feature_columns_categorical
60
60
 
61
+ if not features:
62
+ raise SkipTestError("No categorical features found in dataset")
63
+
61
64
  results_df = _chi_squared_categorical_feature_selection(
62
65
  dataset.df, features, target_column, p_threshold
63
66
  )
@@ -2,12 +2,15 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ import pandas as pd
5
6
  from statsmodels.stats.stattools import jarque_bera
6
7
 
7
- from validmind.vm_models import Metric
8
+ from validmind import tags, tasks
8
9
 
9
10
 
10
- class JarqueBera(Metric):
11
+ @tasks("classification", "regression")
12
+ @tags("tabular_data", "data_distribution", "statistical_test", "statsmodels")
13
+ def JarqueBera(dataset):
11
14
  """
12
15
  Assesses normality of dataset features in an ML model using the Jarque-Bera test.
13
16
 
@@ -48,31 +51,20 @@ class JarqueBera(Metric):
48
51
  even for minor deviations in larger datasets.
49
52
  """
50
53
 
51
- name = "jarque_bera"
52
- required_inputs = ["dataset"]
53
- tasks = ["classification", "regression"]
54
- tags = [
55
- "tabular_data",
56
- "data_distribution",
57
- "statistical_test",
58
- "statsmodels",
59
- ]
60
-
61
- def run(self):
62
- """
63
- Calculates JB for each of the dataset features
64
- """
65
- x_train = self.inputs.dataset.df[self.inputs.dataset.feature_columns_numeric]
66
-
67
- jb_values = {}
68
- for col in x_train.columns:
69
- jb_stat, jb_pvalue, jb_skew, jb_kurtosis = jarque_bera(x_train[col].values)
70
-
71
- jb_values[col] = {
72
- "stat": jb_stat,
73
- "pvalue": jb_pvalue,
74
- "skew": jb_skew,
75
- "kurtosis": jb_kurtosis,
76
- }
77
-
78
- return self.cache_results(jb_values)
54
+ df = dataset.df[dataset.feature_columns_numeric]
55
+
56
+ jb_values = {}
57
+ for col in df.columns:
58
+ jb_stat, jb_pvalue, jb_skew, jb_kurtosis = jarque_bera(df[col].values)
59
+ jb_values[col] = {
60
+ "stat": jb_stat,
61
+ "pvalue": jb_pvalue,
62
+ "skew": jb_skew,
63
+ "kurtosis": jb_kurtosis,
64
+ }
65
+
66
+ jb_df = pd.DataFrame.from_dict(jb_values, orient="index")
67
+ jb_df.reset_index(inplace=True)
68
+ jb_df.columns = ["column", "stat", "pvalue", "skew", "kurtosis"]
69
+
70
+ return jb_df
@@ -2,12 +2,15 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ import pandas as pd
5
6
  from statsmodels.stats.diagnostic import acorr_ljungbox
6
7
 
7
- from validmind.vm_models import Metric
8
+ from validmind import tags, tasks
8
9
 
9
10
 
10
- class LJungBox(Metric):
11
+ @tasks("regression")
12
+ @tags("time_series_data", "forecasting", "statistical_test", "statsmodels")
13
+ def LJungBox(dataset):
11
14
  """
12
15
  Assesses autocorrelations in dataset features by performing a Ljung-Box test on each feature.
13
16
 
@@ -20,11 +23,11 @@ class LJungBox(Metric):
20
23
 
21
24
  ### Test Mechanism
22
25
 
23
- The test operates by iterating over each feature within the training dataset and applying the `acorr_ljungbox`
26
+ The test operates by iterating over each feature within the dataset and applying the `acorr_ljungbox`
24
27
  function from the `statsmodels.stats.diagnostic` library. This function calculates the Ljung-Box statistic and
25
- p-value for each feature. These results are then stored in a dictionary where the keys are the feature names and
26
- the values are dictionaries containing the statistic and p-value respectively. Generally, a lower p-value indicates
27
- a higher likelihood of significant autocorrelations within the feature.
28
+ p-value for each feature. These results are then stored in a pandas DataFrame where the columns are the feature names,
29
+ statistic, and p-value respectively. Generally, a lower p-value indicates a higher likelihood of significant
30
+ autocorrelations within the feature.
28
31
 
29
32
  ### Signs of High Risk
30
33
 
@@ -41,30 +44,23 @@ class LJungBox(Metric):
41
44
  ### Limitations
42
45
 
43
46
  - Cannot detect all types of non-linearity or complex interrelationships among variables.
44
- - Testing individual features may not fully encapsulate the dynamics of the data if features interact with each
45
- other.
47
+ - Testing individual features may not fully encapsulate the dynamics of the data if features interact with each other.
46
48
  - Designed more for traditional statistical models and may not be fully compatible with certain types of complex
47
- machine learning models.
49
+ machine learning models.
48
50
  """
49
51
 
50
- name = "ljung_box"
51
- required_inputs = ["dataset"]
52
- tasks = ["regression"]
53
- tags = ["time_series_data", "forecasting", "statistical_test", "statsmodels"]
52
+ df = dataset.df
54
53
 
55
- def run(self):
56
- """
57
- Calculates Ljung-Box test for each of the dataset features
58
- """
59
- x_train = self.inputs.dataset.df
54
+ ljung_box_values = {}
55
+ for col in df.columns:
56
+ lb_results = acorr_ljungbox(df[col].values, return_df=True)
57
+ ljung_box_values[col] = {
58
+ "stat": lb_results.iloc[0]["lb_stat"],
59
+ "pvalue": lb_results.iloc[0]["lb_pvalue"],
60
+ }
60
61
 
61
- ljung_box_values = {}
62
- for col in x_train.columns:
63
- lb_results = acorr_ljungbox(x_train[col].values, return_df=True)
62
+ ljung_box_df = pd.DataFrame.from_dict(ljung_box_values, orient="index")
63
+ ljung_box_df.reset_index(inplace=True)
64
+ ljung_box_df.columns = ["column", "stat", "pvalue"]
64
65
 
65
- ljung_box_values[col] = {
66
- "stat": lb_results["lb_stat"].values[0],
67
- "pvalue": lb_results["lb_pvalue"].values[0],
68
- }
69
-
70
- return self.cache_results(ljung_box_values)
66
+ return ljung_box_df
@@ -0,0 +1,197 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ import sys
6
+
7
+ import pandas as pd
8
+ import plotly.graph_objects as go
9
+ import plotly.subplots as sp
10
+ from fairlearn.metrics import (
11
+ MetricFrame,
12
+ count,
13
+ demographic_parity_ratio,
14
+ equalized_odds_ratio,
15
+ false_positive_rate,
16
+ selection_rate,
17
+ true_positive_rate,
18
+ )
19
+
20
+ from validmind import tags, tasks
21
+ from validmind.logging import get_logger
22
+
23
+ logger = get_logger(__name__)
24
+
25
+
26
@tags("bias_and_fairness")
@tasks("classification", "regression")
def ProtectedClassesCombination(dataset, model, protected_classes=None):
    """
    Visualizes combinations of protected classes and their corresponding error metric differences.

    ### Purpose

    This test aims to provide insights into how different combinations of protected classes affect various
    error metrics, particularly the false positive rate (FPR) and true positive rate (TPR), together with the
    selection rate and group size. By visualizing these combinations, it helps identify potential biases or
    disparities in model performance across different intersectional groups.

    ### Test Mechanism

    The test performs the following steps:
    1. Builds a fairlearn `MetricFrame` that computes FPR, TPR, selection rate and count for every observed
       combination of the specified protected class columns.
    2. Combines the protected class columns into a single label per combination for the x-axis.
    3. Generates a 2x2 grid of bar charts showing each metric across all class combinations.
    4. Computes the demographic parity ratio and equalized odds ratio for each protected class individually.

    ### Signs of High Risk

    - Large disparities in FPR or TPR across different protected class combinations.
    - Consistent patterns of higher error rates for specific combinations of protected attributes.
    - Unexpected or unexplainable variations in error metrics between similar group combinations.

    ### Strengths

    - Provides a comprehensive view of intersectional fairness across multiple protected attributes.
    - Allows for easy identification of potentially problematic combinations of protected classes.
    - Visualizations make it easier to spot patterns or outliers in model performance across groups.

    ### Limitations

    - May become complex and difficult to interpret with a large number of protected classes or combinations.
    - Does not provide statistical significance of observed differences.
    - Visualization alone may not capture all nuances of intersectional fairness.

    Args:
        dataset: Dataset input; this test reads `dataset.y`, `dataset.y_pred(model)` and `dataset._df`.
        model: Model whose predictions on `dataset` are evaluated.
        protected_classes: List of column names in the dataset that hold protected attributes.

    Returns:
        An empty DataFrame when no protected classes are given; otherwise a tuple of
        (`{"Class Combination Table": DataFrame}`, `{"DPR and EOR table": DataFrame}`, plotly figure).

    Raises:
        RuntimeError: If the interpreter is older than Python 3.9.
    """

    if sys.version_info < (3, 9):
        raise RuntimeError("This test requires Python 3.9 or higher.")

    # An empty list is as unusable as None: there would be no groups to compare.
    if protected_classes is None or len(protected_classes) == 0:
        logger.warning(
            "No protected classes provided. Please pass the 'protected_classes' parameter to run this test."
        )
        return pd.DataFrame()

    # Fetch labels and predictions once; they are reused by every metric below.
    # NOTE(review): assumes dataset.y_pred(model) returns the same values on
    # every call - confirm against the dataset implementation.
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    # Construct a function dictionary for figures
    my_metrics = {
        "fpr": false_positive_rate,
        "tpr": true_positive_rate,
        "selection rate": selection_rate,
        "count": count,
    }

    # Construct a MetricFrame for figures
    mf = MetricFrame(
        metrics=my_metrics,
        y_true=y_true,
        y_pred=y_pred,
        sensitive_features=dataset._df[protected_classes],
    )

    # Combine protected class columns to create a single multi-class category for the x-axis
    metrics_by_group = mf.by_group.reset_index()
    metrics_by_group["class_combination"] = metrics_by_group[protected_classes].apply(
        lambda row: ", ".join(row.values.astype(str)), axis=1
    )

    # Create the subplots for the bar plots
    fig = sp.make_subplots(
        rows=2,
        cols=2,
        subplot_titles=[
            "False Positive Rate",
            "True Positive Rate",
            "Selection Rate",
            "Count",
        ],
    )

    # (metric column, legend label, subplot row, subplot column)
    panels = [
        ("fpr", "FPR", 1, 1),
        ("tpr", "TPR", 1, 2),
        ("selection rate", "Selection Rate", 2, 1),
        ("count", "Count", 2, 2),
    ]
    for metric_key, label, row, col in panels:
        fig.add_trace(
            go.Bar(
                x=metrics_by_group["class_combination"],
                y=metrics_by_group[metric_key],
                name=label,
            ),
            row=row,
            col=col,
        )
        # Rotate x-axis labels for better readability
        fig.update_xaxes(tickangle=45, row=row, col=col)

    # Update layout of the figure to match the original style
    fig.update_layout(
        title="Show all metrics",
        height=800,
        width=900,
        barmode="group",
        legend=dict(orientation="h", yanchor="bottom", y=-0.3, xanchor="center", x=0.5),
        margin=dict(t=50),
        font=dict(size=12),
    )

    # Create a DataFrame for the demographic parity and equalized odds ratio
    dpr_eor_df = pd.DataFrame(
        columns=protected_classes,
        index=["demographic parity ratio", "equal odds ratio"],
    )

    for protected_class in protected_classes:
        sensitive = dataset._df[[protected_class]]
        dpr = demographic_parity_ratio(
            y_true=y_true,
            y_pred=y_pred,
            sensitive_features=sensitive,
        )
        eor = equalized_odds_ratio(
            y_true=y_true,
            y_pred=y_pred,
            sensitive_features=sensitive,
        )
        # Single .loc assignment instead of df[col][row] = value: chained
        # indexing may write to a temporary copy and is a no-op under pandas
        # Copy-on-Write, which would leave this table filled with NaN.
        dpr_eor_df.loc["demographic parity ratio", protected_class] = round(dpr, 2)
        dpr_eor_df.loc["equal odds ratio", protected_class] = round(eor, 2)

    return (
        {"Class Combination Table": metrics_by_group},
        {"DPR and EOR table": dpr_eor_df},
        fig,
    )
@@ -0,0 +1,130 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+
6
+ import pandas as pd
7
+ import plotly.graph_objects as go
8
+
9
+ from validmind import tags, tasks
10
+ from validmind.logging import get_logger
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
@tags("bias_and_fairness", "descriptive_statistics")
@tasks("classification", "regression")
def ProtectedClassesDescription(dataset, protected_classes=None):
    """
    Visualizes the distribution of protected classes in the dataset relative to the target variable
    and provides descriptive statistics.

    ### Purpose

    The ProtectedClassesDescription test aims to identify potential biases or significant differences in the
    distribution of target outcomes across different protected classes. This visualization and statistical summary
    help in understanding the relationship between protected attributes and the target variable, which is crucial
    for assessing fairness in machine learning models.

    ### Test Mechanism

    The function creates interactive stacked bar charts for each specified protected class using Plotly.
    Additionally, it generates a single table of descriptive statistics for all protected classes, including:
    - Protected class and category
    - Count and percentage of each category within the protected class
    - Rate (percentage of rows) of each target value within each category

    Rows whose protected class value is missing are excluded from both the charts and the table.

    ### Signs of High Risk

    - Significant imbalances in the distribution of target outcomes across different categories of a protected class.
    - Large disparities in target value rates across categories.
    - Underrepresentation or overrepresentation of certain groups within protected classes.

    ### Strengths

    - Provides both visual and statistical representation of potential biases in the dataset.
    - Allows for easy identification of imbalances in target variable distribution across protected classes.
    - Interactive plots enable detailed exploration of the data.
    - Consolidated statistical summary provides quantitative measures to complement visual analysis.

    ### Limitations

    - Does not provide advanced statistical measures of bias or fairness.
    - May become cluttered if there are many categories within a protected class or many unique target values.
    - Interpretation may require domain expertise to understand the implications of observed disparities.
    - Does not account for intersectionality or complex interactions between multiple protected attributes.

    Args:
        dataset: Dataset input; this test reads `dataset._df` and `dataset.target_column`.
        protected_classes: List of column names in the dataset that hold protected attributes.

    Returns:
        An empty DataFrame when no protected classes are given; otherwise a tuple whose first element is
        the statistics DataFrame, followed by one plotly figure per protected class.
    """

    # An empty list is as unusable as None; without this guard the sort below
    # would raise KeyError on a frame that has no columns.
    if protected_classes is None or len(protected_classes) == 0:
        logger.warning(
            "No protected classes provided. Please pass the 'protected_classes' parameter to run this test."
        )
        return pd.DataFrame()

    figures = []
    all_stats = []

    df = dataset._df
    target = dataset.target_column
    total_rows = len(df)

    # Unique target values do not depend on the protected class, so compute once.
    target_labels = df[target].unique()

    for protected_class in protected_classes:
        # Create the stacked bar chart (groupby drops missing keys by default)
        counts = df.groupby([protected_class, target]).size().unstack(fill_value=0)
        fig = go.Figure()
        for col in counts.columns:
            fig.add_trace(
                go.Bar(
                    x=counts.index,
                    y=counts[col],
                    name=str(col),
                    text=counts[col],
                    textposition="auto",
                )
            )

        fig.update_layout(
            title=f"Distribution of {protected_class} by {target}",
            xaxis_title=protected_class,
            yaxis_title="Count",
            barmode="stack",
            showlegend=True,
            legend_title=target,
        )

        figures.append(fig)

        # Skip missing values: NaN never compares equal to itself, so the
        # equality filter below would select zero rows and the rate
        # computation would divide by zero. Dropping them also keeps the
        # table consistent with the chart above.
        for category in df[protected_class].dropna().unique():
            category_data = df[df[protected_class] == category]
            category_count = len(category_data)
            stats = {
                "Protected Class": protected_class,
                "Category": category,
                "Count": category_count,
                "Percentage": category_count / total_rows * 100,
            }

            # Add the share of rows with each target value within this category
            for label in target_labels:
                label_count = int((category_data[target] == label).sum())
                stats[f"Rate {target}: {label}"] = label_count / category_count * 100

            all_stats.append(stats)

    # Create a single DataFrame with all statistics
    stats_df = pd.DataFrame(all_stats)

    # Nothing to round or sort when every protected value was missing.
    if not stats_df.empty:
        stats_df = stats_df.round(2)  # Round to 2 decimal places for readability

        # Sort the DataFrame by Protected Class and Count (descending)
        stats_df = stats_df.sort_values(
            ["Protected Class", "Count"], ascending=[True, False]
        )

    return (stats_df, *figures)