validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +80 -119
  3. validmind/ai/test_result_description/config.yaml +29 -0
  4. validmind/ai/test_result_description/context.py +73 -0
  5. validmind/ai/test_result_description/image_processing.py +124 -0
  6. validmind/ai/test_result_description/system.jinja +39 -0
  7. validmind/ai/test_result_description/user.jinja +25 -0
  8. validmind/api_client.py +89 -43
  9. validmind/client.py +2 -2
  10. validmind/client_config.py +11 -14
  11. validmind/datasets/credit_risk/__init__.py +1 -0
  12. validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
  13. validmind/datasets/credit_risk/lending_club_bias.py +142 -0
  14. validmind/datasets/regression/fred_timeseries.py +67 -138
  15. validmind/template.py +1 -0
  16. validmind/test_suites/__init__.py +0 -2
  17. validmind/test_suites/statsmodels_timeseries.py +1 -1
  18. validmind/test_suites/summarization.py +0 -1
  19. validmind/test_suites/time_series.py +0 -43
  20. validmind/tests/__types__.py +14 -15
  21. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  22. validmind/tests/data_validation/ADF.py +31 -24
  23. validmind/tests/data_validation/AutoAR.py +9 -9
  24. validmind/tests/data_validation/AutoMA.py +23 -16
  25. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  26. validmind/tests/data_validation/AutoStationarity.py +21 -16
  27. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  28. validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
  29. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
  30. validmind/tests/data_validation/ClassImbalance.py +15 -12
  31. validmind/tests/data_validation/DFGLSArch.py +19 -13
  32. validmind/tests/data_validation/DatasetDescription.py +17 -11
  33. validmind/tests/data_validation/DatasetSplit.py +7 -5
  34. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  35. validmind/tests/data_validation/Duplicates.py +33 -25
  36. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  37. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  38. validmind/tests/data_validation/HighCardinality.py +19 -12
  39. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  40. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  41. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  42. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  43. validmind/tests/data_validation/JarqueBera.py +70 -0
  44. validmind/tests/data_validation/KPSS.py +34 -29
  45. validmind/tests/data_validation/LJungBox.py +66 -0
  46. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  47. validmind/tests/data_validation/MissingValues.py +32 -27
  48. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  49. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  50. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  51. validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
  52. validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
  53. validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
  54. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
  55. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  56. validmind/tests/data_validation/RunsTest.py +72 -0
  57. validmind/tests/data_validation/ScatterPlot.py +63 -78
  58. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  59. validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
  60. validmind/tests/data_validation/Skewness.py +35 -37
  61. validmind/tests/data_validation/SpreadPlot.py +35 -35
  62. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  63. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  64. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  65. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  66. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  67. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  68. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  69. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  70. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  71. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  72. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  73. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  74. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  75. validmind/tests/data_validation/UniqueRows.py +11 -6
  76. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  77. validmind/tests/data_validation/WOEBinTable.py +35 -30
  78. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  79. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  80. validmind/tests/data_validation/nlp/Hashtags.py +42 -40
  81. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  82. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  83. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  84. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  85. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  86. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  87. validmind/tests/data_validation/nlp/TextDescription.py +39 -36
  88. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  89. validmind/tests/decorator.py +81 -42
  90. validmind/tests/model_validation/BertScore.py +36 -27
  91. validmind/tests/model_validation/BleuScore.py +25 -19
  92. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  93. validmind/tests/model_validation/ContextualRecall.py +38 -13
  94. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  95. validmind/tests/model_validation/MeteorScore.py +46 -33
  96. validmind/tests/model_validation/ModelMetadata.py +32 -64
  97. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  98. validmind/tests/model_validation/RegardScore.py +30 -14
  99. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  100. validmind/tests/model_validation/RougeScore.py +36 -30
  101. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  102. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  103. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  104. validmind/tests/model_validation/TokenDisparity.py +31 -23
  105. validmind/tests/model_validation/ToxicityScore.py +26 -17
  106. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  107. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  108. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  109. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  110. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  111. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  112. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  113. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  114. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  115. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  116. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  117. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  118. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  119. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  120. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  121. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  122. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  123. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  124. validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
  125. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  126. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  127. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  128. validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
  129. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  130. validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
  131. validmind/tests/model_validation/ragas/utils.py +6 -0
  132. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  133. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  134. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  135. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  136. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  137. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  138. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  139. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  140. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  141. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  142. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  143. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  144. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  145. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  146. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  147. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  148. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  149. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  150. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
  151. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  152. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  153. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  154. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  155. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  156. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  157. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
  158. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  159. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
  160. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
  161. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  162. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  163. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  164. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  165. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  166. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  167. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
  168. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  169. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  170. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  171. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
  172. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  173. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  174. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  175. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  176. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  177. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  178. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  179. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  180. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  181. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  182. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  183. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  184. validmind/tests/prompt_validation/Bias.py +14 -11
  185. validmind/tests/prompt_validation/Clarity.py +16 -14
  186. validmind/tests/prompt_validation/Conciseness.py +7 -5
  187. validmind/tests/prompt_validation/Delimitation.py +23 -22
  188. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  189. validmind/tests/prompt_validation/Robustness.py +12 -10
  190. validmind/tests/prompt_validation/Specificity.py +13 -11
  191. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  192. validmind/tests/run.py +68 -23
  193. validmind/unit_metrics/__init__.py +81 -144
  194. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  195. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  196. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  197. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  198. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  199. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  200. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  201. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  202. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  203. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  204. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  205. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  206. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  207. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  208. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  209. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  210. validmind/utils.py +4 -0
  211. validmind/vm_models/dataset/dataset.py +2 -0
  212. validmind/vm_models/figure.py +5 -0
  213. validmind/vm_models/test/metric.py +1 -0
  214. validmind/vm_models/test/result_wrapper.py +143 -158
  215. validmind/vm_models/test/threshold_test.py +1 -0
  216. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
  217. validmind-2.5.18.dist-info/RECORD +324 -0
  218. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  219. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  220. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  221. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  222. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  223. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  224. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  225. validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
  226. validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
  227. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  228. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  229. validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
  230. validmind-2.5.8.dist-info/RECORD +0 -318
  231. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
  232. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
  233. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
validmind/tests/data_validation/PhillipsPerronArch.py
@@ -17,36 +17,43 @@ logger = get_logger(__name__)
 @dataclass
 class PhillipsPerronArch(Metric):
     """
-    Executes Phillips-Perron test to assess the stationarity of time series data in each ML model feature.
-
-    **Purpose**: The Phillips-Perron (PP) test is used to establish the order of integration in time series data,
-    testing a null hypothesis that a time series is unit-root non-stationary. This is vital in forecasting and
-    understanding the stochastic behavior of data within machine learning models. Essentially, the PP test aids in
-    confirming the robustness of results and generating valid predictions from regression analysis models.
-
-    **Test Mechanism**: The PP test is conducted for each feature in the dataset. A data frame is created from the
-    dataset, and for each column in this frame, the PhillipsPerron method calculates the statistic value, p-value, used
-    lags, and number of observations. This process computes the PP metric for each feature and stores the results for
-    future reference.
-
-    **Signs of High Risk**:
-    - A high P-value could imply that the series has a unit root and is therefore non-stationary.
-    - Test statistic values that surpass critical values indicate additional evidence of non-stationarity.
-    - A high 'usedlag' value for a series could point towards autocorrelation issues which could further impede the
-    model's performance.
-
-    **Strengths**:
-    - Resilience against heteroskedasticity in the error term is a significant strength of the PP test.
-    - Its capacity to handle long time series data.
-    - Its ability to determine whether the time series is stationary or not, influencing the selection of suitable
-    models for forecasting.
-
-    **Limitations**:
-    - The PP test can only be employed within a univariate time series framework.
-    - The test relies on asymptotic theory, which means the test's power can significantly diminish for small sample
-    sizes.
-    - The need to convert non-stationary time series into stationary series through differencing might lead to loss of
-    vital data points.
+    Assesses the stationarity of time series data in each feature of the ML model using the Phillips-Perron test.
+
+    ### Purpose
+
+    The Phillips-Perron (PP) test is used to determine the stationarity of time series data for each feature in a
+    dataset, which is crucial for forecasting tasks. It tests the null hypothesis that a time series is unit-root
+    non-stationary. This is vital for understanding the stochastic behavior of the data and ensuring the robustness and
+    validity of predictions generated by regression analysis models.
+
+    ### Test Mechanism
+
+    The PP test is conducted for each feature in the dataset as follows:
+    - A data frame is created from the dataset.
+    - For each column, the Phillips-Perron method calculates the test statistic, p-value, lags used, and number of
+    observations.
+    - The results are then stored for each feature, providing a metric that indicates the stationarity of the time
+    series data.
+
+    ### Signs of High Risk
+
+    - A high p-value, indicating that the series has a unit root and is non-stationary.
+    - Test statistic values exceeding critical values, suggesting non-stationarity.
+    - High 'usedlag' value, pointing towards autocorrelation issues that may degrade model performance.
+
+    ### Strengths
+
+    - Resilience against heteroskedasticity in the error term.
+    - Effective for long time series data.
+    - Helps in determining whether the time series is stationary, aiding in the selection of suitable forecasting
+    models.
+
+    ### Limitations
+
+    - Applicable only within a univariate time series framework.
+    - Relies on asymptotic theory, which may reduce the test’s power for small sample sizes.
+    - Non-stationary time series must be converted to stationary series through differencing, potentially leading to
+    loss of important data points.
     """
 
     name = "phillips_perron"
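To see the mechanism the updated docstring describes, the per-column loop can be reproduced in a few lines. A minimal sketch, assuming the "Phillips-Perron method" refers to `arch.unitroot.PhillipsPerron` from the `arch` package (the data frame and column names below are illustrative, not taken from this package):

```python
import numpy as np
import pandas as pd
from arch.unitroot import PhillipsPerron

# Illustrative series: a random walk (unit root) and white noise (stationary).
rng = np.random.default_rng(0)
df = pd.DataFrame({
    "random_walk": rng.normal(size=200).cumsum(),
    "white_noise": rng.normal(size=200),
})

results = {}
for col in df.columns:
    pp = PhillipsPerron(df[col].dropna())  # PP unit-root test on one series
    results[col] = {
        "stat": pp.stat,      # test statistic, compared against critical values
        "pvalue": pp.pvalue,  # small p-value -> reject the unit-root null
        "usedlag": pp.lags,   # lags used in the long-run variance estimate
        "nobs": pp.nobs,      # number of observations used
    }

print(pd.DataFrame(results).T.round(4))
```

The random-walk column should yield a large p-value (unit root not rejected), while the white-noise column should yield a p-value near zero.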
validmind/tests/data_validation/ProtectedClassesCombination.py
@@ -0,0 +1,197 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import sys
+
+import pandas as pd
+import plotly.graph_objects as go
+import plotly.subplots as sp
+from fairlearn.metrics import (
+    MetricFrame,
+    count,
+    demographic_parity_ratio,
+    equalized_odds_ratio,
+    false_positive_rate,
+    selection_rate,
+    true_positive_rate,
+)
+
+from validmind import tags, tasks
+from validmind.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+@tags("bias_and_fairness")
+@tasks("classification", "regression")
+def ProtectedClassesCombination(dataset, model, protected_classes=None):
+    """
+    Visualizes combinations of protected classes and their corresponding error metric differences.
+
+    ### Purpose
+
+    This test aims to provide insights into how different combinations of protected classes affect various error metrics,
+    particularly the false negative rate (FNR) and false positive rate (FPR). By visualizing these combinations,
+    it helps identify potential biases or disparities in model performance across different intersectional groups.
+
+    ### Test Mechanism
+
+    The test performs the following steps:
+    1. Combines the specified protected class columns to create a single multi-class category.
+    2. Calculates error metrics (FNR, FPR, etc.) for each combination of protected classes.
+    3. Generates visualizations showing the distribution of these metrics across all class combinations.
+
+    ### Signs of High Risk
+
+    - Large disparities in FNR or FPR across different protected class combinations.
+    - Consistent patterns of higher error rates for specific combinations of protected attributes.
+    - Unexpected or unexplainable variations in error metrics between similar group combinations.
+
+    ### Strengths
+
+    - Provides a comprehensive view of intersectional fairness across multiple protected attributes.
+    - Allows for easy identification of potentially problematic combinations of protected classes.
+    - Visualizations make it easier to spot patterns or outliers in model performance across groups.
+
+    ### Limitations
+
+    - May become complex and difficult to interpret with a large number of protected classes or combinations.
+    - Does not provide statistical significance of observed differences.
+    - Visualization alone may not capture all nuances of intersectional fairness.
+    """
+
+    if sys.version_info < (3, 9):
+        raise RuntimeError("This test requires Python 3.9 or higher.")
+
+    if protected_classes is None:
+        logger.warning(
+            "No protected classes provided. Please pass the 'protected_classes' parameter to run this test."
+        )
+        return pd.DataFrame()
+
+    # Construct a function dictionary for figures
+    my_metrics = {
+        "fpr": false_positive_rate,
+        "tpr": true_positive_rate,
+        "selection rate": selection_rate,
+        "count": count,
+    }
+
+    # Construct a MetricFrame for figures
+    mf = MetricFrame(
+        metrics=my_metrics,
+        y_true=dataset.y,
+        y_pred=dataset.y_pred(model),
+        sensitive_features=dataset._df[protected_classes],
+    )
+
+    # Combine protected class columns to create a single multi-class category for the x-axis
+    metrics_by_group = mf.by_group.reset_index()
+    metrics_by_group["class_combination"] = metrics_by_group[protected_classes].apply(
+        lambda row: ", ".join(row.values.astype(str)), axis=1
+    )
+
+    # Create the subplots for the bar plots
+    fig = sp.make_subplots(
+        rows=2,
+        cols=2,
+        subplot_titles=[
+            "False Positive Rate",
+            "True Positive Rate",
+            "Selection Rate",
+            "Count",
+        ],
+    )
+
+    # Add bar plots for each metric
+    fig.add_trace(
+        go.Bar(
+            x=metrics_by_group["class_combination"],
+            y=metrics_by_group["fpr"],
+            name="FPR",
+        ),
+        row=1,
+        col=1,
+    )
+    fig.add_trace(
+        go.Bar(
+            x=metrics_by_group["class_combination"],
+            y=metrics_by_group["tpr"],
+            name="TPR",
+        ),
+        row=1,
+        col=2,
+    )
+    fig.add_trace(
+        go.Bar(
+            x=metrics_by_group["class_combination"],
+            y=metrics_by_group["selection rate"],
+            name="Selection Rate",
+        ),
+        row=2,
+        col=1,
+    )
+    fig.add_trace(
+        go.Bar(
+            x=metrics_by_group["class_combination"],
+            y=metrics_by_group["count"],
+            name="Count",
+        ),
+        row=2,
+        col=2,
+    )
+
+    # Update layout of the figure to match the original style
+    fig.update_layout(
+        title="Show all metrics",
+        height=800,
+        width=900,
+        barmode="group",
+        legend=dict(orientation="h", yanchor="bottom", y=-0.3, xanchor="center", x=0.5),
+        margin=dict(t=50),
+        font=dict(size=12),
+    )
+
+    # Rotate x-axis labels for better readability
+    fig.update_xaxes(tickangle=45, row=1, col=1)
+    fig.update_xaxes(tickangle=45, row=1, col=2)
+    fig.update_xaxes(tickangle=45, row=2, col=1)
+    fig.update_xaxes(tickangle=45, row=2, col=2)
+
+    # Extract demographic parity ratio and equalized odds ratio
+    m_dpr = []
+    m_eqo = []
+    for protected_class in protected_classes:
+        m_dpr.append(
+            demographic_parity_ratio(
+                y_true=dataset.y,
+                y_pred=dataset.y_pred(model),
+                sensitive_features=dataset._df[[protected_class]],
+            )
+        )
+        m_eqo.append(
+            equalized_odds_ratio(
+                y_true=dataset.y,
+                y_pred=dataset.y_pred(model),
+                sensitive_features=dataset._df[[protected_class]],
+            )
+        )
+
+    # Create a DataFrame for the demographic parity and equalized odds ratio
+    dpr_eor_df = pd.DataFrame(
+        columns=protected_classes,
+        index=["demographic parity ratio", "equal odds ratio"],
+    )
+
+    for i in range(len(m_dpr)):
+        dpr_eor_df[protected_classes[i]]["demographic parity ratio"] = round(
+            m_dpr[i], 2
+        )
+        dpr_eor_df[protected_classes[i]]["equal odds ratio"] = round(m_eqo[i], 2)
+
+    return (
+        {"Class Combination Table": metrics_by_group},
+        {"DPR and EOR table": dpr_eor_df},
+        fig,
+    )
validmind/tests/data_validation/ProtectedClassesDescription.py
@@ -0,0 +1,130 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+
+import pandas as pd
+import plotly.graph_objects as go
+
+from validmind import tags, tasks
+from validmind.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+@tags("bias_and_fairness", "descriptive_statistics")
+@tasks("classification", "regression")
+def ProtectedClassesDescription(dataset, protected_classes=None):
+    """
+    Visualizes the distribution of protected classes in the dataset relative to the target variable
+    and provides descriptive statistics.
+
+    ### Purpose
+
+    The ProtectedClassesDescription test aims to identify potential biases or significant differences in the
+    distribution of target outcomes across different protected classes. This visualization and statistical summary
+    help in understanding the relationship between protected attributes and the target variable, which is crucial
+    for assessing fairness in machine learning models.
+
+    ### Test Mechanism
+
+    The function creates interactive stacked bar charts for each specified protected class using Plotly.
+    Additionally, it generates a single table of descriptive statistics for all protected classes, including:
+    - Protected class and category
+    - Count and percentage of each category within the protected class
+    - Mean, median, and mode of the target variable for each category
+    - Standard deviation of the target variable for each category
+    - Minimum and maximum values of the target variable for each category
+
+    ### Signs of High Risk
+
+    - Significant imbalances in the distribution of target outcomes across different categories of a protected class.
+    - Large disparities in mean, median, or mode of the target variable across categories.
+    - Underrepresentation or overrepresentation of certain groups within protected classes.
+    - High standard deviations in certain categories, indicating potential volatility or outliers.
+
+    ### Strengths
+
+    - Provides both visual and statistical representation of potential biases in the dataset.
+    - Allows for easy identification of imbalances in target variable distribution across protected classes.
+    - Interactive plots enable detailed exploration of the data.
+    - Consolidated statistical summary provides quantitative measures to complement visual analysis.
+    - Applicable to both classification and regression tasks.
+
+    ### Limitations
+
+    - Does not provide advanced statistical measures of bias or fairness.
+    - May become cluttered if there are many categories within a protected class or many unique target values.
+    - Interpretation may require domain expertise to understand the implications of observed disparities.
+    - Does not account for intersectionality or complex interactions between multiple protected attributes.
+    """
+
+    if protected_classes is None:
+        logger.warning(
+            "No protected classes provided. Please pass the 'protected_classes' parameter to run this test."
+        )
+        return pd.DataFrame()
+
+    figures = []
+    all_stats = []
+
+    df = dataset._df
+    target = dataset.target_column
+
+    for protected_class in protected_classes:
+        # Create the stacked bar chart
+        counts = df.groupby([protected_class, target]).size().unstack(fill_value=0)
+        fig = go.Figure()
+        for col in counts.columns:
+            fig.add_trace(
+                go.Bar(
+                    x=counts.index,
+                    y=counts[col],
+                    name=str(col),
+                    text=counts[col],
+                    textposition="auto",
+                )
+            )
+
+        fig.update_layout(
+            title=f"Distribution of {protected_class} by {target}",
+            xaxis_title=protected_class,
+            yaxis_title="Count",
+            barmode="stack",
+            showlegend=True,
+            legend_title=target,
+        )
+
+        figures.append(fig)
+
+        # Get unique values in the target column
+        target_labels = df[target].unique()
+
+        for category in df[protected_class].unique():
+            category_data = df[df[protected_class] == category]
+            stats = {
+                "Protected Class": protected_class,
+                "Category": category,
+                "Count": len(category_data),
+                "Percentage": len(category_data) / len(df) * 100,
+            }
+
+            # Add the rate of each target label within the category
+            for label in target_labels:
+                label_data = category_data[category_data[target] == label]
+                stats[f"Rate {target}: {label}"] = (
+                    len(label_data) / len(category_data) * 100
+                )
+
+            all_stats.append(stats)
+
+    # Create a single DataFrame with all statistics
+    stats_df = pd.DataFrame(all_stats)
+    stats_df = stats_df.round(2)  # Round to 2 decimal places for readability
+
+    # Sort the DataFrame by Protected Class and Count (descending)
+    stats_df = stats_df.sort_values(
+        ["Protected Class", "Count"], ascending=[True, False]
+    )
+
+    return (stats_df, *tuple(figures))
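The `Rate {target}: {label}` columns assembled in the loop above are plain within-category percentages, which makes the table easy to cross-check. An illustrative sketch with hypothetical column names:

```python
import pandas as pd

# Hypothetical protected class ("gender") and binary target ("default").
df = pd.DataFrame({
    "gender": ["F", "F", "M", "M", "M", "F"],
    "default": [0, 1, 0, 0, 1, 0],
})

# Row-normalized percentages of each target label within each category,
# matching the "Rate {target}: {label}" columns built in the loop above.
rates = pd.crosstab(df["gender"], df["default"], normalize="index") * 100
print(rates.round(2))
```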
validmind/tests/data_validation/ProtectedClassesDisparity.py
@@ -0,0 +1,133 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import io
+import sys
+
+import aequitas.plot as ap
+import pandas as pd
+from aequitas.bias import Bias
+from aequitas.group import Group
+from aequitas.plotting import Plot
+
+from validmind import tags, tasks
+from validmind.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+@tags("bias_and_fairness")
+@tasks("classification", "regression")
+def ProtectedClassesDisparity(
+    dataset,
+    model,
+    protected_classes=None,
+    disparity_tolerance=1.25,
+    metrics=["fnr", "fpr", "tpr"],
+):
+    """
+    Investigates disparities in model performance across different protected class segments.
+
+    ### Purpose
+
+    This test aims to identify and quantify potential biases in model outcomes by comparing various performance metrics
+    across different segments of protected classes. It helps in assessing whether the model produces discriminatory
+    outcomes for certain groups, which is crucial for ensuring fairness in machine learning models.
+
+    ### Test Mechanism
+
+    The test performs the following steps:
+    1. Calculates performance metrics (e.g., false negative rate, false positive rate, true positive rate) for each segment
+    of the specified protected classes.
+    2. Computes disparity ratios by comparing these metrics between different segments and a reference group.
+    3. Generates visualizations showing the disparities and their relation to a user-defined disparity tolerance threshold.
+    4. Produces a comprehensive table with various disparity metrics for detailed analysis.
+
+    ### Signs of High Risk
+
+    - Disparity ratios exceeding the specified disparity tolerance threshold.
+    - Consistent patterns of higher error rates or lower performance for specific protected class segments.
+    - Statistically significant differences in performance metrics across segments.
+
+    ### Strengths
+
+    - Provides a comprehensive view of model fairness across multiple protected attributes and metrics.
+    - Allows for easy identification of problematic disparities through visual and tabular representations.
+    - Customizable disparity tolerance threshold to align with specific use-case requirements.
+    - Applicable to various performance metrics, offering a multi-faceted analysis of model fairness.
+
+    ### Limitations
+
+    - Relies on a predefined reference group for each protected class, which may not always be the most appropriate choice.
+    - Does not account for intersectionality between different protected attributes.
+    - The interpretation of results may require domain expertise to understand the implications of observed disparities.
+    """
+
+    if protected_classes is None:
+        logger.warning(
+            "No protected classes provided. Please pass the 'protected_classes' parameter to run this test."
+        )
+        return pd.DataFrame()
+
+    if sys.version_info < (3, 9):
+        raise RuntimeError("This test requires Python 3.9 or higher.")
+
+    df = dataset._df
+
+    for protected_class in protected_classes:
+        # make the column dtype compatible with the aequitas package
+        df[protected_class] = pd.Categorical(df[protected_class]).astype("object")
+
+    df["score"] = dataset.y_pred(model).astype(int)
+    df["label_value"] = df[dataset.target_column].astype(int)
+
+    # Map each protected class to its reference group; by default, use the
+    # most frequently observed category as the reference.
+    attributes_and_reference_groups = {}
+    for protected_class in protected_classes:
+        attributes_and_reference_groups.update(
+            {protected_class: df[protected_class].value_counts().idxmax()}
+        )
+
+    attributes_to_audit = list(attributes_and_reference_groups.keys())
+
+    # Initialize Aequitas
+    g = Group()
+    b = Bias()
+    aqp = Plot()
+
+    columns_to_include = (
+        protected_classes + [dataset.target_column] + ["score", "label_value"]
+    )
+
+    # get_crosstabs returns a dataframe of the group counts and group value bias metrics.
+    xtab, _ = g.get_crosstabs(df[columns_to_include], attr_cols=attributes_to_audit)
+    bdf = b.get_disparity_predefined_groups(
+        xtab,
+        original_df=df[columns_to_include],
+        ref_groups_dict=attributes_and_reference_groups,
+        alpha=0.05,
+        mask_significance=True,
+    )
+
+    plots = []
+    for protected_class in protected_classes:
+        plot = ap.disparity(
+            bdf, metrics, protected_class, fairness_threshold=disparity_tolerance
+        )
+
+        buf = io.BytesIO()  # in-memory buffer to receive the rendered image
+        plot.save(
+            buf, format="png"
+        )  # render the disparity chart to PNG in the buffer
+        plots.append(buf.getvalue())
+
+    string = "_disparity"
+    metrics_adj = [x + string for x in metrics]
+
+    table = bdf[["attribute_name", "attribute_value"] + b.list_disparities(bdf)]
+    plots.append(aqp.plot_disparity_all(bdf, metrics=metrics_adj))
+    plots_return = tuple(plots)
+
+    return (table, *plots_return)
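To make `disparity_tolerance` concrete: Aequitas-style disparity metrics divide each group's metric by the reference group's metric, and a ratio is commonly flagged when it falls outside `[1/tolerance, tolerance]`. The sketch below hand-computes an FPR disparity under that convention; the toy data and the flagging rule are illustrative assumptions, not code from this file:

```python
import pandas as pd

# Toy labels and predictions: "ref" is the reference group (the most frequent
# category, matching this test's default) and "other" is compared against it.
df = pd.DataFrame({
    "group": ["ref"] * 6 + ["other"] * 4,
    "label_value": [0, 0, 0, 1, 1, 0, 0, 0, 1, 0],
    "score": [0, 1, 0, 1, 0, 0, 1, 1, 1, 0],
})

def fpr(sub):
    negatives = sub[sub["label_value"] == 0]  # actual negatives
    return float((negatives["score"] == 1).mean())  # share falsely predicted positive

ratio = fpr(df[df["group"] == "other"]) / fpr(df[df["group"] == "ref"])  # fpr_disparity

tolerance = 1.25
flagged = not (1 / tolerance <= ratio <= tolerance)  # outside [0.8, 1.25] -> flagged
print(round(ratio, 2), flagged)  # 2.67 True
```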