validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +80 -119
  3. validmind/ai/test_result_description/config.yaml +29 -0
  4. validmind/ai/test_result_description/context.py +73 -0
  5. validmind/ai/test_result_description/image_processing.py +124 -0
  6. validmind/ai/test_result_description/system.jinja +39 -0
  7. validmind/ai/test_result_description/user.jinja +25 -0
  8. validmind/api_client.py +89 -43
  9. validmind/client.py +2 -2
  10. validmind/client_config.py +11 -14
  11. validmind/datasets/credit_risk/__init__.py +1 -0
  12. validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
  13. validmind/datasets/credit_risk/lending_club_bias.py +142 -0
  14. validmind/datasets/regression/fred_timeseries.py +67 -138
  15. validmind/template.py +1 -0
  16. validmind/test_suites/__init__.py +0 -2
  17. validmind/test_suites/statsmodels_timeseries.py +1 -1
  18. validmind/test_suites/summarization.py +0 -1
  19. validmind/test_suites/time_series.py +0 -43
  20. validmind/tests/__types__.py +14 -15
  21. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  22. validmind/tests/data_validation/ADF.py +31 -24
  23. validmind/tests/data_validation/AutoAR.py +9 -9
  24. validmind/tests/data_validation/AutoMA.py +23 -16
  25. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  26. validmind/tests/data_validation/AutoStationarity.py +21 -16
  27. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  28. validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
  29. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
  30. validmind/tests/data_validation/ClassImbalance.py +15 -12
  31. validmind/tests/data_validation/DFGLSArch.py +19 -13
  32. validmind/tests/data_validation/DatasetDescription.py +17 -11
  33. validmind/tests/data_validation/DatasetSplit.py +7 -5
  34. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  35. validmind/tests/data_validation/Duplicates.py +33 -25
  36. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  37. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  38. validmind/tests/data_validation/HighCardinality.py +19 -12
  39. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  40. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  41. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  42. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  43. validmind/tests/data_validation/JarqueBera.py +70 -0
  44. validmind/tests/data_validation/KPSS.py +34 -29
  45. validmind/tests/data_validation/LJungBox.py +66 -0
  46. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  47. validmind/tests/data_validation/MissingValues.py +32 -27
  48. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  49. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  50. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  51. validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
  52. validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
  53. validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
  54. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
  55. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  56. validmind/tests/data_validation/RunsTest.py +72 -0
  57. validmind/tests/data_validation/ScatterPlot.py +63 -78
  58. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  59. validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
  60. validmind/tests/data_validation/Skewness.py +35 -37
  61. validmind/tests/data_validation/SpreadPlot.py +35 -35
  62. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  63. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  64. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  65. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  66. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  67. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  68. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  69. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  70. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  71. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  72. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  73. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  74. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  75. validmind/tests/data_validation/UniqueRows.py +11 -6
  76. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  77. validmind/tests/data_validation/WOEBinTable.py +35 -30
  78. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  79. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  80. validmind/tests/data_validation/nlp/Hashtags.py +42 -40
  81. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  82. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  83. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  84. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  85. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  86. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  87. validmind/tests/data_validation/nlp/TextDescription.py +39 -36
  88. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  89. validmind/tests/decorator.py +81 -42
  90. validmind/tests/model_validation/BertScore.py +36 -27
  91. validmind/tests/model_validation/BleuScore.py +25 -19
  92. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  93. validmind/tests/model_validation/ContextualRecall.py +38 -13
  94. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  95. validmind/tests/model_validation/MeteorScore.py +46 -33
  96. validmind/tests/model_validation/ModelMetadata.py +32 -64
  97. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  98. validmind/tests/model_validation/RegardScore.py +30 -14
  99. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  100. validmind/tests/model_validation/RougeScore.py +36 -30
  101. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  102. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  103. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  104. validmind/tests/model_validation/TokenDisparity.py +31 -23
  105. validmind/tests/model_validation/ToxicityScore.py +26 -17
  106. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  107. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  108. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  109. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  110. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  111. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  112. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  113. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  114. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  115. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  116. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  117. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  118. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  119. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  120. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  121. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  122. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  123. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  124. validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
  125. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  126. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  127. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  128. validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
  129. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  130. validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
  131. validmind/tests/model_validation/ragas/utils.py +6 -0
  132. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  133. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  134. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  135. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  136. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  137. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  138. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  139. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  140. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  141. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  142. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  143. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  144. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  145. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  146. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  147. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  148. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  149. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  150. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
  151. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  152. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  153. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  154. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  155. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  156. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  157. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
  158. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  159. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
  160. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
  161. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  162. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  163. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  164. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  165. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  166. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  167. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
  168. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  169. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  170. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  171. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
  172. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  173. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  174. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  175. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  176. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  177. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  178. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  179. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  180. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  181. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  182. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  183. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  184. validmind/tests/prompt_validation/Bias.py +14 -11
  185. validmind/tests/prompt_validation/Clarity.py +16 -14
  186. validmind/tests/prompt_validation/Conciseness.py +7 -5
  187. validmind/tests/prompt_validation/Delimitation.py +23 -22
  188. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  189. validmind/tests/prompt_validation/Robustness.py +12 -10
  190. validmind/tests/prompt_validation/Specificity.py +13 -11
  191. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  192. validmind/tests/run.py +68 -23
  193. validmind/unit_metrics/__init__.py +81 -144
  194. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  195. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  196. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  197. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  198. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  199. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  200. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  201. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  202. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  203. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  204. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  205. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  206. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  207. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  208. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  209. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  210. validmind/utils.py +4 -0
  211. validmind/vm_models/dataset/dataset.py +2 -0
  212. validmind/vm_models/figure.py +5 -0
  213. validmind/vm_models/test/metric.py +1 -0
  214. validmind/vm_models/test/result_wrapper.py +143 -158
  215. validmind/vm_models/test/threshold_test.py +1 -0
  216. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
  217. validmind-2.5.18.dist-info/RECORD +324 -0
  218. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  219. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  220. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  221. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  222. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  223. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  224. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  225. validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
  226. validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
  227. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  228. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  229. validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
  230. validmind-2.5.8.dist-info/RECORD +0 -318
  231. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
  232. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
  233. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
--- /dev/null
+++ b/validmind/tests/data_validation/JarqueBera.py
@@ -0,0 +1,70 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+from statsmodels.stats.stattools import jarque_bera
+
+from validmind import tags, tasks
+
+
+@tasks("classification", "regression")
+@tags("tabular_data", "data_distribution", "statistical_test", "statsmodels")
+def JarqueBera(dataset):
+    """
+    Assesses normality of dataset features in an ML model using the Jarque-Bera test.
+
+    ### Purpose
+
+    The purpose of the Jarque-Bera test as implemented in this metric is to determine if the features in the dataset of
+    a given Machine Learning model follow a normal distribution. This is crucial for understanding the distribution and
+    behavior of the model's features, as numerous statistical methods assume normal distribution of the data.
+
+    ### Test Mechanism
+
+    The test mechanism involves computing the Jarque-Bera statistic, p-value, skew, and kurtosis for each feature in
+    the dataset. It utilizes the 'jarque_bera' function from the 'statsmodels' library in Python, storing the results
+    in a dictionary. The test evaluates the skewness and kurtosis to ascertain whether the dataset follows a normal
+    distribution. A significant p-value (typically less than 0.05) implies that the data does not possess normal
+    distribution.
+
+    ### Signs of High Risk
+
+    - A high Jarque-Bera statistic and a low p-value (usually less than 0.05) indicate high-risk conditions.
+    - Such results suggest the data significantly deviates from a normal distribution. If a machine learning model
+    expects feature data to be normally distributed, these findings imply that it may not function as intended.
+
+    ### Strengths
+
+    - Provides insights into the shape of the data distribution, helping determine whether a given set of data follows
+    a normal distribution.
+    - Particularly useful for risk assessment for models that assume a normal distribution of data.
+    - By measuring skewness and kurtosis, it provides additional insights into the nature and magnitude of a
+    distribution's deviation.
+
+    ### Limitations
+
+    - Only checks for normality in the data distribution. It cannot provide insights into other types of distributions.
+    - Datasets that aren't normally distributed but follow some other distribution might lead to inaccurate risk
+    assessments.
+    - Highly sensitive to large sample sizes, often rejecting the null hypothesis (that data is normally distributed)
+    even for minor deviations in larger datasets.
+    """
+
+    df = dataset.df[dataset.feature_columns_numeric]
+
+    jb_values = {}
+    for col in df.columns:
+        jb_stat, jb_pvalue, jb_skew, jb_kurtosis = jarque_bera(df[col].values)
+        jb_values[col] = {
+            "stat": jb_stat,
+            "pvalue": jb_pvalue,
+            "skew": jb_skew,
+            "kurtosis": jb_kurtosis,
+        }
+
+    jb_df = pd.DataFrame.from_dict(jb_values, orient="index")
+    jb_df.reset_index(inplace=True)
+    jb_df.columns = ["column", "stat", "pvalue", "skew", "kurtosis"]
+
+    return jb_df
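The new test is a thin wrapper over statsmodels' `jarque_bera`. As a point of reference, a minimal standalone sketch of the same per-feature computation on synthetic data (the feature names and data are illustrative, not from the package):

import numpy as np
from statsmodels.stats.stattools import jarque_bera

rng = np.random.default_rng(0)
features = {
    "normal_feature": rng.normal(size=1_000),       # should look normal
    "skewed_feature": rng.exponential(size=1_000),  # right-skewed: should reject normality
}

for name, values in features.items():
    stat, pvalue, skew, kurtosis = jarque_bera(values)
    # p < 0.05 suggests the feature deviates from a normal distribution
    print(f"{name}: JB={stat:.1f}, p={pvalue:.4f}, skew={skew:.2f}, kurtosis={kurtosis:.2f}")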
--- a/validmind/tests/data_validation/KPSS.py
+++ b/validmind/tests/data_validation/KPSS.py
@@ -16,35 +16,40 @@ logger = get_logger(__name__)
 @dataclass
 class KPSS(Metric):
     """
-    Executes KPSS unit root test to validate stationarity of time-series data in machine learning model.
-
-    **Purpose**: The Kwiatkowski-Phillips-Schmidt-Shin (KPSS) unit root test is utilized to ensure the stationarity of
-    data within the machine learning model. It specifically works on time-series data to establish the order of
-    integration, which is a prime requirement for accurate forecasting, given the fundamental condition for any time
-    series model is that the series should be stationary.
-
-    **Test Mechanism**: This metric evaluates the KPSS score for every feature present in the dataset. Within the KPSS
-    score, there are various components, namely: a statistic, a p-value, a used lag, and critical values. The core
-    scheme behind the KPSS score is to test the hypothesis that an observable time series is stationary around a
-    deterministic trend. If the computed statistic surpasses the critical value, the null hypothesis is dismissed,
-    inferring the series is non-stationary.
-
-    **Signs of High Risk**:
-    - High KPSS score represents a considerable risk, particularly if the calculated statistic is higher than the
-    critical value.
-    - If the null hypothesis is rejected and the series is recognized as non-stationary, it heavily influences the
-    model's forecasting capability rendering it less effective.
-
-    **Strengths**:
-    - The KPSS test directly measures the stationarity of a series, allowing it to fulfill a key prerequisite for many
-    time-series models, making it a valuable tool for model validation.
-    - The logics underpinning the test are intuitive and simple, making it understandable and accessible for developers
-    and risk management teams.
-
-    **Limitations**:
-    - The KPSS test presumes the absence of a unit root in the series and does not differentiate between series that
-    are stationary and those border-lining stationarity.
-    - The test might show restricted power against specific alternatives.
+    Assesses the stationarity of time-series data in a machine learning model using the KPSS unit root test.
+
+    ### Purpose
+
+    The KPSS (Kwiatkowski-Phillips-Schmidt-Shin) unit root test is utilized to ensure the stationarity of data within a
+    machine learning model. It specifically works on time-series data to establish the order of integration, which is
+    essential for accurate forecasting. A fundamental requirement for any time series model is that the series should
+    be stationary.
+
+    ### Test Mechanism
+
+    This test calculates the KPSS score for each feature in the dataset. The KPSS score includes a statistic, a
+    p-value, a used lag, and critical values. The core principle behind the KPSS test is to evaluate the hypothesis
+    that an observable time series is stationary around a deterministic trend. If the computed statistic exceeds the
+    critical value, the null hypothesis (that the series is stationary) is rejected, indicating that the series is
+    non-stationary.
+
+    ### Signs of High Risk
+
+    - High KPSS score, particularly if the calculated statistic is higher than the critical value.
+    - Rejection of the null hypothesis, indicating that the series is recognized as non-stationary, can severely affect
+    the model's forecasting capability.
+
+    ### Strengths
+
+    - Directly measures the stationarity of a series, fulfilling a key prerequisite for many time-series models.
+    - The underlying logic of the test is intuitive and simple, making it easy to understand and accessible for both
+    developers and risk management teams.
+
+    ### Limitations
+
+    - Assumes the absence of a unit root in the series and doesn't differentiate between series that are stationary and
+    those border-lining stationarity.
+    - The test may have restricted power against certain alternatives.
     - The reliability of the test is contingent on the number of lags selected, which introduces potential bias in the
     measurement.
     """
--- /dev/null
+++ b/validmind/tests/data_validation/LJungBox.py
@@ -0,0 +1,66 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+from statsmodels.stats.diagnostic import acorr_ljungbox
+
+from validmind import tags, tasks
+
+
+@tasks("regression")
+@tags("time_series_data", "forecasting", "statistical_test", "statsmodels")
+def LJungBox(dataset):
+    """
+    Assesses autocorrelations in dataset features by performing a Ljung-Box test on each feature.
+
+    ### Purpose
+
+    The Ljung-Box test is a type of statistical test utilized to ascertain whether there are autocorrelations within a
+    given dataset that differ significantly from zero. In the context of a machine learning model, this test is
+    primarily used to evaluate data utilized in regression tasks, especially those involving time series and
+    forecasting.
+
+    ### Test Mechanism
+
+    The test operates by iterating over each feature within the dataset and applying the `acorr_ljungbox`
+    function from the `statsmodels.stats.diagnostic` library. This function calculates the Ljung-Box statistic and
+    p-value for each feature. These results are then stored in a pandas DataFrame where the columns are the feature
+    name, statistic, and p-value, respectively. Generally, a lower p-value indicates a higher likelihood of significant
+    autocorrelations within the feature.
+
+    ### Signs of High Risk
+
+    - High Ljung-Box statistic values or low p-values.
+    - Presence of significant autocorrelations in the respective features.
+    - Potential for negative impact on model performance or bias if autocorrelations are not properly handled.
+
+    ### Strengths
+
+    - Powerful tool for detecting autocorrelations within datasets, especially in time series data.
+    - Provides quantitative measures (statistic and p-value) for precise evaluation.
+    - Helps avoid issues related to autoregressive residuals and other challenges in regression models.
+
+    ### Limitations
+
+    - Cannot detect all types of non-linearity or complex interrelationships among variables.
+    - Testing individual features may not fully encapsulate the dynamics of the data if features interact with each
+    other.
+    - Designed more for traditional statistical models and may not be fully compatible with certain types of complex
+    machine learning models.
+    """
+
+    df = dataset.df
+
+    ljung_box_values = {}
+    for col in df.columns:
+        lb_results = acorr_ljungbox(df[col].values, return_df=True)
+        ljung_box_values[col] = {
+            "stat": lb_results.iloc[0]["lb_stat"],
+            "pvalue": lb_results.iloc[0]["lb_pvalue"],
+        }
+
+    ljung_box_df = pd.DataFrame.from_dict(ljung_box_values, orient="index")
+    ljung_box_df.reset_index(inplace=True)
+    ljung_box_df.columns = ["column", "stat", "pvalue"]
+
+    return ljung_box_df
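A standalone sketch of the `acorr_ljungbox` call the test wraps, on synthetic series (the AR(1) construction is illustrative, chosen to make the contrast obvious):

import numpy as np
from statsmodels.stats.diagnostic import acorr_ljungbox

rng = np.random.default_rng(0)
noise = rng.normal(size=500)  # no autocorrelation expected: high p-value
ar1 = np.zeros(500)           # AR(1) with phi=0.8: strong autocorrelation, near-zero p-value
for t in range(1, 500):
    ar1[t] = 0.8 * ar1[t - 1] + rng.normal()

for name, values in [("noise", noise), ("ar1", ar1)]:
    res = acorr_ljungbox(values, lags=[10], return_df=True)
    print(f"{name}: stat={res['lb_stat'].iloc[0]:.1f}, p={res['lb_pvalue'].iloc[0]:.4f}")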
--- a/validmind/tests/data_validation/LaggedCorrelationHeatmap.py
+++ b/validmind/tests/data_validation/LaggedCorrelationHeatmap.py
@@ -17,30 +17,37 @@ class LaggedCorrelationHeatmap(Metric):
     Assesses and visualizes correlation between target variable and lagged independent variables in a time-series
     dataset.
 
-    **Purpose**: The LaggedCorrelationHeatmap metric is utilized to appraise and illustrate the correlation between the
-    target variable and delayed copies (lags) of independent variables in a time-series dataset. It assists in
-    revealing relationships in time-series data where the influence of an independent variable on the dependent
-    variable is not immediate but occurs after a period (lags).
-
-    **Test Mechanism**: To execute this test, Python's Pandas library pairs with Plotly to perform computations and
-    present the visualization in the form of a heatmap. The test begins by extracting the target variable and
-    corresponding independent variables from the dataset. Then, generation of lags of independent variables takes
-    place, followed by the calculation of correlation between these lagged variables and the target variable. The
-    outcome is a correlation matrix that gets recorded and illustrated as a heatmap, where different color intensities
-    represent the strength of the correlation, making patterns easier to identify.
-
-    **Signs of High Risk**:
+    ### Purpose
+
+    The LaggedCorrelationHeatmap metric is utilized to appraise and illustrate the correlation between the target
+    variable and delayed copies (lags) of independent variables in a time-series dataset. It assists in revealing
+    relationships in time-series data where the influence of an independent variable on the dependent variable is not
+    immediate but occurs after a period (lags).
+
+    ### Test Mechanism
+
+    To execute this test, Python's Pandas library pairs with Plotly to perform computations and present the
+    visualization in the form of a heatmap. The test begins by extracting the target variable and corresponding
+    independent variables from the dataset. Then, generation of lags of independent variables takes place, followed by
+    the calculation of correlation between these lagged variables and the target variable. The outcome is a correlation
+    matrix that gets recorded and illustrated as a heatmap, where different color intensities represent the strength of
+    the correlation, making patterns easier to identify.
+
+    ### Signs of High Risk
+
     - Insignificant correlations across the heatmap, indicating a lack of noteworthy relationships between variables.
     - Correlations that break intuition or previous understanding, suggesting potential issues with the dataset or the
     model.
 
-    **Strengths**:
+    ### Strengths
+
     - This metric serves as an exceptional tool for exploring and visualizing time-dependent relationships between
     features and the target variable in a time-series dataset.
     - It aids in identifying delayed effects that might go unnoticed with other correlation measures.
     - The heatmap offers an intuitive visual representation of time-dependent correlations and influences.
 
-    **Limitations**:
+    ### Limitations
+
     - The metric presumes linear relationships between variables, potentially ignoring non-linear relationships.
     - The correlation considered is linear; therefore, intricate non-linear interactions might be overlooked.
     - The metric is only applicable for time-series data, limiting its utility outside of this context.
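The lag-and-correlate mechanism described above reduces to a few lines of pandas. A minimal sketch on synthetic series (the three-step delay is contrived for illustration, not package code):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
x = rng.normal(size=300)
target = np.roll(x, 3) + 0.1 * rng.normal(size=300)  # x influences target 3 steps later
df = pd.DataFrame({"x": x, "target": target}).iloc[3:]  # drop the wrapped-around head

# Correlate the target with successively lagged copies of x; the peak reveals the delay
for k in range(6):
    corr = df["target"].corr(df["x"].shift(k))
    print(f"lag {k}: corr={corr:.2f}")  # peaks at lag 3 in this construction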
--- a/validmind/tests/data_validation/MissingValues.py
+++ b/validmind/tests/data_validation/MissingValues.py
@@ -19,34 +19,39 @@ class MissingValues(ThresholdTest):
     """
     Evaluates dataset quality by ensuring missing value ratio across all features does not exceed a set threshold.
 
-    **Purpose**: This test is designed to evaluate the quality of a dataset by measuring the number of missing values
+    ### Purpose
+
+    The Missing Values test is designed to evaluate the quality of a dataset by measuring the number of missing values
     across all features. The objective is to ensure that the ratio of missing data to total data is less than a
-    predefined threshold, defaulting to 1, to maintain the data quality necessary for reliable predictive strength in a
-    machine learning model.
-
-    **Test Mechanism**: The mechanism for this test involves iterating through each column of the dataset, counting
-    missing values (represented as NaNs), and calculating the percentage they represent against the total number of
-    rows. The test then checks if these missing value counts are less than the predefined `min_threshold`. The results
-    are shown in a table summarizing each column, the number of missing values, the percentage of missing values in
-    each column, and a Pass/Fail status based on the threshold comparison.
-
-    **Signs of High Risk**:
-    - When the number of missing values in any column exceeds the `min_threshold` value, it indicates a high risk.
-    - A high risk is also flagged when missing values are present across many columns. In both instances, the test
-    would return a "Fail" mark.
-
-    **Strengths**:
-    - The test offers a quick and granular identification of missing data across each feature in the dataset.
-    - It provides an effective, straightforward means of maintaining data quality, which is vital for constructing
-    efficient machine learning models.
-
-    **Limitations**:
-    - Even though the test can efficiently identify missing values, it does not suggest the root causes of these
-    missing values or recommend ways to impute or handle them.
-    - The test might overlook features with a significant amount of missing data, but still less than the
-    `min_threshold`. This could impact the model, especially if `min_threshold` is set too high.
-    - The test does not account for data encoded as values (like "-999" or "None"), which might not technically
-    classify as missing but could bear similar implications.
+    predefined threshold, defaulting to 1, in order to maintain the data quality necessary for reliable predictive
+    strength in a machine learning model.
+
+    ### Test Mechanism
+
+    The mechanism for this test involves iterating through each column of the dataset, counting missing values
+    (represented as NaNs), and calculating the percentage they represent against the total number of rows. The test
+    then checks if these missing value counts are less than the predefined `min_threshold`. The results are shown in a
+    table summarizing each column, the number of missing values, the percentage of missing values in each column, and a
+    Pass/Fail status based on the threshold comparison.
+
+    ### Signs of High Risk
+
+    - When the number of missing values in any column exceeds the `min_threshold` value.
+    - Presence of missing values across many columns, leading to multiple instances of failing the threshold.
+
+    ### Strengths
+
+    - Quick and granular identification of missing data across each feature in the dataset.
+    - Provides an effective and straightforward means of maintaining data quality, essential for constructing efficient
+    machine learning models.
+
+    ### Limitations
+
+    - Does not suggest the root causes of the missing values or recommend ways to impute or handle them.
+    - May overlook features with significant missing data but still less than the `min_threshold`, potentially
+    impacting the model.
+    - Does not account for data encoded as values like "-999" or "None," which might not technically classify as
+    missing but could bear similar implications.
     """
 
     name = "missing"
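The described mechanism amounts to a short pandas computation. A minimal sketch with a hypothetical frame and the documented default `min_threshold` of 1 (column names and data are illustrative):

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "age": [25, np.nan, 40, 31],
    "income": [50_000, 62_000, np.nan, np.nan],
})
min_threshold = 1  # the documented default: any missing value fails

missing = df.isna().sum()
summary = pd.DataFrame({
    "n_missing": missing,
    "pct_missing": 100 * missing / len(df),
    "pass_fail": np.where(missing < min_threshold, "Pass", "Fail"),
})
print(summary)  # here both columns fail: age has 1 NaN, income has 2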
--- a/validmind/tests/data_validation/MissingValuesBarPlot.py
+++ b/validmind/tests/data_validation/MissingValuesBarPlot.py
@@ -12,37 +12,41 @@ from validmind.vm_models import Figure, Metric
 @dataclass
 class MissingValuesBarPlot(Metric):
     """
-    Creates a bar plot showcasing the percentage of missing values in each column of the dataset with risk
-    categorization based on a user-defined threshold.
-
-    **Purpose:** The 'MissingValuesBarPlot' metric provides a color-coded visual representation of the percentage of
-    missing values for each column in an ML model's dataset. The primary purpose of this metric is to easily identify
-    and quantify missing data, which are essential steps in data preprocessing. The presence of missing data can
-    potentially skew the model's predictions and decrease its accuracy. Additionally, this metric uses a pre-set
-    threshold to categorize various columns into ones that contain missing data above the threshold (high risk) and
-    below the threshold (less risky).
-
-    **Test Mechanism:** The test mechanism involves scanning each column in the input dataset and calculating the
-    percentage of missing values. It then compares each column's missing data percentage with the predefined threshold,
-    categorizing columns with missing data above the threshold as high-risk. The test generates a bar plot in which
-    columns with missing data are represented on the y-axis and their corresponding missing data percentages are
-    displayed on the x-axis. The color of each bar reflects the missing data percentage in relation to the threshold:
-    grey for values below the threshold and light coral for those exceeding it. The user-defined threshold is
-    represented by a red dashed line on the plot.
-
-    **Signs of High Risk:**
+    Assesses the percentage and distribution of missing values in the dataset via a bar plot, with emphasis on
+    identifying high-risk columns based on a user-defined threshold.
+
+    ### Purpose
+
+    The 'MissingValuesBarPlot' metric provides a color-coded visual representation of the percentage of missing values
+    for each column in an ML model's dataset. The primary purpose of this metric is to easily identify and quantify
+    missing data, which are essential steps in data preprocessing. The presence of missing data can potentially skew
+    the model's predictions and decrease its accuracy. Additionally, this metric uses a pre-set threshold to categorize
+    various columns into ones that contain missing data above the threshold (high risk) and below the threshold (less
+    risky).
+
+    ### Test Mechanism
+
+    The test mechanism involves scanning each column in the input dataset and calculating the percentage of missing
+    values. It then compares each column's missing data percentage with the predefined threshold, categorizing columns
+    with missing data above the threshold as high-risk. The test generates a bar plot in which columns with missing
+    data are represented on the y-axis and their corresponding missing data percentages are displayed on the x-axis.
+    The color of each bar reflects the missing data percentage in relation to the threshold: grey for values below the
+    threshold and light coral for those exceeding it. The user-defined threshold is represented by a red dashed line on
+    the plot.
+
+    ### Signs of High Risk
 
     - Columns with higher percentages of missing values beyond the threshold are high-risk. These are visually
     represented by light coral bars on the bar plot.
 
-    **Strengths:**
+    ### Strengths
 
     - Helps in quickly identifying and quantifying missing data across all columns of the dataset.
     - Facilitates pattern recognition through visual representation.
     - Enables customization of the level of risk tolerance via a user-defined threshold.
     - Supports both classification and regression tasks, sharing its versatility.
 
-    **Limitations:**
+    ### Limitations
 
     - It only considers the quantity of missing values, not differentiating between different types of missingness
     (Missing completely at random - MCAR, Missing at random - MAR, Not Missing at random - NMAR).
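A minimal Plotly sketch of the plot described above (horizontal bars, grey vs. light coral coloring, red dashed threshold line); the data, threshold value, and styling are illustrative rather than the package's exact code:

import pandas as pd
import plotly.graph_objects as go

pct_missing = pd.Series({"age": 5.0, "income": 35.0, "zipcode": 92.0})
threshold = 80  # user-defined risk threshold, in percent

# Light coral above the threshold (high risk), grey below it
colors = ["lightcoral" if v > threshold else "grey" for v in pct_missing]
fig = go.Figure(
    go.Bar(x=pct_missing.values, y=pct_missing.index, orientation="h", marker_color=colors)
)
fig.add_vline(x=threshold, line_dash="dash", line_color="red")  # threshold marker
fig.show()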
--- a/validmind/tests/data_validation/PearsonCorrelationMatrix.py
+++ b/validmind/tests/data_validation/PearsonCorrelationMatrix.py
@@ -2,103 +2,90 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
 
 import plotly.graph_objects as go
 
-from validmind.vm_models import Figure, Metric
+from validmind import tags, tasks
 
 
-@dataclass
-class PearsonCorrelationMatrix(Metric):
+@tags("tabular_data", "numerical_data", "correlation")
+@tasks("classification", "regression")
+def PearsonCorrelationMatrix(dataset):
     """
     Evaluates linear dependency between numerical variables in a dataset via a Pearson Correlation coefficient heat map.
 
-    **Purpose**: This test is intended to evaluate the extent of linear dependency between all pairs of numerical
-    variables in the given dataset. It provides the Pearson Correlation coefficient, which reveals any high
-    correlations present. The purpose of doing this is to identify potential redundancy, as variables that are highly
-    correlated can often be removed to reduce the dimensionality of the dataset without significantly impacting the
-    model's performance.
-
-    **Test Mechanism**: This metric test generates a correlation matrix for all numerical variables in the dataset
-    using the Pearson correlation formula. A heat map is subsequently created to visualize this matrix effectively. The
-    color of each point on the heat map corresponds to the magnitude and direction (positive or negative) of the
-    correlation, with a range from -1 (perfect negative correlation) to 1 (perfect positive correlation). Any
-    correlation coefficients higher than 0.7 (in absolute terms) are indicated in white in the heat map, suggesting a
-    high degree of correlation.
+    ### Purpose
+
+    This test is intended to evaluate the extent of linear dependency between all pairs of numerical variables in the
+    given dataset. It provides the Pearson Correlation coefficient, which reveals any high correlations present. The
+    purpose of doing this is to identify potential redundancy, as variables that are highly correlated can often be
+    removed to reduce the dimensionality of the dataset without significantly impacting the model's performance.
+
+    ### Test Mechanism
+
+    This metric test generates a correlation matrix for all numerical variables in the dataset using the Pearson
+    correlation formula. A heat map is subsequently created to visualize this matrix effectively. The color of each
+    point on the heat map corresponds to the magnitude and direction (positive or negative) of the correlation, with a
+    range from -1 (perfect negative correlation) to 1 (perfect positive correlation). Any correlation coefficients
+    higher than 0.7 (in absolute terms) are indicated in white in the heat map, suggesting a high degree of correlation.
+
+    ### Signs of High Risk
 
-    **Signs of High Risk**:
     - A large number of variables in the dataset showing a high degree of correlation (coefficients approaching ±1).
     This indicates redundancy within the dataset, suggesting that some variables may not be contributing new
     information to the model.
-    - This could potentially lead to overfitting.
-
-    **Strengths**:
-    - The primary strength of this metric test is its ability to detect and quantify the linearity of relationships
-    between variables. This allows for the identification of redundant variables, which in turn can help in simplifying
-    models and potentially improving their performance.
-    - The visualization aspect (heatmap) is another strength as it offers an easy-to-understand overview of the
-    correlations, beneficial for those not comfortable navigating numerical matrices.
-
-    **Limitations**:
-    - The primary limitation of Pearson Correlation is its inability to detect non-linear relationships between
-    variables, which can lead to missed opportunities for dimensionality reduction.
-    - It only measures the degree of linear relationship and not the strength of effect of one variable on the other.
-    - The cutoff value of 0.7 for high correlation is a somewhat arbitrary choice and some valid dependencies might be
-    missed if they have a correlation coefficient less than this value.
+    - Potential risk of overfitting.
+
+    ### Strengths
+
+    - Detects and quantifies the linearity of relationships between variables, aiding in identifying redundant
+    variables to simplify models and potentially improve performance.
+    - The heatmap visualization provides an easy-to-understand overview of correlations, beneficial for users not
+    comfortable with numerical matrices.
+
+    ### Limitations
+
+    - Limited to detecting linear relationships, potentially missing non-linear relationships which impede
+    opportunities for dimensionality reduction.
+    - Measures only the degree of linear relationship, not the strength of one variable's effect on another.
+    - The 0.7 correlation threshold is arbitrary and might exclude valid dependencies with lower coefficients.
     """
 
-    name = "pearson_correlation_matrix"
-    required_inputs = ["dataset"]
-    tasks = ["classification", "regression"]
-    tags = ["tabular_data", "numerical_data", "correlation"]
-
-    def run(self):
-        columns = self.params.get("columns", list(self.inputs.dataset.df.columns))
-
-        corr_matrix = self.inputs.dataset.df[columns].corr(numeric_only=True)
-        heatmap = go.Heatmap(
-            z=corr_matrix.values,
-            x=list(corr_matrix.columns),
-            y=list(corr_matrix.index),
-            colorscale="rdbu",
-            zmin=-1,
-            zmax=1,
-        )
-
-        annotations = []
-        for i, row in enumerate(corr_matrix.values):
-            for j, value in enumerate(row):
-                color = "#ffffff" if abs(value) > 0.7 else "#000000"
-                annotations.append(
-                    go.layout.Annotation(
-                        text=str(round(value, 2)),
-                        x=corr_matrix.columns[j],
-                        y=corr_matrix.index[i],
-                        showarrow=False,
-                        font=dict(color=color),
-                    )
-                )
-
-        layout = go.Layout(
-            annotations=annotations,
-            xaxis=dict(side="top"),
-            yaxis=dict(scaleanchor="x", scaleratio=1),
-            width=800,
-            height=800,
-            autosize=True,
-            paper_bgcolor="rgba(0,0,0,0)",
-            plot_bgcolor="rgba(0,0,0,0)",
-        )
-
-        fig = go.Figure(data=[heatmap], layout=layout)
-
-        return self.cache_results(
-            figures=[
-                Figure(
-                    for_object=self,
-                    key=self.key,
-                    figure=fig,
-                )
-            ]
-        )
+    corr_matrix = dataset.df.corr(numeric_only=True)
+    heatmap = go.Heatmap(
+        z=corr_matrix.values,
+        x=list(corr_matrix.columns),
+        y=list(corr_matrix.index),
+        colorscale="rdbu",
+        zmin=-1,
+        zmax=1,
+    )
+
+    annotations = []
+    for i, row in enumerate(corr_matrix.values):
+        for j, value in enumerate(row):
+            color = "#ffffff" if abs(value) > 0.7 else "#000000"
+            annotations.append(
+                go.layout.Annotation(
+                    text=str(round(value, 2)),
+                    x=corr_matrix.columns[j],
+                    y=corr_matrix.index[i],
+                    showarrow=False,
+                    font=dict(color=color),
+                )
+            )
+
+    layout = go.Layout(
+        annotations=annotations,
+        xaxis=dict(side="top"),
+        yaxis=dict(scaleanchor="x", scaleratio=1),
+        width=800,
+        height=800,
+        autosize=True,
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="rgba(0,0,0,0)",
+    )
+
+    fig = go.Figure(data=[heatmap], layout=layout)
+
+    return fig
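With the class refactored into a decorated function, the test runs through the standard test harness rather than being instantiated directly. A hypothetical invocation following ValidMind's usual `init_dataset`/`run_test` flow; the DataFrame, column names, and `input_id`-style details are placeholders, and `vm.init(...)` with project credentials would normally precede this:

import pandas as pd
import validmind as vm

# Placeholder data; in practice this is your own pandas DataFrame
raw_df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 4, 5, 9], "target": [0, 1, 0, 1]})

# vm.init(...) with credentials would normally come first
vm_dataset = vm.init_dataset(dataset=raw_df, target_column="target")
result = vm.tests.run_test(
    "validmind.data_validation.PearsonCorrelationMatrix",
    inputs={"dataset": vm_dataset},
)
result.log()  # push the heatmap figure to the ValidMind platform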