validmind 2.5.6__py3-none-any.whl → 2.5.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +26 -7
  3. validmind/api_client.py +89 -43
  4. validmind/client.py +2 -2
  5. validmind/client_config.py +11 -14
  6. validmind/datasets/regression/fred_timeseries.py +67 -138
  7. validmind/template.py +1 -0
  8. validmind/test_suites/__init__.py +0 -2
  9. validmind/test_suites/statsmodels_timeseries.py +1 -1
  10. validmind/test_suites/summarization.py +0 -1
  11. validmind/test_suites/time_series.py +0 -43
  12. validmind/tests/__types__.py +3 -13
  13. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  14. validmind/tests/data_validation/ADF.py +31 -24
  15. validmind/tests/data_validation/AutoAR.py +9 -9
  16. validmind/tests/data_validation/AutoMA.py +23 -16
  17. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  18. validmind/tests/data_validation/AutoStationarity.py +21 -16
  19. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  20. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
  21. validmind/tests/data_validation/ClassImbalance.py +15 -12
  22. validmind/tests/data_validation/DFGLSArch.py +19 -13
  23. validmind/tests/data_validation/DatasetDescription.py +17 -11
  24. validmind/tests/data_validation/DatasetSplit.py +7 -5
  25. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  26. validmind/tests/data_validation/Duplicates.py +33 -25
  27. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  28. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  29. validmind/tests/data_validation/HighCardinality.py +19 -12
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  32. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  33. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  34. validmind/tests/data_validation/KPSS.py +34 -29
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  36. validmind/tests/data_validation/MissingValues.py +32 -27
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  38. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  39. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  40. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  41. validmind/tests/data_validation/ScatterPlot.py +63 -78
  42. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  43. validmind/tests/data_validation/Skewness.py +35 -37
  44. validmind/tests/data_validation/SpreadPlot.py +35 -35
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  47. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  48. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  49. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  50. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  51. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  52. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  53. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  54. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  55. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  57. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  58. validmind/tests/data_validation/UniqueRows.py +11 -6
  59. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  60. validmind/tests/data_validation/WOEBinTable.py +35 -30
  61. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  62. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  63. validmind/tests/data_validation/nlp/Hashtags.py +27 -20
  64. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  65. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  66. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  67. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  68. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  69. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  70. validmind/tests/data_validation/nlp/TextDescription.py +36 -35
  71. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  72. validmind/tests/decorator.py +81 -42
  73. validmind/tests/model_validation/BertScore.py +36 -27
  74. validmind/tests/model_validation/BleuScore.py +25 -19
  75. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  76. validmind/tests/model_validation/ContextualRecall.py +35 -13
  77. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  78. validmind/tests/model_validation/MeteorScore.py +46 -33
  79. validmind/tests/model_validation/ModelMetadata.py +32 -64
  80. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  81. validmind/tests/model_validation/RegardScore.py +30 -14
  82. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  83. validmind/tests/model_validation/RougeScore.py +36 -30
  84. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  85. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  86. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  87. validmind/tests/model_validation/TokenDisparity.py +31 -23
  88. validmind/tests/model_validation/ToxicityScore.py +26 -17
  89. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  90. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  91. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  92. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  93. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  94. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  96. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  97. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  98. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  101. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  102. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  103. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  104. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  105. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  106. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  107. validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
  108. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  109. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  110. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  111. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  112. validmind/tests/model_validation/ragas/utils.py +6 -0
  113. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  114. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  117. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  118. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  119. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  120. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  121. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  122. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  123. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  124. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  125. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  126. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  127. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  128. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  137. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +113 -73
  141. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  144. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  147. validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
  148. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  149. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
  150. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  151. validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
  152. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  153. validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
  154. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  155. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
  156. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  157. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  158. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  159. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  160. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  161. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  162. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  163. validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
  164. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  165. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
  166. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  167. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  168. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  169. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  170. validmind/tests/prompt_validation/Bias.py +14 -11
  171. validmind/tests/prompt_validation/Clarity.py +16 -14
  172. validmind/tests/prompt_validation/Conciseness.py +7 -5
  173. validmind/tests/prompt_validation/Delimitation.py +23 -22
  174. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  175. validmind/tests/prompt_validation/Robustness.py +12 -10
  176. validmind/tests/prompt_validation/Specificity.py +13 -11
  177. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  178. validmind/tests/run.py +68 -23
  179. validmind/unit_metrics/__init__.py +81 -144
  180. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  181. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  182. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  183. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  184. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  185. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  186. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  187. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  188. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  189. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  190. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  191. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  192. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  193. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  194. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  195. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  196. validmind/vm_models/dataset/dataset.py +2 -0
  197. validmind/vm_models/figure.py +5 -0
  198. validmind/vm_models/test/result_wrapper.py +93 -132
  199. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
  200. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
  201. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  202. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  203. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  204. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  205. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  206. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  207. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  208. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  209. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  210. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
  211. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
  212. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
@@ -28,6 +28,6 @@ class RegressionModelsEvaluation(TestSuite):
 
     suite_id = "regression_models_evaluation"
     tests = [
-        "validmind.model_validation.statsmodels.RegressionModelsCoeffs",
+        "validmind.model_validation.statsmodels.RegressionModelCoeffs",
         "validmind.model_validation.sklearn.RegressionModelsPerformanceComparison",
     ]
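The hunk above shows the shape of a ValidMind test suite: a TestSuite subclass carrying a `suite_id` and a `tests` list of test IDs. A minimal sketch of a custom suite following the same pattern; the `validmind.vm_models` import path is an assumption based on the package layout in the file list, not something shown in this diff:

    # Hypothetical custom suite mirroring the structure in the hunk above;
    # assumes TestSuite is exported from validmind.vm_models.
    from validmind.vm_models import TestSuite


    class MyRegressionEvaluation(TestSuite):
        """Runs the coefficient and performance-comparison checks on a model."""

        suite_id = "my_regression_evaluation"
        tests = [
            "validmind.model_validation.statsmodels.RegressionModelCoeffs",
            "validmind.model_validation.sklearn.RegressionModelsPerformanceComparison",
        ]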
@@ -16,7 +16,6 @@ class SummarizationMetrics(TestSuite):
 
     suite_id = "summarization_metrics"
     tests = [
-        "validmind.model_validation.RougeMetrics",
         "validmind.model_validation.TokenDisparity",
         "validmind.model_validation.BleuScore",
         "validmind.model_validation.BertScore",
@@ -77,39 +77,6 @@ class TimeSeriesMultivariate(TestSuite):
     ]
 
 
-class TimeSeriesForecast(TestSuite):
-    """
-    This test suite computes predictions from statsmodels OLS linear regression models
-    against a list of models and plots the historical data alongside the forecasted data.
-    The purpose of this test suite is to evaluate the performance of each model in predicting
-    future values of a time series based on historical data. By comparing the historical
-    values with the forecasted values, users can visually assess the accuracy of each model
-    and determine which one best fits the data. In addition, this test suite can help users
-    identify any discrepancies between the models and the actual data, allowing for potential
-    improvements in model selection and parameter tuning.
-    """
-
-    suite_id = "time_series_forecast"
-    tests = ["validmind.model_validation.statsmodels.RegressionModelForecastPlotLevels"]
-
-
-class TimeSeriesSensitivity(TestSuite):
-    """
-    This test suite performs sensitivity analysis on a statsmodels OLS linear regression model
-    by applying distinct shocks to each input variable individually and then computing the
-    model's predictions. The aim of this test suite is to investigate the model's responsiveness
-    to variations in its inputs. By juxtaposing the model's predictions under baseline and shocked
-    conditions, users can visually evaluate the sensitivity of the model to changes in each
-    variable. This kind of analysis can also shed light on potential model limitations, including
-    over-reliance on specific variables or insufficient responsiveness to changes in inputs. As a
-    result, this test suite can provide insights that may be beneficial for refining the model
-    structure, improving its robustness, and ensuring a more reliable prediction performance.
-    """
-
-    suite_id = "time_series_sensitivity"
-    tests = ["validmind.model_validation.statsmodels.RegressionModelSensitivityPlot"]
-
-
 class TimeSeriesDataset(TestSuite):
     """
     Test suite for time series datasets.
@@ -152,14 +119,4 @@ class TimeSeriesModelValidation(TestSuite):
             "section_description": RegressionModelsEvaluation.__doc__,
             "section_tests": RegressionModelsEvaluation.tests,
         },
-        {
-            "section_id": TimeSeriesForecast.suite_id,
-            "section_description": TimeSeriesForecast.__doc__,
-            "section_tests": TimeSeriesForecast.tests,
-        },
-        {
-            "section_id": TimeSeriesSensitivity.suite_id,
-            "section_description": TimeSeriesSensitivity.__doc__,
-            "section_tests": TimeSeriesSensitivity.tests,
-        },
     ]
@@ -33,7 +33,7 @@ TestID = Literal[
     "validmind.model_validation.ClusterSizeDistribution",
     "validmind.model_validation.TokenDisparity",
     "validmind.model_validation.ToxicityScore",
-    "validmind.model_validation.ModelMetadataComparison",
+    "validmind.model_validation.ModelMetadata",
    "validmind.model_validation.TimeSeriesR2SquareBySegments",
     "validmind.model_validation.embeddings.CosineSimilarityComparison",
     "validmind.model_validation.embeddings.EmbeddingsVisualization2D",
@@ -81,21 +81,17 @@ TestID = Literal[
     "validmind.model_validation.sklearn.RegressionR2Square",
     "validmind.model_validation.sklearn.RegressionErrors",
     "validmind.model_validation.sklearn.ClusterPerformance",
-    "validmind.model_validation.sklearn.FeatureImportanceComparison",
+    "validmind.model_validation.sklearn.FeatureImportance",
     "validmind.model_validation.sklearn.TrainingTestDegradation",
-    "validmind.model_validation.sklearn.RegressionErrorsComparison",
     "validmind.model_validation.sklearn.HyperParametersTuning",
     "validmind.model_validation.sklearn.KMeansClustersOptimization",
     "validmind.model_validation.sklearn.ModelsPerformanceComparison",
     "validmind.model_validation.sklearn.WeakspotsDiagnosis",
-    "validmind.model_validation.sklearn.RegressionR2SquareComparison",
     "validmind.model_validation.sklearn.PopulationStabilityIndex",
     "validmind.model_validation.sklearn.MinimumAccuracy",
-    "validmind.model_validation.statsmodels.RegressionModelsCoeffs",
+    "validmind.model_validation.statsmodels.RegressionModelCoeffs",
     "validmind.model_validation.statsmodels.BoxPierce",
     "validmind.model_validation.statsmodels.RegressionCoeffsPlot",
-    "validmind.model_validation.statsmodels.RegressionModelSensitivityPlot",
-    "validmind.model_validation.statsmodels.RegressionModelForecastPlotLevels",
     "validmind.model_validation.statsmodels.ScorecardHistogram",
     "validmind.model_validation.statsmodels.LJungBox",
     "validmind.model_validation.statsmodels.JarqueBera",
@@ -110,15 +106,12 @@ TestID = Literal[
     "validmind.model_validation.statsmodels.PredictionProbabilitiesHistogram",
     "validmind.model_validation.statsmodels.AutoARIMA",
     "validmind.model_validation.statsmodels.GINITable",
-    "validmind.model_validation.statsmodels.RegressionModelForecastPlot",
     "validmind.model_validation.statsmodels.DurbinWatsonTest",
     "validmind.ongoing_monitoring.PredictionCorrelation",
     "validmind.ongoing_monitoring.PredictionAcrossEachFeature",
     "validmind.ongoing_monitoring.FeatureDrift",
     "validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
-    "validmind.data_validation.MissingValuesRisk",
     "validmind.data_validation.IQROutliersTable",
-    "validmind.data_validation.BivariateFeaturesBarPlots",
     "validmind.data_validation.Skewness",
     "validmind.data_validation.Duplicates",
     "validmind.data_validation.MissingValuesBarPlot",
@@ -130,7 +123,6 @@ TestID = Literal[
     "validmind.data_validation.AutoStationarity",
     "validmind.data_validation.DescriptiveStatistics",
     "validmind.data_validation.TimeSeriesDescription",
-    "validmind.data_validation.ANOVAOneWayTable",
     "validmind.data_validation.TargetRateBarPlots",
     "validmind.data_validation.PearsonCorrelationMatrix",
     "validmind.data_validation.FeatureTargetCorrelationPlot",
@@ -147,9 +139,7 @@ TestID = Literal[
     "validmind.data_validation.TooManyZeroValues",
     "validmind.data_validation.HighPearsonCorrelation",
     "validmind.data_validation.ACFandPACFPlot",
-    "validmind.data_validation.BivariateHistograms",
     "validmind.data_validation.WOEBinTable",
-    "validmind.data_validation.HeatmapFeatureCorrelations",
     "validmind.data_validation.TimeSeriesFrequency",
     "validmind.data_validation.DatasetSplit",
     "validmind.data_validation.SpreadPlot",
@@ -14,25 +14,27 @@ class ACFandPACFPlot(Metric):
     Analyzes time series data using Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots to
     reveal trends and correlations.
 
-    **Purpose**: The ACF (Autocorrelation Function) and PACF (Partial Autocorrelation Function) plot test is employed
-    to analyze time series data in machine learning models. It illuminates the correlation of the data over time by
-    plotting the correlation of the series with its own lags (ACF), and the correlations after removing effects already
-    accounted for by earlier lags (PACF). This information can identify trends, such as seasonality, degrees of
-    autocorrelation, and inform the selection of order parameters for AutoRegressive Integrated Moving Average (ARIMA)
-    models.
+    ### Purpose
 
-    **Test Mechanism**: The `ACFandPACFPlot` test accepts a dataset with a time-based index. It first confirms the
-    index is of a datetime type, then handles any NaN values. The test subsequently generates ACF and PACF plots for
-    each column in the dataset, producing a subplot for each. If the dataset doesn't include key columns, an error is
-    returned.
+    The ACF (Autocorrelation Function) and PACF (Partial Autocorrelation Function) plot test is employed to analyze
+    time series data in machine learning models. It illuminates the correlation of the data over time by plotting the
+    correlation of the series with its own lags (ACF), and the correlations after removing effects already accounted
+    for by earlier lags (PACF). This information can identify trends, such as seasonality, degrees of autocorrelation,
+    and inform the selection of order parameters for AutoRegressive Integrated Moving Average (ARIMA) models.
 
-    **Signs of High Risk**:
+    ### Test Mechanism
+
+    The `ACFandPACFPlot` test accepts a dataset with a time-based index. It first confirms the index is of a datetime
+    type, then handles any NaN values. The test subsequently generates ACF and PACF plots for each column in the
+    dataset, producing a subplot for each. If the dataset doesn't include key columns, an error is returned.
+
+    ### Signs of High Risk
 
     - Sudden drops in the correlation at a specific lag might signal a model at high risk.
     - Consistent high correlation across multiple lags could also indicate non-stationarity in the data, which may
     suggest that a model estimated on this data won't generalize well to future, unknown data.
 
-    **Strengths**:
+    ### Strengths
 
     - ACF and PACF plots offer clear graphical representations of the correlations in time series data.
     - These plots are effective at revealing important data characteristics such as seasonality, trends, and
@@ -40,7 +42,7 @@ class ACFandPACFPlot(Metric):
     - The insights from these plots aid in better model configuration, particularly in the selection of ARIMA model
     parameters.
 
-    **Limitations**:
+    ### Limitations
 
     - ACF and PACF plots are exclusively for time series data and hence, can't be applied to all ML models.
     - These plots require large, consistent datasets as gaps could lead to misleading results.
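The mechanism this docstring describes (per-column ACF and PACF subplots over a datetime-indexed dataset) can be reproduced standalone with the statsmodels plotting helpers. This is a sketch of equivalent plots, not the test's internal code; it assumes `df` has a DatetimeIndex and numeric columns:

    # Standalone sketch of per-column ACF/PACF plots.
    import matplotlib.pyplot as plt
    from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

    for col in df.columns:
        series = df[col].dropna()  # the test likewise handles NaN values first
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
        plot_acf(series, ax=ax1, title=f"ACF: {col}")
        plot_pacf(series, ax=ax2, title=f"PACF: {col}")
    plt.show()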
@@ -18,31 +18,38 @@ class ADF(Metric):
     """
     Assesses the stationarity of a time series dataset using the Augmented Dickey-Fuller (ADF) test.
 
-    **Purpose**: The Augmented Dickey-Fuller (ADF) test metric is used here to determine the order of integration,
-    i.e., the stationarity of a given time series data. The stationary property of data is pivotal in many machine
-    learning models as it impacts the reliability and effectiveness of predictions and forecasts.
-
-    **Test Mechanism**: The ADF test starts by executing the ADF function from the statsmodels library on every feature
-    of the dataset. Multiple outputs are generated for each run, including the ADF test statistic and p-value, count of
-    lags used, the number of observations factored into the test, critical values at various confidence levels, and the
-    maximized information criterion. These results are stored for each feature for subsequent analysis.
-
-    **Signs of High Risk**:
-    - An inflated ADF statistic and high p-value (generally above 0.05) insinuate a high risk to the model's
-    performance due to the presence of a unit root indicating non-stationarity.
-    - Such non-stationarity might result in untrustworthy or insufficient forecasts.
-
-    **Strengths**:
-    - The ADF test is robust to more sophisticated correlation within the data, which empowers it to be deployed in
-    settings where data might display complex stochastic behavior.
-    - The ADF test provides explicit outputs like test statistics, critical values, and information criterion, thereby
-    enhancing our understanding and transparency of the model validation process.
-
-    **Limitations**:
+    ### Purpose
+
+    The Augmented Dickey-Fuller (ADF) test metric is used to determine the order of integration, i.e., the stationarity
+    of a given time series dataset. The stationary property of data is pivotal in many machine learning models as it
+    impacts the reliability and effectiveness of predictions and forecasts.
+
+    ### Test Mechanism
+
+    The ADF test is executed using the `adfuller` function from the `statsmodels` library on each feature of the
+    dataset. Multiple outputs are generated for each run, including the ADF test statistic and p-value, count of lags
+    used, the number of observations considered in the test, critical values at various confidence levels, and the
+    information criterion. These results are stored for each feature for subsequent analysis.
+
+    ### Signs of High Risk
+
+    - An inflated ADF statistic and high p-value (generally above 0.05) indicate a high risk to the model's performance
+    due to the presence of a unit root indicating non-stationarity.
+    - Non-stationarity might result in untrustworthy or insufficient forecasts.
+
+    ### Strengths
+
+    - The ADF test is robust to sophisticated correlations within the data, making it suitable for settings where data
+    displays complex stochastic behavior.
+    - It provides explicit outputs like test statistics, critical values, and information criterion, enhancing
+    understanding and transparency in the model validation process.
+
+    ### Limitations
+
     - The ADF test might demonstrate low statistical power, making it challenging to differentiate between a unit root
-    and near-unit-root processes causing false negatives.
-    - The test assumes the data follows an autoregressive process, which might not be the case all the time.
-    - The ADF test finds it demanding to manage time series data with structural breaks.
+    and near-unit-root processes, potentially causing false negatives.
+    - It assumes the data follows an autoregressive process, which might not always be the case.
+    - The test struggles with time series data that have structural breaks.
     """
 
     name = "adf"
@@ -16,7 +16,7 @@ class AutoAR(Metric):
     """
     Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria.
 
-    **Purpose**:
+    ### Purpose
 
     The AutoAR test is intended to automatically identify the Autoregressive (AR) order of a time series by utilizing
     the Bayesian Information Criterion (BIC) and Akaike Information Criterion (AIC). AR order is crucial in forecasting
@@ -24,30 +24,30 @@ class AutoAR(Metric):
     objective is to select the most fitting AR model that encapsulates the trend and seasonality in the time series
     data.
 
-    **Test Mechanism**:
+    ### Test Mechanism
 
     The test mechanism operates by iterating through a possible range of AR orders up to a defined maximum. An AR model
     is fitted for each order, and the corresponding BIC and AIC are computed. BIC and AIC statistical measures are
     designed to penalize models for complexity, preferring simpler models that fit the data proficiently. To verify the
-    stationarity of the time series, the Augmented Dickey-Fuller test is executed. The AR order, BIC, and AIC findings,
+    stationarity of the time series, the Augmented Dickey-Fuller test is executed. The AR order, BIC, and AIC findings
     are compiled into a dataframe for effortless comparison. Then, the AR order with the smallest BIC is established as
     the desirable order for each variable.
 
-    **Signs of High Risk**:
+    ### Signs of High Risk
 
     - An augmented Dickey Fuller test p-value > 0.05, indicating the time series isn't stationary, may lead to
     inaccurate results.
     - Problems with the model fitting procedure, such as computational or convergence issues.
-    - Continuous selection of the maximum specified AR order may suggest insufficient set limit.
+    - Continuous selection of the maximum specified AR order may suggest an insufficient set limit.
 
-    **Strengths**:
+    ### Strengths
 
     - The test independently pinpoints the optimal AR order, thereby reducing potential human bias.
     - It strikes a balance between model simplicity and goodness-of-fit to avoid overfitting.
-    - Has the capability to account for stationarity in a time series, an essential aspect for dependable AR modelling.
-    - The results are aggregated into an comprehensive table, enabling an easy interpretation.
+    - Has the capability to account for stationarity in a time series, an essential aspect for dependable AR modeling.
+    - The results are aggregated into a comprehensive table, enabling an easy interpretation.
 
-    **Limitations**:
+    ### Limitations
 
     - The tests need a stationary time series input.
     - They presume a linear relationship between the series and its lags.
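The selection loop described in the Test Mechanism (fit an AR(p) model for each candidate order, keep the one with the smallest BIC) reduces to a few lines. A sketch using statsmodels' AutoReg; whether the test fits AR models through AutoReg or an ARIMA wrapper is not visible in this diff, and `series` / `max_ar_order` are assumed names:

    # BIC-driven AR order selection; series is a stationary pandas Series.
    from statsmodels.tsa.ar_model import AutoReg

    max_ar_order = 3
    fits = []
    for p in range(1, max_ar_order + 1):
        res = AutoReg(series, lags=p).fit()
        fits.append({"AR Order": p, "BIC": res.bic, "AIC": res.aic})
    best = min(fits, key=lambda row: row["BIC"])  # smallest BIC wins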
@@ -17,32 +17,39 @@ class AutoMA(Metric):
     Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on
     minimal BIC and AIC values.
 
-    **Purpose**: The `AutoMA` metric serves an essential role of automated decision-making for selecting the optimal
-    Moving Average (MA) order for every variable in a given time series dataset. The selection is dependent on the
-    minimalization of BIC (Bayesian Information Criterion) and AIC (Akaike Information Criterion); these are
-    established statistical tools used for model selection. Furthermore, prior to the commencement of the model fitting
-    process, the algorithm conducts a stationarity test (Augmented Dickey-Fuller test) on each series.
-
-    **Test Mechanism**: Starting off, the `AutoMA` algorithm checks whether the `max_ma_order` parameter has been
-    provided. It consequently loops through all variables in the dataset, carrying out the Dickey-Fuller test for
-    stationarity. For each stationary variable, it fits an ARIMA model for orders running from 0 to `max_ma_order`. The
-    result is a list showcasing the BIC and AIC values of the ARIMA models based on different orders. The MA order,
-    which yields the smallest BIC, is chosen as the 'best MA order' for every single variable. The final results
-    include a table summarizing the auto MA analysis and another table listing the best MA order for each variable.
-
-    **Signs of High Risk**:
+    ### Purpose
+
+    The `AutoMA` metric serves an essential role of automated decision-making for selecting the optimal Moving Average
+    (MA) order for every variable in a given time series dataset. The selection is dependent on the minimalization of
+    BIC (Bayesian Information Criterion) and AIC (Akaike Information Criterion); these are established statistical
+    tools used for model selection. Furthermore, prior to the commencement of the model fitting process, the algorithm
+    conducts a stationarity test (Augmented Dickey-Fuller test) on each series.
+
+    ### Test Mechanism
+
+    Starting off, the `AutoMA` algorithm checks whether the `max_ma_order` parameter has been provided. It consequently
+    loops through all variables in the dataset, carrying out the Dickey-Fuller test for stationarity. For each
+    stationary variable, it fits an ARIMA model for orders running from 0 to `max_ma_order`. The result is a list
+    showcasing the BIC and AIC values of the ARIMA models based on different orders. The MA order, which yields the
+    smallest BIC, is chosen as the 'best MA order' for every single variable. The final results include a table
+    summarizing the auto MA analysis and another table listing the best MA order for each variable.
+
+    ### Signs of High Risk
+
     - When a series is non-stationary (p-value>0.05 in the Dickey-Fuller test), the produced result could be inaccurate.
     - Any error that arises in the process of fitting the ARIMA models, especially with a higher MA order, can
     potentially indicate risks and might need further investigation.
 
-    **Strengths**:
+    ### Strengths
+
     - The metric facilitates automation in the process of selecting the MA order for time series forecasting. This
     significantly saves time and reduces efforts conventionally necessary for manual hyperparameter tuning.
     - The use of both BIC and AIC enhances the likelihood of selecting the most suitable model.
     - The metric ascertains the stationarity of the series prior to model fitting, thus ensuring that the underlying
     assumptions of the MA model are fulfilled.
 
-    **Limitations**:
+    ### Limitations
+
     - If the time series fails to be stationary, the metric may yield inaccurate results. Consequently, it necessitates
     pre-processing steps to stabilize the series before fitting the ARIMA model.
     - The metric adopts a rudimentary model selection process based on BIC and doesn't consider other potential model
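The MA counterpart mirrors the AR sketch above: gate on the Dickey-Fuller p-value the docstring mentions, then fit ARIMA(0, 0, q) for each candidate order and keep the smallest BIC. Again a sketch under assumed names (`series`, `max_ma_order`), not the test's own code:

    # MA order selection for one stationary series.
    from statsmodels.tsa.arima.model import ARIMA
    from statsmodels.tsa.stattools import adfuller

    max_ma_order = 3
    if adfuller(series.dropna())[1] > 0.05:
        raise ValueError("series is non-stationary; MA selection would be unreliable")

    fits = [(q, ARIMA(series, order=(0, 0, q)).fit()) for q in range(max_ma_order + 1)]
    best_q = min(fits, key=lambda item: item[1].bic)[0]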
@@ -17,28 +17,30 @@ class AutoSeasonality(Metric):
     Automatically identifies and quantifies optimal seasonality in time series data to improve forecasting model
     performance.
 
-    **Purpose:**
-    The AutoSeasonality metric's purpose is to automatically detect and identify the best seasonal order or period for
-    each variable in a time series dataset. This detection helps to quantify periodic patterns and seasonality that
-    reoccur at fixed intervals of time in the data. This is especially significant for forecasting-based models, where
-    understanding the seasonality component can drastically improve prediction accuracy.
-
-    **Test Mechanism:**
-    This metric uses the seasonal decomposition method from the Statsmodels Python library. The function takes the
+    ### Purpose
+
+    The AutoSeasonality test aims to automatically detect and identify the best seasonal order or period for each
+    variable in a time series dataset. This detection helps to quantify periodic patterns and seasonality that reoccur
+    at fixed intervals in the data. Understanding the seasonality component can drastically improve prediction
+    accuracy, which is especially significant for forecasting-based models.
+
+    ### Test Mechanism
+
+    This test uses the seasonal decomposition method from the Statsmodels Python library. The function takes the
     'additive' model type for each variable and applies it within the prescribed range of 'min_period' and
-    'max_period'. The function decomposes the seasonality for each period in the range and calculates the mean residual
-    error for each period. The seasonal period that results in the minimum residuals is marked as the 'Best Period'.
-    The test results include the 'Best Period', the calculated residual errors, and a determination of 'Seasonality' or
-    'No Seasonality'.
+    'max_period'. It decomposes the seasonality for each period in the range and calculates the mean residual error for
+    each period. The seasonal period that results in the minimum residuals is marked as the 'Best Period'. The test
+    results include the 'Best Period', the calculated residual errors, and a determination of 'Seasonality' or 'No
+    Seasonality'.
 
-    **Signs of High Risk:**
+    ### Signs of High Risk
 
     - If the optimal seasonal period (or 'Best Period') is consistently at the maximum or minimum limit of the offered
     range for a majority of variables, it may suggest that the range set does not adequately capture the true seasonal
     pattern in the series.
     - A high average 'Residual Error' for the selected 'Best Period' could indicate issues with the model's performance.
 
-    **Strengths:**
+    ### Strengths
 
     - The metric offers an automatic approach to identifying and quantifying the optimal seasonality, providing a
     robust method for analyzing time series datasets.
@@ -46,9 +48,9 @@ class AutoSeasonality(Metric):
     seasonality.
     - The use of concrete and measurable statistical methods improves the objectivity and reproducibility of the model.
 
-    **Limitations:**
+    ### Limitations
 
-    - This AutoSeasonality metric may not be suitable if the time series data exhibits random walk behaviour or lacks
+    - This AutoSeasonality metric may not be suitable if the time series data exhibits random walk behavior or lacks
     clear seasonality, as the seasonal decomposition model may not be appropriate.
     - The defined range for the seasonal period (min_period and max_period) can influence the outcomes. If the actual
     seasonality period lies outside this range, this method will not be able to identify the true seasonal order.
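The period search described in the Test Mechanism (additive decomposition for every period between min_period and max_period, smallest mean residual error wins) can be sketched directly with statsmodels; `series` and the example range are assumptions:

    # Candidate-period sweep; seasonal_decompose needs a series with a
    # regular frequency. Parameter names mirror the docstring.
    from statsmodels.tsa.seasonal import seasonal_decompose

    min_period, max_period = 2, 12
    errors = {}
    for period in range(min_period, max_period + 1):
        result = seasonal_decompose(series, model="additive", period=period)
        errors[period] = result.resid.dropna().abs().mean()  # mean residual error
    best_period = min(errors, key=errors.get)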
@@ -13,26 +13,30 @@ class AutoStationarity(Metric):
     """
     Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame.
 
-    **Purpose**: The AutoStationarity metric is intended to automatically detect and evaluate the stationary nature of
-    each time series in a DataFrame. It incorporates the Augmented Dickey-Fuller (ADF) test, a statistical approach
-    used to assess stationarity. Stationarity is a fundamental property suggesting that statistic features like mean
-    and variance remain unchanged over time. This is necessary for many time-series models.
-
-    **Test Mechanism**: The mechanism for the AutoStationarity test involves applying the Augmented Dicky-Fuller test
-    to each time series within the given dataframe to assess if they are stationary. Every series in the dataframe is
-    looped, using the ADF test up to a defined maximum order (configurable and by default set to 5). The p-value
-    resulting from the ADF test is compared against a predetermined threshold (also configurable and by default set to
-    0.05). The time series is deemed stationary at its current differencing order if the p-value is less than the
-    threshold.
-
-    **Signs of High Risk**:
+    ### Purpose
+
+    The AutoStationarity metric is intended to automatically detect and evaluate the stationary nature of each time
+    series in a DataFrame. It incorporates the Augmented Dickey-Fuller (ADF) test, a statistical approach used to
+    assess stationarity. Stationarity is a fundamental property suggesting that statistic features like mean and
+    variance remain unchanged over time. This is necessary for many time-series models.
+
+    ### Test Mechanism
+
+    The mechanism for the AutoStationarity test involves applying the Augmented Dicky-Fuller test to each time series
+    within the given dataframe to assess if they are stationary. Every series in the dataframe is looped, using the ADF
+    test up to a defined maximum order (configurable and by default set to 5). The p-value resulting from the ADF test
+    is compared against a predetermined threshold (also configurable and by default set to 0.05). The time series is
+    deemed stationary at its current differencing order if the p-value is less than the threshold.
+
+    ### Signs of High Risk
+
     - A significant number of series not achieving stationarity even at the maximum order of differencing can indicate
     high risk or potential failure in the model.
     - This could suggest the series may not be appropriately modeled by a stationary process, hence other modeling
     approaches might be required.
 
+    ### Strengths
 
-    **Strengths**:
     - The key strength in this metric lies in the automation of the ADF test, enabling mass stationarity analysis
     across various time series and boosting the efficiency and credibility of the analysis.
     - The utilization of the ADF test, a widely accepted method for testing stationarity, lends authenticity to the
@@ -40,8 +44,9 @@ class AutoStationarity(Metric):
     - The introduction of the max order and threshold parameters give users the autonomy to determine their preferred
     levels of stringency in the tests.
 
-    **Limitations**:
-    - The Augumented Dicky-Fuller test and the stationarity test are not without their limitations. These tests are
+    ### Limitations
+
+    - The Augmented Dickey-Fuller test and the stationarity test are not without their limitations. These tests are
     premised on the assumption that the series can be modeled by an autoregressive process, which may not always hold
     true.
     - The stationarity check is highly sensitive to the choice of threshold for the significance level; an extremely
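The loop this docstring describes (apply the ADF test at successive differencing orders, up to a default maximum of 5, against a default 0.05 p-value threshold) is essentially the following sketch; the per-column loop and result layout are assumptions, only the defaults come from the docstring:

    # Per-series stationarity sweep: difference until the ADF p-value drops
    # below the threshold, or give up at max_order.
    from statsmodels.tsa.stattools import adfuller

    max_order, threshold = 5, 0.05  # defaults named in the docstring
    orders = {}
    for col in df.columns:
        series = df[col].dropna()
        for order in range(max_order + 1):
            if adfuller(series)[1] < threshold:
                orders[col] = order  # stationary at this differencing order
                break
            series = series.diff().dropna()
        else:
            orders[col] = None  # never stationary: a sign of high risk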