validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry, and reflects the changes between those versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (233)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +80 -119
  3. validmind/ai/test_result_description/config.yaml +29 -0
  4. validmind/ai/test_result_description/context.py +73 -0
  5. validmind/ai/test_result_description/image_processing.py +124 -0
  6. validmind/ai/test_result_description/system.jinja +39 -0
  7. validmind/ai/test_result_description/user.jinja +25 -0
  8. validmind/api_client.py +89 -43
  9. validmind/client.py +2 -2
  10. validmind/client_config.py +11 -14
  11. validmind/datasets/credit_risk/__init__.py +1 -0
  12. validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
  13. validmind/datasets/credit_risk/lending_club_bias.py +142 -0
  14. validmind/datasets/regression/fred_timeseries.py +67 -138
  15. validmind/template.py +1 -0
  16. validmind/test_suites/__init__.py +0 -2
  17. validmind/test_suites/statsmodels_timeseries.py +1 -1
  18. validmind/test_suites/summarization.py +0 -1
  19. validmind/test_suites/time_series.py +0 -43
  20. validmind/tests/__types__.py +14 -15
  21. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  22. validmind/tests/data_validation/ADF.py +31 -24
  23. validmind/tests/data_validation/AutoAR.py +9 -9
  24. validmind/tests/data_validation/AutoMA.py +23 -16
  25. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  26. validmind/tests/data_validation/AutoStationarity.py +21 -16
  27. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  28. validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
  29. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
  30. validmind/tests/data_validation/ClassImbalance.py +15 -12
  31. validmind/tests/data_validation/DFGLSArch.py +19 -13
  32. validmind/tests/data_validation/DatasetDescription.py +17 -11
  33. validmind/tests/data_validation/DatasetSplit.py +7 -5
  34. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  35. validmind/tests/data_validation/Duplicates.py +33 -25
  36. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  37. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  38. validmind/tests/data_validation/HighCardinality.py +19 -12
  39. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  40. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  41. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  42. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  43. validmind/tests/data_validation/JarqueBera.py +70 -0
  44. validmind/tests/data_validation/KPSS.py +34 -29
  45. validmind/tests/data_validation/LJungBox.py +66 -0
  46. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  47. validmind/tests/data_validation/MissingValues.py +32 -27
  48. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  49. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  50. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  51. validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
  52. validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
  53. validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
  54. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
  55. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  56. validmind/tests/data_validation/RunsTest.py +72 -0
  57. validmind/tests/data_validation/ScatterPlot.py +63 -78
  58. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  59. validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
  60. validmind/tests/data_validation/Skewness.py +35 -37
  61. validmind/tests/data_validation/SpreadPlot.py +35 -35
  62. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  63. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  64. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  65. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  66. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  67. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  68. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  69. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  70. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  71. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  72. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  73. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  74. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  75. validmind/tests/data_validation/UniqueRows.py +11 -6
  76. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  77. validmind/tests/data_validation/WOEBinTable.py +35 -30
  78. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  79. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  80. validmind/tests/data_validation/nlp/Hashtags.py +42 -40
  81. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  82. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  83. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  84. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  85. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  86. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  87. validmind/tests/data_validation/nlp/TextDescription.py +39 -36
  88. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  89. validmind/tests/decorator.py +81 -42
  90. validmind/tests/model_validation/BertScore.py +36 -27
  91. validmind/tests/model_validation/BleuScore.py +25 -19
  92. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  93. validmind/tests/model_validation/ContextualRecall.py +38 -13
  94. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  95. validmind/tests/model_validation/MeteorScore.py +46 -33
  96. validmind/tests/model_validation/ModelMetadata.py +32 -64
  97. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  98. validmind/tests/model_validation/RegardScore.py +30 -14
  99. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  100. validmind/tests/model_validation/RougeScore.py +36 -30
  101. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  102. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  103. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  104. validmind/tests/model_validation/TokenDisparity.py +31 -23
  105. validmind/tests/model_validation/ToxicityScore.py +26 -17
  106. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  107. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  108. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  109. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  110. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  111. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  112. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  113. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  114. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  115. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  116. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  117. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  118. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  119. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  120. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  121. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  122. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  123. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  124. validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
  125. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  126. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  127. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  128. validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
  129. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  130. validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
  131. validmind/tests/model_validation/ragas/utils.py +6 -0
  132. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  133. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  134. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  135. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  136. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  137. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  138. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  139. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  140. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  141. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  142. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  143. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  144. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  145. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  146. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  147. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  148. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  149. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  150. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
  151. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  152. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  153. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  154. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  155. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  156. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  157. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
  158. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  159. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
  160. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
  161. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  162. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  163. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  164. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  165. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  166. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  167. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
  168. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  169. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  170. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  171. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
  172. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  173. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  174. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  175. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  176. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  177. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  178. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  179. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  180. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  181. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  182. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  183. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  184. validmind/tests/prompt_validation/Bias.py +14 -11
  185. validmind/tests/prompt_validation/Clarity.py +16 -14
  186. validmind/tests/prompt_validation/Conciseness.py +7 -5
  187. validmind/tests/prompt_validation/Delimitation.py +23 -22
  188. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  189. validmind/tests/prompt_validation/Robustness.py +12 -10
  190. validmind/tests/prompt_validation/Specificity.py +13 -11
  191. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  192. validmind/tests/run.py +68 -23
  193. validmind/unit_metrics/__init__.py +81 -144
  194. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  195. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  196. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  197. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  198. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  199. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  200. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  201. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  202. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  203. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  204. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  205. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  206. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  207. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  208. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  209. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  210. validmind/utils.py +4 -0
  211. validmind/vm_models/dataset/dataset.py +2 -0
  212. validmind/vm_models/figure.py +5 -0
  213. validmind/vm_models/test/metric.py +1 -0
  214. validmind/vm_models/test/result_wrapper.py +143 -158
  215. validmind/vm_models/test/threshold_test.py +1 -0
  216. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
  217. validmind-2.5.18.dist-info/RECORD +324 -0
  218. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  219. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  220. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  221. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  222. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  223. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  224. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  225. validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
  226. validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
  227. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  228. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  229. validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
  230. validmind-2.5.8.dist-info/RECORD +0 -318
  231. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
  232. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
  233. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
@@ -16,7 +16,7 @@ class AutoAR(Metric):
  """
  Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria.

- **Purpose**:
+ ### Purpose

  The AutoAR test is intended to automatically identify the Autoregressive (AR) order of a time series by utilizing
  the Bayesian Information Criterion (BIC) and Akaike Information Criterion (AIC). AR order is crucial in forecasting
@@ -24,30 +24,30 @@ class AutoAR(Metric):
  objective is to select the most fitting AR model that encapsulates the trend and seasonality in the time series
  data.

- **Test Mechanism**:
+ ### Test Mechanism

  The test mechanism operates by iterating through a possible range of AR orders up to a defined maximum. An AR model
  is fitted for each order, and the corresponding BIC and AIC are computed. BIC and AIC statistical measures are
  designed to penalize models for complexity, preferring simpler models that fit the data proficiently. To verify the
- stationarity of the time series, the Augmented Dickey-Fuller test is executed. The AR order, BIC, and AIC findings,
+ stationarity of the time series, the Augmented Dickey-Fuller test is executed. The AR order, BIC, and AIC findings
  are compiled into a dataframe for effortless comparison. Then, the AR order with the smallest BIC is established as
  the desirable order for each variable.

- **Signs of High Risk**:
+ ### Signs of High Risk

  - An augmented Dickey Fuller test p-value > 0.05, indicating the time series isn't stationary, may lead to
  inaccurate results.
  - Problems with the model fitting procedure, such as computational or convergence issues.
- - Continuous selection of the maximum specified AR order may suggest insufficient set limit.
+ - Continuous selection of the maximum specified AR order may suggest an insufficient set limit.

- **Strengths**:
+ ### Strengths

  - The test independently pinpoints the optimal AR order, thereby reducing potential human bias.
  - It strikes a balance between model simplicity and goodness-of-fit to avoid overfitting.
- - Has the capability to account for stationarity in a time series, an essential aspect for dependable AR modelling.
- - The results are aggregated into an comprehensive table, enabling an easy interpretation.
+ - Has the capability to account for stationarity in a time series, an essential aspect for dependable AR modeling.
+ - The results are aggregated into a comprehensive table, enabling an easy interpretation.

- **Limitations**:
+ ### Limitations

  - The tests need a stationary time series input.
  - They presume a linear relationship between the series and its lags.
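
The AutoAR docstring above describes fitting AR models over a range of candidate orders and keeping the one with the smallest BIC, after an ADF stationarity check. A minimal standalone sketch of that search, assuming a stationary pandas Series (illustrative names only; this is not the package's implementation):

```python
import pandas as pd
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.stattools import adfuller


def best_ar_order(series: pd.Series, max_ar_order: int = 3) -> pd.DataFrame:
    series = series.dropna()

    # ADF p-value > 0.05 suggests non-stationarity, so results may be unreliable
    adf_pvalue = adfuller(series)[1]

    rows = []
    for order in range(1, max_ar_order + 1):
        fit = AutoReg(series, lags=order).fit()
        rows.append(
            {"AR Order": order, "BIC": fit.bic, "AIC": fit.aic, "ADF p-value": adf_pvalue}
        )

    # The order with the smallest BIC sorts to the top
    return pd.DataFrame(rows).sort_values("BIC")
```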
@@ -17,32 +17,39 @@ class AutoMA(Metric):
  Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on
  minimal BIC and AIC values.

- **Purpose**: The `AutoMA` metric serves an essential role of automated decision-making for selecting the optimal
- Moving Average (MA) order for every variable in a given time series dataset. The selection is dependent on the
- minimalization of BIC (Bayesian Information Criterion) and AIC (Akaike Information Criterion); these are
- established statistical tools used for model selection. Furthermore, prior to the commencement of the model fitting
- process, the algorithm conducts a stationarity test (Augmented Dickey-Fuller test) on each series.
-
- **Test Mechanism**: Starting off, the `AutoMA` algorithm checks whether the `max_ma_order` parameter has been
- provided. It consequently loops through all variables in the dataset, carrying out the Dickey-Fuller test for
- stationarity. For each stationary variable, it fits an ARIMA model for orders running from 0 to `max_ma_order`. The
- result is a list showcasing the BIC and AIC values of the ARIMA models based on different orders. The MA order,
- which yields the smallest BIC, is chosen as the 'best MA order' for every single variable. The final results
- include a table summarizing the auto MA analysis and another table listing the best MA order for each variable.
-
- **Signs of High Risk**:
+ ### Purpose
+
+ The `AutoMA` metric serves an essential role of automated decision-making for selecting the optimal Moving Average
+ (MA) order for every variable in a given time series dataset. The selection is dependent on the minimalization of
+ BIC (Bayesian Information Criterion) and AIC (Akaike Information Criterion); these are established statistical
+ tools used for model selection. Furthermore, prior to the commencement of the model fitting process, the algorithm
+ conducts a stationarity test (Augmented Dickey-Fuller test) on each series.
+
+ ### Test Mechanism
+
+ Starting off, the `AutoMA` algorithm checks whether the `max_ma_order` parameter has been provided. It consequently
+ loops through all variables in the dataset, carrying out the Dickey-Fuller test for stationarity. For each
+ stationary variable, it fits an ARIMA model for orders running from 0 to `max_ma_order`. The result is a list
+ showcasing the BIC and AIC values of the ARIMA models based on different orders. The MA order, which yields the
+ smallest BIC, is chosen as the 'best MA order' for every single variable. The final results include a table
+ summarizing the auto MA analysis and another table listing the best MA order for each variable.
+
+ ### Signs of High Risk
+
  - When a series is non-stationary (p-value>0.05 in the Dickey-Fuller test), the produced result could be inaccurate.
  - Any error that arises in the process of fitting the ARIMA models, especially with a higher MA order, can
  potentially indicate risks and might need further investigation.

- **Strengths**:
+ ### Strengths
+
  - The metric facilitates automation in the process of selecting the MA order for time series forecasting. This
  significantly saves time and reduces efforts conventionally necessary for manual hyperparameter tuning.
  - The use of both BIC and AIC enhances the likelihood of selecting the most suitable model.
  - The metric ascertains the stationarity of the series prior to model fitting, thus ensuring that the underlying
  assumptions of the MA model are fulfilled.

- **Limitations**:
+ ### Limitations
+
  - If the time series fails to be stationary, the metric may yield inaccurate results. Consequently, it necessitates
  pre-processing steps to stabilize the series before fitting the ARIMA model.
  - The metric adopts a rudimentary model selection process based on BIC and doesn't consider other potential model
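
The AutoMA mechanism is the same BIC-driven selection, but over pure moving-average models: one ARIMA(0, 0, q) fit per candidate order q up to `max_ma_order`. Again a hedged sketch, not the package's code:

```python
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA


def best_ma_order(series: pd.Series, max_ma_order: int = 3) -> int:
    series = series.dropna()

    rows = []
    for q in range(max_ma_order + 1):
        # ARIMA(0, 0, q) is a pure moving-average model of order q
        fit = ARIMA(series, order=(0, 0, q)).fit()
        rows.append({"MA Order": q, "BIC": fit.bic, "AIC": fit.aic})

    results = pd.DataFrame(rows)
    # Keep the order whose BIC is smallest
    return int(results.loc[results["BIC"].idxmin(), "MA Order"])
```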
@@ -17,28 +17,30 @@ class AutoSeasonality(Metric):
  Automatically identifies and quantifies optimal seasonality in time series data to improve forecasting model
  performance.

- **Purpose:**
- The AutoSeasonality metric's purpose is to automatically detect and identify the best seasonal order or period for
- each variable in a time series dataset. This detection helps to quantify periodic patterns and seasonality that
- reoccur at fixed intervals of time in the data. This is especially significant for forecasting-based models, where
- understanding the seasonality component can drastically improve prediction accuracy.
-
- **Test Mechanism:**
- This metric uses the seasonal decomposition method from the Statsmodels Python library. The function takes the
+ ### Purpose
+
+ The AutoSeasonality test aims to automatically detect and identify the best seasonal order or period for each
+ variable in a time series dataset. This detection helps to quantify periodic patterns and seasonality that reoccur
+ at fixed intervals in the data. Understanding the seasonality component can drastically improve prediction
+ accuracy, which is especially significant for forecasting-based models.
+
+ ### Test Mechanism
+
+ This test uses the seasonal decomposition method from the Statsmodels Python library. The function takes the
  'additive' model type for each variable and applies it within the prescribed range of 'min_period' and
- 'max_period'. The function decomposes the seasonality for each period in the range and calculates the mean residual
- error for each period. The seasonal period that results in the minimum residuals is marked as the 'Best Period'.
- The test results include the 'Best Period', the calculated residual errors, and a determination of 'Seasonality' or
- 'No Seasonality'.
+ 'max_period'. It decomposes the seasonality for each period in the range and calculates the mean residual error for
+ each period. The seasonal period that results in the minimum residuals is marked as the 'Best Period'. The test
+ results include the 'Best Period', the calculated residual errors, and a determination of 'Seasonality' or 'No
+ Seasonality'.

- **Signs of High Risk:**
+ ### Signs of High Risk

  - If the optimal seasonal period (or 'Best Period') is consistently at the maximum or minimum limit of the offered
  range for a majority of variables, it may suggest that the range set does not adequately capture the true seasonal
  pattern in the series.
  - A high average 'Residual Error' for the selected 'Best Period' could indicate issues with the model's performance.

- **Strengths:**
+ ### Strengths

  - The metric offers an automatic approach to identifying and quantifying the optimal seasonality, providing a
  robust method for analyzing time series datasets.
@@ -46,9 +48,9 @@ class AutoSeasonality(Metric):
  seasonality.
  - The use of concrete and measurable statistical methods improves the objectivity and reproducibility of the model.

- **Limitations:**
+ ### Limitations

- - This AutoSeasonality metric may not be suitable if the time series data exhibits random walk behaviour or lacks
+ - This AutoSeasonality metric may not be suitable if the time series data exhibits random walk behavior or lacks
  clear seasonality, as the seasonal decomposition model may not be appropriate.
  - The defined range for the seasonal period (min_period and max_period) can influence the outcomes. If the actual
  seasonality period lies outside this range, this method will not be able to identify the true seasonal order.
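
A sketch of the residual-error scan the AutoSeasonality docstring describes: decompose the series additively at each candidate period and keep the period with the smallest mean residual. Illustrative only; assumes a pandas Series with a regular time index:

```python
import numpy as np
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose


def best_seasonal_period(series: pd.Series, min_period: int = 2, max_period: int = 12) -> int:
    errors = {}
    for period in range(min_period, max_period + 1):
        decomposition = seasonal_decompose(series.dropna(), model="additive", period=period)
        # Mean absolute residual left over after removing trend and seasonality
        errors[period] = np.abs(decomposition.resid.dropna()).mean()

    # The candidate period with the smallest residual error is the 'Best Period'
    return min(errors, key=errors.get)
```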
@@ -13,26 +13,30 @@ class AutoStationarity(Metric):
  """
  Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame.

- **Purpose**: The AutoStationarity metric is intended to automatically detect and evaluate the stationary nature of
- each time series in a DataFrame. It incorporates the Augmented Dickey-Fuller (ADF) test, a statistical approach
- used to assess stationarity. Stationarity is a fundamental property suggesting that statistic features like mean
- and variance remain unchanged over time. This is necessary for many time-series models.
-
- **Test Mechanism**: The mechanism for the AutoStationarity test involves applying the Augmented Dicky-Fuller test
- to each time series within the given dataframe to assess if they are stationary. Every series in the dataframe is
- looped, using the ADF test up to a defined maximum order (configurable and by default set to 5). The p-value
- resulting from the ADF test is compared against a predetermined threshold (also configurable and by default set to
- 0.05). The time series is deemed stationary at its current differencing order if the p-value is less than the
- threshold.
-
- **Signs of High Risk**:
+ ### Purpose
+
+ The AutoStationarity metric is intended to automatically detect and evaluate the stationary nature of each time
+ series in a DataFrame. It incorporates the Augmented Dickey-Fuller (ADF) test, a statistical approach used to
+ assess stationarity. Stationarity is a fundamental property suggesting that statistic features like mean and
+ variance remain unchanged over time. This is necessary for many time-series models.
+
+ ### Test Mechanism
+
+ The mechanism for the AutoStationarity test involves applying the Augmented Dicky-Fuller test to each time series
+ within the given dataframe to assess if they are stationary. Every series in the dataframe is looped, using the ADF
+ test up to a defined maximum order (configurable and by default set to 5). The p-value resulting from the ADF test
+ is compared against a predetermined threshold (also configurable and by default set to 0.05). The time series is
+ deemed stationary at its current differencing order if the p-value is less than the threshold.
+
+ ### Signs of High Risk
+
  - A significant number of series not achieving stationarity even at the maximum order of differencing can indicate
  high risk or potential failure in the model.
  - This could suggest the series may not be appropriately modeled by a stationary process, hence other modeling
  approaches might be required.

+ ### Strengths

- **Strengths**:
  - The key strength in this metric lies in the automation of the ADF test, enabling mass stationarity analysis
  across various time series and boosting the efficiency and credibility of the analysis.
  - The utilization of the ADF test, a widely accepted method for testing stationarity, lends authenticity to the
@@ -40,8 +44,9 @@ class AutoStationarity(Metric):
  - The introduction of the max order and threshold parameters give users the autonomy to determine their preferred
  levels of stringency in the tests.

- **Limitations**:
- - The Augumented Dicky-Fuller test and the stationarity test are not without their limitations. These tests are
+ ### Limitations
+
+ - The Augmented Dickey-Fuller test and the stationarity test are not without their limitations. These tests are
  premised on the assumption that the series can be modeled by an autoregressive process, which may not always hold
  true.
  - The stationarity check is highly sensitive to the choice of threshold for the significance level; an extremely
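
A sketch of the differencing loop described above: apply the ADF test, and difference the series again until the p-value drops below the threshold or the maximum order is reached. Illustrative names, not the package's code:

```python
import pandas as pd
from statsmodels.tsa.stattools import adfuller


def stationarity_order(series: pd.Series, max_order: int = 5, threshold: float = 0.05):
    current = series.dropna()
    for order in range(max_order + 1):
        p_value = adfuller(current)[1]
        if p_value < threshold:
            return order, p_value  # stationary at this differencing order
        current = current.diff().dropna()  # difference once more and retry
    return None, p_value  # never reached stationarity within max_order
```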
@@ -3,109 +3,80 @@
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

  import itertools
- from dataclasses import dataclass

  import plotly.express as px

- from validmind.vm_models import Figure, Metric
+ from validmind import tags, tasks


- @dataclass
- class BivariateScatterPlots(Metric):
+ @tags("tabular_data", "numerical_data", "visualization")
+ @tasks("classification")
+ def BivariateScatterPlots(dataset):
      """
-     Generates bivariate scatterplots to visually inspect relationships between pairs of predictor variables in machine
-     learning classification tasks.
-
-     **Purpose**: This metric is intended for visual inspection and monitoring of relationships between pairs of
-     variables in a machine learning model targeting classification tasks. It is especially useful for understanding how
-     predictor variables (features) behave in relation to each other and how they are distributed for different classes
-     of the target variable, which could inform feature selection, model-building strategies, and even alert to possible
-     biases and irregularities in the data.
-
-     **Test Mechanism**: This metric operates by creating a scatter plot for each pair of the selected features in the
-     dataset. If the parameters "selected_columns" are not specified, an error will be thrown. The metric offers
-     flexibility by allowing the user to filter on a specific target class - specified by the "target_filter" parameter
-     - for more granified insights. Each scatterplot is then color-coded based on the category of the target variable
-     for better visual differentiation. The seaborn scatterplot library is used for generating the plots.
-
-     **Signs of High Risk**:
-     - Visual patterns which might suggest non-linear relationships, substantial skewness, multicollinearity,
-     clustering, or isolated outlier points in the scatter plot.
-     - Such issues could affect the assumptions and performance of some models, especially the ones assuming linearity
-     like linear regression or logistic regression.
-
-     **Strengths**:
-     - Scatterplots are simple and intuitive for users to understand, providing a visual tool to pinpoint complex
-     relationships between two variables.
-     - They are useful for outlier detection, identification of variable associations and trends, including non-linear
-     patterns which can be overlooked by other linear-focused metrics or tests.
-     - The implementation also supports visualizing binary or multi-class classification datasets.
-
-     **Limitations**:
-     - Scatterplots are limited to bivariate analysis - the relationship of two variables at a time - and might not
-     reveal the full picture in higher dimensions or where interactions are present.
-     - They are not ideal for very large datasets as points will overlap and render the visualization less informative.
-     - Scatterplots are more of an exploratory tool rather than a formal statistical test, so they don't provide any
-     quantitative measure of model quality or performance.
-     - Interpretation of scatterplots relies heavily on the domain knowledge and judgment of the viewer, which can
-     introduce subjective bias.
+     Generates bivariate scatterplots to visually inspect relationships between pairs of numerical predictor variables
+     in machine learning classification tasks.
+
+     ### Purpose
+
+     This function is intended for visual inspection and monitoring of relationships between pairs of numerical
+     variables in a machine learning model targeting classification tasks. It helps in understanding how predictor
+     variables (features) interact with each other, which can inform feature selection, model-building strategies, and
+     identify potential biases or irregularities in the data.
+
+     ### Test Mechanism
+
+     The function creates scatter plots for each pair of numerical features in the dataset. It first filters out
+     non-numerical and binary features, ensuring the plots focus on meaningful numerical relationships. The resulting
+     scatterplots are color-coded uniformly to avoid visual distraction, and the function returns a tuple of Plotly
+     figure objects, each representing a scatter plot for a pair of features.
+
+     ### Signs of High Risk
+
+     - Visual patterns suggesting non-linear relationships, multicollinearity, clustering, or outlier points in the
+     scatter plots.
+     - Such issues could affect the assumptions and performance of certain models, especially those assuming linearity,
+     like logistic regression.
+
+     ### Strengths
+
+     - Scatterplots provide an intuitive and visual tool to explore relationships between two variables.
+     - They are useful for identifying outliers, variable associations, and trends, including non-linear patterns.
+     - Supports visualization of binary or multi-class classification datasets, focusing on numerical features.
+
+     ### Limitations
+
+     - Scatterplots are limited to bivariate analysis, showing relationships between only two variables at a time.
+     - Not ideal for very large datasets where overlapping points can reduce the clarity of the visualization.
+     - Scatterplots are exploratory tools and do not provide quantitative measures of model quality or performance.
+     - Interpretation is subjective and relies on the domain knowledge and judgment of the viewer.
      """
+     figures = []

-     name = "bivariate_scatter_plots"
-     required_inputs = ["dataset"]
-     default_params = {"selected_columns": None}
-     tasks = ["classification"]
-     tags = [
-         "tabular_data",
-         "categorical_data",
-         "binary_classification",
-         "multiclass_classification",
-         "visualization",
+     # Select numerical features
+     features = dataset.feature_columns_numeric
+
+     # Select non-binary features
+     features = [
+         feature for feature in features if len(dataset.df[feature].unique()) > 2
      ]

-     def plot_bivariate_scatter(self, columns):
-         figures = []
-         df = self.inputs.dataset.df
-
-         # Generate all pairs of columns
-         features_pairs = list(itertools.combinations(columns, 2))
-
-         for x, y in features_pairs:
-             fig = px.scatter(
-                 df,
-                 x=x,
-                 y=y,
-                 title=f"{x} and {y}",
-                 labels={x: x, y: y},
-                 opacity=0.7,
-                 color_discrete_sequence=["blue"],  # Use the same color for all points
-             )
-             fig.update_traces(marker=dict(color="blue"))
-
-             figures.append(
-                 Figure(for_object=self, key=f"{self.key}:{x}_{y}", figure=fig)
-             )
-
-         return figures
-
-     def run(self):
-         selected_columns = self.params["selected_columns"]
-
-         if selected_columns is None:
-             # Use all columns if selected_columns is not provided
-             selected_columns = self.inputs.dataset.df.columns.tolist()
-         else:
-             # Check if all selected columns exist in the dataframe
-             missing_columns = [
-                 col
-                 for col in selected_columns
-                 if col not in self.inputs.dataset.df.columns
-             ]
-             if missing_columns:
-                 raise ValueError(
-                     f"The following selected columns are not in the dataframe: {missing_columns}"
-                 )
-
-         figures = self.plot_bivariate_scatter(selected_columns)
-
-         return self.cache_results(figures=figures)
+     df = dataset.df[features]
+
+     # Generate all pairs of columns
+     features_pairs = list(itertools.combinations(df.columns, 2))
+
+     for x, y in features_pairs:
+         fig = px.scatter(
+             df,
+             x=x,
+             y=y,
+             title=f"{x} and {y}",
+             labels={x: x, y: y},
+             opacity=0.7,
+             color_discrete_sequence=["blue"],  # Use the same color for all points
+         )
+         fig.update_traces(marker=dict(color="blue"))
+
+         figures.append(fig)
+
+     return tuple(figures)
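
Since BivariateScatterPlots is now a plain decorated function, it runs through the library's test runner (see validmind/tests/run.py in the file list). A rough usage sketch, assuming the usual test ID and a dataset already wrapped with `vm.init_dataset(...)` (both assumptions, not confirmed by this diff):

```python
import validmind as vm
from validmind.tests import run_test

# `vm_dataset` is assumed to come from vm.init_dataset(...) earlier in a notebook
result = run_test(
    "validmind.data_validation.BivariateScatterPlots",
    inputs={"dataset": vm_dataset},
)
result.log()  # send the generated figures to the ValidMind platform
```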
@@ -2,40 +2,47 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ import pandas as pd
  from statsmodels.stats.diagnostic import acorr_ljungbox

- from validmind.vm_models import Metric
+ from validmind import tags, tasks


- class BoxPierce(Metric):
+ @tasks("regression")
+ @tags("time_series_data", "forecasting", "statistical_test", "statsmodels")
+ def BoxPierce(dataset):
      """
      Detects autocorrelation in time-series data through the Box-Pierce test to validate model performance.

-     **Purpose:** The Box-Pierce test is utilized to detect the presence of autocorrelation in a time-series dataset.
+     ### Purpose
+
+     The Box-Pierce test is utilized to detect the presence of autocorrelation in a time-series dataset.
      Autocorrelation, or serial correlation, refers to the degree of similarity between observations based on the
      temporal spacing between them. This test is essential for affirming the quality of a time-series model by ensuring
      that the error terms in the model are random and do not adhere to a specific pattern.

-     **Test Mechanism:** The implementation of the Box-Pierce test involves calculating a test statistic along with a
-     corresponding p-value derived from the dataset features. These quantities are used to test the null hypothesis that
-     posits the data to be independently distributed. This is achieved by iterating over every feature column in the
-     time-series data and applying the `acorr_ljungbox` function of the statsmodels library. The function yields the
-     Box-Pierce test statistic as well as the respective p-value, all of which are cached as test results.
+     ### Test Mechanism
+
+     The implementation of the Box-Pierce test involves calculating a test statistic along with a corresponding p-value
+     derived from the dataset features. These quantities are used to test the null hypothesis that posits the data to be
+     independently distributed. This is achieved by iterating over every feature column in the time-series data and
+     applying the `acorr_ljungbox` function of the statsmodels library. The function yields the Box-Pierce test
+     statistic as well as the respective p-value, all of which are cached as test results.

-     **Signs of High Risk:**
+     ### Signs of High Risk

      - A low p-value, typically under 0.05 as per statistical convention, throws the null hypothesis of independence
      into question. This implies that the dataset potentially houses autocorrelations, thus indicating a high-risk
      scenario concerning model performance.
      - Large Box-Pierce test statistic values may indicate the presence of autocorrelation.

-     **Strengths:**
+     ### Strengths

      - Detects patterns in data that are supposed to be random, thereby ensuring no underlying autocorrelation.
      - Can be computed efficiently given its low computational complexity.
      - Can be widely applied to most regression problems, making it very versatile.

-     **Limitations:**
+     ### Limitations

      - Assumes homoscedasticity (constant variance) and normality of residuals, which may not always be the case in
      real-world datasets.
@@ -43,29 +50,22 @@ class BoxPierce(Metric):
      correlations.
      - It only provides a general indication of the existence of autocorrelation, without providing specific insights
      into the nature or patterns of the detected autocorrelation.
-     - In the presence of exhibits trends or seasonal patterns, the Box-Pierce test may yield misleading results.
+     - In the presence of trends or seasonal patterns, the Box-Pierce test may yield misleading results.
      - Applicability is limited to time-series data, which limits its overall utility.
      """

-     name = "box_pierce"
-     required_inputs = ["dataset"]
-     tasks = ["regression"]
-     tags = ["time_series_data", "forecasting", "statistical_test", "statsmodels"]
-
-     def run(self):
-         """
-         Calculates Box-Pierce test for each of the dataset features
-         """
-         x_train = self.inputs.dataset.df
-
-         box_pierce_values = {}
-         for col in x_train.columns:
-             bp_results = acorr_ljungbox(
-                 x_train[col].values, boxpierce=True, return_df=True
-             )
-             box_pierce_values[col] = {
-                 "stat": bp_results.iloc[0]["lb_stat"],
-                 "pvalue": bp_results.iloc[0]["lb_pvalue"],
-             }
-
-         return self.cache_results(box_pierce_values)
+     df = dataset.df
+
+     box_pierce_values = {}
+     for col in df.columns:
+         bp_results = acorr_ljungbox(df[col].values, boxpierce=True, return_df=True)
+         box_pierce_values[col] = {
+             "stat": bp_results.iloc[0]["lb_stat"],
+             "pvalue": bp_results.iloc[0]["lb_pvalue"],
+         }
+
+     box_pierce_df = pd.DataFrame.from_dict(box_pierce_values, orient="index")
+     box_pierce_df.reset_index(inplace=True)
+     box_pierce_df.columns = ["column", "stat", "pvalue"]
+
+     return box_pierce_df
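
One detail worth knowing about the statsmodels call used above: with `boxpierce=True` and `return_df=True`, `acorr_ljungbox` returns a DataFrame whose `lb_*` columns hold the Ljung-Box statistics and whose `bp_*` columns hold the Box-Pierce variants (the test above reads the `lb_*` columns). A minimal standalone call on synthetic white noise:

```python
import numpy as np
import pandas as pd
from statsmodels.stats.diagnostic import acorr_ljungbox

rng = np.random.default_rng(0)
white_noise = pd.Series(rng.normal(size=200))  # no autocorrelation expected

results = acorr_ljungbox(white_noise, lags=[10], boxpierce=True, return_df=True)
print(results[["lb_stat", "lb_pvalue", "bp_stat", "bp_pvalue"]])
```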