validmind 2.5.6__py3-none-any.whl → 2.5.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +26 -7
  3. validmind/api_client.py +89 -43
  4. validmind/client.py +2 -2
  5. validmind/client_config.py +11 -14
  6. validmind/datasets/regression/fred_timeseries.py +67 -138
  7. validmind/template.py +1 -0
  8. validmind/test_suites/__init__.py +0 -2
  9. validmind/test_suites/statsmodels_timeseries.py +1 -1
  10. validmind/test_suites/summarization.py +0 -1
  11. validmind/test_suites/time_series.py +0 -43
  12. validmind/tests/__types__.py +3 -13
  13. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  14. validmind/tests/data_validation/ADF.py +31 -24
  15. validmind/tests/data_validation/AutoAR.py +9 -9
  16. validmind/tests/data_validation/AutoMA.py +23 -16
  17. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  18. validmind/tests/data_validation/AutoStationarity.py +21 -16
  19. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  20. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
  21. validmind/tests/data_validation/ClassImbalance.py +15 -12
  22. validmind/tests/data_validation/DFGLSArch.py +19 -13
  23. validmind/tests/data_validation/DatasetDescription.py +17 -11
  24. validmind/tests/data_validation/DatasetSplit.py +7 -5
  25. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  26. validmind/tests/data_validation/Duplicates.py +33 -25
  27. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  28. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  29. validmind/tests/data_validation/HighCardinality.py +19 -12
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  32. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  33. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  34. validmind/tests/data_validation/KPSS.py +34 -29
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  36. validmind/tests/data_validation/MissingValues.py +32 -27
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  38. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  39. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  40. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  41. validmind/tests/data_validation/ScatterPlot.py +63 -78
  42. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  43. validmind/tests/data_validation/Skewness.py +35 -37
  44. validmind/tests/data_validation/SpreadPlot.py +35 -35
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  47. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  48. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  49. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  50. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  51. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  52. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  53. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  54. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  55. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  57. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  58. validmind/tests/data_validation/UniqueRows.py +11 -6
  59. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  60. validmind/tests/data_validation/WOEBinTable.py +35 -30
  61. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  62. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  63. validmind/tests/data_validation/nlp/Hashtags.py +27 -20
  64. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  65. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  66. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  67. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  68. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  69. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  70. validmind/tests/data_validation/nlp/TextDescription.py +36 -35
  71. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  72. validmind/tests/decorator.py +81 -42
  73. validmind/tests/model_validation/BertScore.py +36 -27
  74. validmind/tests/model_validation/BleuScore.py +25 -19
  75. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  76. validmind/tests/model_validation/ContextualRecall.py +35 -13
  77. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  78. validmind/tests/model_validation/MeteorScore.py +46 -33
  79. validmind/tests/model_validation/ModelMetadata.py +32 -64
  80. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  81. validmind/tests/model_validation/RegardScore.py +30 -14
  82. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  83. validmind/tests/model_validation/RougeScore.py +36 -30
  84. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  85. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  86. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  87. validmind/tests/model_validation/TokenDisparity.py +31 -23
  88. validmind/tests/model_validation/ToxicityScore.py +26 -17
  89. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  90. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  91. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  92. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  93. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  94. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  96. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  97. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  98. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  101. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  102. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  103. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  104. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  105. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  106. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  107. validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
  108. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  109. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  110. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  111. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  112. validmind/tests/model_validation/ragas/utils.py +6 -0
  113. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  114. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  117. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  118. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  119. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  120. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  121. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  122. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  123. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  124. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  125. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  126. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  127. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  128. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  137. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +113 -73
  141. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  144. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  147. validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
  148. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  149. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
  150. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  151. validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
  152. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  153. validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
  154. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  155. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
  156. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  157. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  158. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  159. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  160. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  161. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  162. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  163. validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
  164. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  165. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
  166. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  167. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  168. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  169. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  170. validmind/tests/prompt_validation/Bias.py +14 -11
  171. validmind/tests/prompt_validation/Clarity.py +16 -14
  172. validmind/tests/prompt_validation/Conciseness.py +7 -5
  173. validmind/tests/prompt_validation/Delimitation.py +23 -22
  174. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  175. validmind/tests/prompt_validation/Robustness.py +12 -10
  176. validmind/tests/prompt_validation/Specificity.py +13 -11
  177. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  178. validmind/tests/run.py +68 -23
  179. validmind/unit_metrics/__init__.py +81 -144
  180. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  181. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  182. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  183. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  184. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  185. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  186. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  187. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  188. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  189. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  190. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  191. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  192. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  193. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  194. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  195. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  196. validmind/vm_models/dataset/dataset.py +2 -0
  197. validmind/vm_models/figure.py +5 -0
  198. validmind/vm_models/test/result_wrapper.py +93 -132
  199. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
  200. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
  201. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  202. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  203. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  204. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  205. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  206. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  207. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  208. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  209. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  210. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
  211. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
  212. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
@@ -17,36 +17,43 @@ logger = get_logger(__name__)
17
17
  @dataclass
18
18
  class PhillipsPerronArch(Metric):
19
19
  """
20
- Executes Phillips-Perron test to assess the stationarity of time series data in each ML model feature.
21
-
22
- **Purpose**: The Phillips-Perron (PP) test is used to establish the order of integration in time series data,
23
- testing a null hypothesis that a time series is unit-root non-stationary. This is vital in forecasting and
24
- understanding the stochastic behavior of data within machine learning models. Essentially, the PP test aids in
25
- confirming the robustness of results and generating valid predictions from regression analysis models.
26
-
27
- **Test Mechanism**: The PP test is conducted for each feature in the dataset. A data frame is created from the
28
- dataset, and for each column in this frame, the PhillipsPerron method calculates the statistic value, p-value, used
29
- lags, and number of observations. This process computes the PP metric for each feature and stores the results for
30
- future reference.
31
-
32
- **Signs of High Risk**:
33
- - A high P-value could imply that the series has a unit root and is therefore non-stationary.
34
- - Test statistic values that surpass critical values indicate additional evidence of non-stationarity.
35
- - A high 'usedlag' value for a series could point towards autocorrelation issues which could further impede the
36
- model's performance.
37
-
38
- **Strengths**:
39
- - Resilience against heteroskedasticity in the error term is a significant strength of the PP test.
40
- - Its capacity to handle long time series data.
41
- - Its ability to determine whether the time series is stationary or not, influencing the selection of suitable
42
- models for forecasting.
43
-
44
- **Limitations**:
45
- - The PP test can only be employed within a univariate time series framework.
46
- - The test relies on asymptotic theory, which means the test's power can significantly diminish for small sample
47
- sizes.
48
- - The need to convert non-stationary time series into stationary series through differencing might lead to loss of
49
- vital data points.
20
+ Assesses the stationarity of time series data in each feature of the ML model using the Phillips-Perron test.
21
+
22
+ ### Purpose
23
+
24
+ The Phillips-Perron (PP) test is used to determine the stationarity of time series data for each feature in a
25
+ dataset, which is crucial for forecasting tasks. It tests the null hypothesis that a time series is unit-root
26
+ non-stationary. This is vital for understanding the stochastic behavior of the data and ensuring the robustness and
27
+ validity of predictions generated by regression analysis models.
28
+
29
+ ### Test Mechanism
30
+
31
+ The PP test is conducted for each feature in the dataset as follows:
32
+ - A data frame is created from the dataset.
33
+ - For each column, the Phillips-Perron method calculates the test statistic, p-value, lags used, and number of
34
+ observations.
35
+ - The results are then stored for each feature, providing a metric that indicates the stationarity of the time
36
+ series data.
37
+
38
+ ### Signs of High Risk
39
+
40
+ - A high p-value, indicating that the series has a unit root and is non-stationary.
41
+ - Test statistic values exceeding critical values, suggesting non-stationarity.
42
+ - High 'usedlag' value, pointing towards autocorrelation issues that may degrade model performance.
43
+
44
+ ### Strengths
45
+
46
+ - Resilience against heteroskedasticity in the error term.
47
+ - Effective for long time series data.
48
+ - Helps in determining whether the time series is stationary, aiding in the selection of suitable forecasting
49
+ models.
50
+
51
+ ### Limitations
52
+
53
+ - Applicable only within a univariate time series framework.
54
+ - Relies on asymptotic theory, which may reduce the test's power for small sample sizes.
55
+ - Non-stationary time series must be converted to stationary series through differencing, potentially leading to
56
+ loss of important data points.
50
57
  """
51
58
 
52
59
  name = "phillips_perron"
@@ -10,41 +10,49 @@ from validmind.vm_models import Figure, Metric
10
10
 
11
11
  class RollingStatsPlot(Metric):
12
12
  """
13
- This test evaluates the stationarity of time series data by plotting its rolling mean and standard deviation.
14
-
15
- **Purpose**: The `RollingStatsPlot` metric is employed to gauge the stationarity of time series data in a given
16
- dataset. This metric specifically evaluates the rolling mean and rolling standard deviation of the dataset over a
17
- pre-specified window size. The rolling mean provides an understanding of the average trend in the data, while the
18
- rolling standard deviation gauges the volatility of the data within the window. It is critical in preparing time
19
- series data for modeling as it reveals key insights into data behavior across time.
20
-
21
- **Test Mechanism**: This mechanism is comprised of two steps. Initially, the rolling mean and standard deviation
22
- for each of the dataset's columns are calculated over a window size, which can be user-specified or by default set
23
- to 12 data points. Then, the calculated rolling mean and standard deviation are visualized via separate plots,
24
- illustrating the trends and volatility in the dataset. A straightforward check is conducted to ensure the existence
25
- of columns in the dataset, and to verify that the given dataset has been indexed by its date and time—a necessary
26
- prerequisites for time series analysis.
27
-
28
- **Signs of High Risk**:
13
+ Evaluates the stationarity of time series data by plotting its rolling mean and standard deviation over a specified
14
+ window.
15
+
16
+ ### Purpose
17
+
18
+ The `RollingStatsPlot` metric is employed to gauge the stationarity of time series data in a given dataset. This
19
+ metric specifically evaluates the rolling mean and rolling standard deviation of the dataset over a pre-specified
20
+ window size. The rolling mean provides an understanding of the average trend in the data, while the rolling
21
+ standard deviation gauges the volatility of the data within the window. It is critical in preparing time series
22
+ data for modeling as it reveals key insights into data behavior across time.
23
+
24
+ ### Test Mechanism
25
+
26
+ This mechanism consists of two steps. Initially, the rolling mean and standard deviation for each of the
27
+ dataset's columns are calculated over a window size, which can be user-specified or by default set to 12 data
28
+ points. Then, the calculated rolling mean and standard deviation are visualized via separate plots, illustrating
29
+ the trends and volatility in the dataset. A straightforward check is conducted to ensure the existence of columns
30
+ in the dataset, and to verify that the given dataset has been indexed by its date and time—a necessary prerequisite
31
+ for time series analysis.
32
+
33
+ ### Signs of High Risk
34
+
29
35
  - The presence of non-stationary patterns in either the rolling mean or the rolling standard deviation plots, which
30
36
  could indicate trends or seasonality in the data that may affect the performance of time series models.
31
37
  - Missing columns in the dataset, which would prevent the execution of this metric correctly.
32
38
  - The detection of NaN values in the dataset, which may need to be addressed before the metric can proceed
33
39
  successfully.
34
40
 
35
- **Strengths**:
36
- - Offers visualizations of trending behaviour and volatility within the data, facilitating a broader understanding
41
+ ### Strengths
42
+
43
+ - Offers visualizations of trending behavior and volatility within the data, facilitating a broader understanding
37
44
  of the dataset's inherent characteristics.
38
- - Checks of the dataset's integrity, such as existence of all required columns and the availability of a datetime
39
- index.
45
+ - Checks of the dataset's integrity, such as the existence of all required columns and the availability of a
46
+ datetime index.
40
47
  - Adjusts to accommodate various window sizes, thus allowing accurate analysis of data with differing temporal
41
48
  granularities.
42
49
  - Considers each column of the data individually, thereby accommodating multi-feature datasets.
43
50
 
44
- **Limitations**:
45
- - For all columns, a fixed-size window is utilised. This may not accurately capture patterns in datasets where
51
+ ### Limitations
52
+
53
+ - For all columns, a fixed-size window is utilized. This may not accurately capture patterns in datasets where
46
54
  different features may require different optimal window sizes.
47
- - Requires the dataset to be indexed by date and time, hence it may not be useable for datasets without a timestamp
55
+ - Requires the dataset to be indexed by date and time, hence it may not be usable for datasets without a timestamp
48
56
  index.
49
57
  - Primarily serves for data visualization as it does not facilitate any quantitative measures for stationarity,
50
58
  such as through statistical tests. Therefore, the interpretation is subjective and depends heavily on modeler
@@ -5,86 +5,71 @@
5
5
  import matplotlib.pyplot as plt
6
6
  import seaborn as sns
7
7
 
8
- from validmind.vm_models import Figure, Metric
8
+ from validmind import tags, tasks
9
9
 
10
10
 
11
- class ScatterPlot(Metric):
11
+ @tags("tabular_data", "visualization")
12
+ @tasks("classification", "regression")
13
+ def ScatterPlot(dataset):
12
14
  """
13
- Creates a scatter plot matrix to visually analyze feature relationships, patterns, and outliers in a dataset.
14
-
15
- **Purpose**: The ScatterPlot metric is designed to offer a visual analysis of a given dataset by constructing a
16
- scatter plot matrix encapsulating all the dataset's features (or columns). Its primary function lies in unearthing
17
- relationships, patterns, or outliers across different features, thus providing both quantitative and qualitative
18
- insights into the multidimensional relationships within the dataset. This visual assessment aids in understanding
19
- the efficacy of the chosen features for model training and their overall suitability.
20
-
21
- **Test Mechanism**: Using the seaborn library, the ScatterPlot class creates the scatter plot matrix. The process
22
- includes retrieving all columns from the dataset, verifying their existence, and subsequently generating a pairplot
23
- for these columns. A kernel density estimate (kde) is utilized to present a smoother, univariate distribution along
24
- the grid's diagonal. The final plot is housed in an array of Figure objects, each wrapping a matplotlib figure
25
- instance for storage and future usage.
26
-
27
- **Signs of High Risk**:
28
- - The emergence of non-linear or random patterns across different feature pairs. This may suggest intricate
29
- relationships unfit for linear presumptions.
30
- - A lack of clear patterns or clusters which might point to weak or non-existent correlations among features, thus
31
- creating a problem for certain model types.
32
- - The occurrence of outliers as visual outliers in your data can adversely influence the model's performance.
33
-
34
- **Strengths**:
35
- - It offers insight into the multidimensional relationships among multiple features.
36
- - It assists in identifying trends, correlations, and outliers which could potentially affect the model's
37
- performance.
38
- - As a diagnostic tool, it can validate whether certain assumptions made during the model-creation process, such as
39
- linearity, hold true.
40
- - The tool's versatility extends to its application for both regression and classification tasks.
41
-
42
- **Limitations**:
43
- - Scatter plot matrices may become cluttered and hard to decipher as the number of features escalates, resulting in
44
- complexity and confusion.
45
- - While extremely proficient in revealing pairwise relationships, these matrices may fail to illuminate complex
46
- interactions that involve three or more features.
47
- - These matrices are primarily visual tools, so the precision of quantitative analysis may be compromised.
48
- - If not clearly visible, outliers can be missed, which could negatively affect model performance.
49
- - It assumes that the dataset can fit into the computer's memory, which might not always be valid particularly for
50
- extremely large datasets.
15
+ Assesses visual relationships, patterns, and outliers among features in a dataset through scatter plot matrices.
16
+
17
+ ### Purpose
18
+
19
+ The ScatterPlot test aims to visually analyze a given dataset by constructing a scatter plot matrix of its
20
+ numerical features. The primary goal is to uncover relationships, patterns, and outliers across different features
21
+ to provide both quantitative and qualitative insights into multidimensional relationships within the dataset. This
22
+ visual assessment aids in understanding the efficacy of the chosen features for model training and their
23
+ suitability.
24
+
25
+ ### Test Mechanism
26
+
27
+ Using the Seaborn library, the ScatterPlot function creates the scatter plot matrix. The process involves
28
+ retrieving all numerical columns from the dataset and generating a scatter matrix for these columns. The resulting
29
+ scatter plot provides visual representations of feature relationships. The function also adjusts axis labels for
30
+ readability and returns a one-element tuple holding the Matplotlib figure for further analysis and visualization.
31
+
32
+ ### Signs of High Risk
33
+
34
+ - The emergence of non-linear or random patterns across different feature pairs, suggesting complex relationships
35
+ unsuitable for linear assumptions.
36
+ - Lack of clear patterns or clusters, indicating weak or non-existent correlations among features, which could
37
+ challenge certain model types.
38
+ - Presence of outliers, as visual outliers can adversely influence the model's performance.
39
+
40
+ ### Strengths
41
+
42
+ - Provides insight into the multidimensional relationships among multiple features.
43
+ - Assists in identifying trends, correlations, and outliers that could affect model performance.
44
+ - Validates assumptions made during model creation, such as linearity.
45
+ - Versatile for application in both regression and classification tasks.
46
+ - Using Seaborn facilitates an intuitive and detailed visual exploration of data.
47
+
48
+ ### Limitations
49
+
50
+ - Scatter plot matrices may become cluttered and hard to decipher as the number of features increases.
51
+ - Primarily reveals pairwise relationships and may fail to illuminate complex interactions involving three or more
52
+ features.
53
+ - Being a visual tool, precision in quantitative analysis might be compromised.
54
+ - Outliers not clearly visible in plots can be missed, affecting model performance.
55
+ - Assumes that the dataset can fit into the computer's memory, which might not be valid for extremely large
56
+ datasets.
51
57
  """
52
58
 
53
- name = "scatter_plot"
54
- required_inputs = ["dataset"]
55
- tasks = ["classification", "regression"]
56
- tags = ["tabular_data", "visualization"]
57
-
58
- def run(self):
59
- columns = list(self.inputs.dataset.df.columns)
60
-
61
- df = self.inputs.dataset.df[columns]
62
-
63
- if not set(columns).issubset(set(df.columns)):
64
- raise ValueError("Provided 'columns' must exist in the dataset")
65
-
66
- g = sns.pairplot(data=df, diag_kind="kde")
67
- for ax in g.axes.flatten():
68
- # rotate x axis labels
69
- ax.set_xlabel(ax.get_xlabel(), rotation=45)
70
- # rotate y axis labels
71
- ax.set_ylabel(ax.get_ylabel(), rotation=45)
72
- # set y labels alignment
73
- ax.yaxis.get_label().set_horizontalalignment("right")
74
- # Get the current figure
75
- fig = plt.gcf()
76
-
77
- figures = []
78
- figures.append(
79
- Figure(
80
- for_object=self,
81
- key=self.key,
82
- figure=fig,
83
- )
84
- )
85
-
86
- plt.close("all")
87
-
88
- return self.cache_results(
89
- figures=figures,
90
- )
59
+ g = sns.pairplot(data=dataset.df, diag_kind="kde")
60
+ for ax in g.axes.flatten():
61
+ # rotate x axis labels
62
+ ax.set_xlabel(ax.get_xlabel(), rotation=45)
63
+ # rotate y axis labels
64
+ ax.set_ylabel(ax.get_ylabel(), rotation=45)
65
+ # set y labels alignment
66
+ ax.yaxis.get_label().set_horizontalalignment("right")
67
+ # Get the current figure
68
+ fig = plt.gcf()
69
+
70
+ figures = []
71
+ figures.append(fig)
72
+
73
+ plt.close("all")
74
+
75
+ return tuple(figures)
@@ -19,41 +19,45 @@ logger = get_logger(__name__)
19
19
 
20
20
  class SeasonalDecompose(Metric):
21
21
  """
22
- Decomposes dataset features into observed, trend, seasonal, and residual components to identify patterns and
23
- validate dataset.
24
-
25
- **Purpose**: This test utilizes the Seasonal Decomposition of Time Series by Loess (STL) method to decompose a
26
- dataset into its fundamental components: observed, trend, seasonal, and residuals. The purpose is to identify
27
- implicit patterns, majorly any seasonality, in the dataset's features which aid in developing a more comprehensive
28
- understanding and effectively validating the dataset.
29
-
30
- **Test Mechanism**: The testing process exploits the `seasonal_decompose` function from the
31
- `statsmodels.tsa.seasonal` library to evaluate each feature in the dataset. It isolates each feature into four
32
- components: observed, trend, seasonal, and residuals, and generates essentially six subplot graphs per feature for
33
- visual interpretation of the results. Prior to the seasonal decomposition, non-finite values are scrutinized and
34
- removed thus, ensuring reliability in the analysis.
35
-
36
- **Signs of High Risk**:
37
- - **Non-Finiteness**: If a dataset carries too many non-finite values it might flag high risk as these values are
22
+ Assesses patterns and seasonality in a time series dataset by decomposing its features into foundational components.
23
+
24
+ ### Purpose
25
+
26
+ The Seasonal Decompose test aims to decompose the features of a time series dataset into their fundamental
27
+ components: observed, trend, seasonal, and residuals. Using classical moving-average seasonal decomposition, the
28
+ test identifies underlying patterns, predominantly seasonality, in the dataset's features.
29
+ This aids in developing a more comprehensive understanding of the dataset, which in turn facilitates more effective
30
+ model validation.
31
+
32
+ ### Test Mechanism
33
+
34
+ The testing process leverages the `seasonal_decompose` function from the `statsmodels.tsa.seasonal` library to
35
+ evaluate each feature in the dataset. It isolates each feature into four components—observed, trend, seasonal, and
36
+ residuals—and generates six subplot graphs per feature for visual interpretation. Prior to decomposition, the test
37
+ scrutinizes and removes any non-finite values, ensuring the reliability of the analysis.
38
+
39
+ ### Signs of High Risk
40
+
41
+ - **Non-Finiteness**: Datasets with many non-finite values may be flagged as high risk since these values are
38
42
  omitted before conducting the seasonal decomposition.
39
- - **Frequent Warnings**: The test could be at risk if it chronically fails to infer frequency for a scrutinized
40
- feature.
41
- - **High Seasonality**: A high seasonal component could potentially render forecasts unreliable due to overwhelming
42
- seasonal variation.
43
-
44
- **Strengths**:
45
- - **Seasonality Detection**: The code aptly discerns hidden seasonality patterns in the features of datasets.
46
- - **Visualization**: The test facilitates interpretation and comprehension via graphical representations.
47
- - **Unrestricted Usage**: The code is not confined to any specific regression model, thereby promoting wide-ranging
48
- applicability.
49
-
50
- **Limitations**:
51
- - **Dependence on Assumptions**: The test presumes that features in the dataset are periodically distributed. If no
52
- frequency could be inferred for a variable, that feature is excluded from the test.
53
- - **Handling Non-finite Values**: The test disregards non-finite values during the analysis which could potentially
54
- result in incomplete understanding of the dataset.
55
- - **Unreliability with Noisy Datasets**: The test tends to produce unreliable results when used with heavy noise
56
- present in the dataset.
43
+ - **Frequent Warnings**: Chronic failure to infer the frequency for a scrutinized feature indicates high risk.
44
+ - **High Seasonality**: A significant seasonal component could potentially render forecasts unreliable due to
45
+ overwhelming seasonal variation.
46
+
47
+ ### Strengths
48
+
49
+ - **Seasonality Detection**: Accurately discerns hidden seasonality patterns in dataset features.
50
+ - **Visualization**: Facilitates interpretation and comprehension through graphical representations.
51
+ - **Unrestricted Usage**: Not confined to any specific regression model, promoting wide-ranging applicability.
52
+
53
+ ### Limitations
54
+
55
+ - **Dependence on Assumptions**: Assumes that dataset features are periodically distributed. Features with no
56
+ inferable frequency are excluded from the test.
57
+ - **Handling Non-Finite Values**: Disregards non-finite values during analysis, potentially resulting in an
58
+ incomplete understanding of the dataset.
59
+ - **Unreliability with Noisy Datasets**: Produces unreliable results when used with datasets that contain heavy
60
+ noise.
57
61
  """
58
62
 
59
63
  name = "seasonal_decompose"
@@ -20,43 +20,41 @@ from validmind.vm_models import (
20
20
  @dataclass
21
21
  class Skewness(ThresholdTest):
22
22
  """
23
- Evaluates the skewness of numerical data in a machine learning model and checks if it falls below a set maximum
24
- threshold.
25
-
26
- **Purpose**: The purpose of the Skewness test is to measure the asymmetry in the distribution of data within a
27
- predictive machine learning model. Specifically, it evaluates the divergence of said distribution from a normal
28
- distribution. In understanding the level of skewness, we can potentially identify issues with data quality, an
29
- essential component for optimizing the performance of traditional machine learning models in both classification
30
- and regression settings.
31
-
32
- **Test Mechanism**: This test calculates skewness of numerical columns in a dataset, which is extracted from the
33
- DataFrame, specifically focusing on numerical data types. The skewness value is then contrasted against a
34
- predetermined maximum threshold, set by default to 1. The skewness value under review is deemed to have passed the
35
- test only if it is less than this maximum threshold; otherwise, the test is considered 'fail'. Subsequently, the
36
- test results of each column, together with the skewness value and column name, are cached.
37
-
38
- **Signs of High Risk**:
39
-
40
- - The presence of substantial skewness levels that significantly exceed the maximum threshold is an indication of
41
- skewed data distribution and subsequently high model risk.
42
- - Persistent skewness in data could signify that the foundational assumptions of the machine learning model may not
43
- be applicable, potentially leading to subpar model performance, erroneous predictions, or biased inferences.
44
-
45
- **Strengths**:
46
-
47
- - Fast and efficient identification of unequal data
48
- - distributions within a machine learning model is enabled by the skewness test.
49
- - The maximum threshold parameter can be adjusted to meet the user's specific needs, enhancing the test's
50
- versatility.
51
-
52
- **Limitations**:
53
-
54
- - The test only evaluates numeric columns, which means that data in non-numeric columns could still include bias or
55
- problematic skewness that this test does not capture.
56
- - The test inherently assumes that the data should follow a normal distribution, an expectation which may not
57
- always be met in real-world data.
58
- - The risk grading is largely dependent on a subjective threshold, which may result in excessive strictness or
59
- leniency depending upon selection. This factor might require expert input and recurrent iterations for refinement.
23
+ Evaluates the skewness of numerical data in a dataset to check against a defined threshold, aiming to ensure data
24
+ quality and optimize model performance.
25
+
26
+ ### Purpose
27
+
28
+ The purpose of the Skewness test is to measure the asymmetry in the distribution of data within a predictive
29
+ machine learning model. Specifically, it evaluates the divergence of said distribution from a normal distribution.
30
+ Understanding the level of skewness helps identify data quality issues, which are crucial for optimizing the
31
+ performance of traditional machine learning models in both classification and regression settings.
32
+
33
+ ### Test Mechanism
34
+
35
+ This test calculates the skewness of numerical columns in the dataset, focusing specifically on numerical data
36
+ types. The calculated skewness value is then compared against a predetermined maximum threshold, which is set by
37
+ default to 1. If the skewness value is less than this maximum threshold, the test passes; otherwise, it fails. The
38
+ test results, along with the skewness values and column names, are then recorded for further analysis.
39
+
40
+ ### Signs of High Risk
41
+
42
+ - Substantial skewness levels that significantly exceed the maximum threshold.
43
+ - Persistent skewness in the data, indicating potential issues with the foundational assumptions of the machine
44
+ learning model.
45
+ - Subpar model performance, erroneous predictions, or biased inferences due to skewed data distributions.
46
+
47
+ ### Strengths
48
+
49
+ - Fast and efficient identification of asymmetric data distributions within a machine learning model.
50
+ - Adjustable maximum threshold parameter, allowing for customization based on user needs.
51
+ - Provides a clear quantitative measure to mitigate model risks related to data skewness.
52
+
53
+ ### Limitations
54
+
55
+ - Only evaluates numeric columns, potentially missing skewness or bias in non-numeric data.
56
+ - Assumes that data should follow a normal distribution, which may not always be applicable to real-world data.
57
+ - Subjective threshold for risk grading, requiring expert input and recurrent iterations for refinement.
60
58
  """
61
59
 
62
60
  name = "skewness"
@@ -10,46 +10,46 @@ from validmind.vm_models import Figure, Metric
10
10
 
11
11
  class SpreadPlot(Metric):
12
12
  """
13
- Visualizes the spread relationship between pairs of time-series variables in a dataset, thereby aiding in
14
- identification of potential correlations.
13
+ Assesses potential correlations between pairs of time series variables through visualization to enhance
14
+ understanding of their relationships.
15
15
 
16
- **Purpose**:
17
- The SpreadPlot metric is intended to graphically illustrate and analyse the relationships between pairs of time
18
- series variables within a given dataset. This facilitated understanding helps in identifying and assessing
19
- potential time series correlations, like cointegration, between the variables.
16
+ ### Purpose
20
17
 
21
- **Test Mechanism**:
22
- The SpreadPlot metric operates by computing and representing the spread between each pair of time series variables
23
- in the dataset. In particular, the difference between two variables is calculated and presented as a line graph.
24
- This method is iterated for each unique pair of variables in the dataset.
18
+ The SpreadPlot test aims to graphically illustrate and analyze the relationships between pairs of time series
19
+ variables within a given dataset. This visualization helps in identifying and assessing potential time
20
+ series correlations, such as cointegration, between the variables.
25
21
 
26
- **Signs of High Risk**:
27
- Potential indicators of high risk related to the SpreadPlot metric might include:
22
+ ### Test Mechanism
28
23
 
29
- - Large fluctuations in the spread over a given timespan
30
- - Unexpected patterns or trends that may signal a potential risk in the underlying correlations between the
31
- variables
24
+ The SpreadPlot test computes and represents the spread between each pair of time series variables in the dataset.
25
+ Specifically, the difference between two variables is calculated and presented as a line graph. This process is
26
+ iterated for each unique pair of variables in the dataset, allowing for comprehensive visualization of their
27
+ relationships.
28
+
29
+ ### Signs of High Risk
30
+
31
+ - Large fluctuations in the spread over a given timespan.
32
+ - Unexpected patterns or trends that may signal potential risks in the underlying correlations between the
33
+ variables.
32
34
  - Presence of significant missing data or extreme outlier values, which could potentially skew the spread and
33
- indicate high risk
34
-
35
- **Strengths**:
36
- The SpreadPlot metric provides several key advantages:
37
-
38
- - It allows for thorough visual examination and interpretation of the correlations between time-series pairs
39
- - It aids in revealing complex relationships like cointegration
40
- - It enhances interpretability by visualising the relationships, thereby helping in spotting outliers and trends
41
- - It is capable of handling numerous variable pairs from the dataset through a versatile and adaptable process
42
-
43
- **Limitations**:
44
- Despite its advantages, the SpreadPlot metric does have certain drawbacks:
45
-
46
- - It primarily serves as a visualisation tool and does not offer quantitative measurements or statistics to
47
- objectively determine relationships
48
- - It heavily relies on the quality and granularity of the data - missing data or outliers can notably disturb the
49
- interpretation of the relationships
50
- - It can become inefficient or difficult to interpret with a high number of variables due to the profuse number of
51
- plots
52
- - It might not completely capture intricate non-linear relationships between the variables
35
+ indicate high risk.
36
+
37
+ ### Strengths
38
+
39
+ - Allows for thorough visual examination and interpretation of the correlations between time-series pairs.
40
+ - Aids in revealing complex relationships like cointegration.
41
+ - Enhances interpretability by visualizing the relationships, thereby helping in spotting outliers and trends.
42
+ - Capable of handling numerous variable pairs from the dataset through a versatile and adaptable process.
43
+
44
+ ### Limitations
45
+
46
+ - Primarily serves as a visualization tool and does not offer quantitative measurements or statistics to
47
+ objectively determine relationships.
48
+ - Heavily relies on the quality and granularity of the data—missing data or outliers can notably disturb the
49
+ interpretation of relationships.
50
+ - Can become inefficient or difficult to interpret with a high number of variables due to the large number of
51
+ plots.
52
+ - Might not completely capture intricate non-linear relationships between the variables.
53
53
  """
54
54
 
55
55
  name = "spread_plot"