validmind 2.5.8__py3-none-any.whl → 2.5.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +26 -7
- validmind/api_client.py +89 -43
- validmind/client.py +2 -2
- validmind/client_config.py +11 -14
- validmind/datasets/regression/fred_timeseries.py +67 -138
- validmind/template.py +1 -0
- validmind/test_suites/__init__.py +0 -2
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/test_suites/summarization.py +0 -1
- validmind/test_suites/time_series.py +0 -43
- validmind/tests/__types__.py +3 -13
- validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
- validmind/tests/data_validation/ADF.py +31 -24
- validmind/tests/data_validation/AutoAR.py +9 -9
- validmind/tests/data_validation/AutoMA.py +23 -16
- validmind/tests/data_validation/AutoSeasonality.py +18 -16
- validmind/tests/data_validation/AutoStationarity.py +21 -16
- validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
- validmind/tests/data_validation/ClassImbalance.py +15 -12
- validmind/tests/data_validation/DFGLSArch.py +19 -13
- validmind/tests/data_validation/DatasetDescription.py +17 -11
- validmind/tests/data_validation/DatasetSplit.py +7 -5
- validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
- validmind/tests/data_validation/Duplicates.py +33 -25
- validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
- validmind/tests/data_validation/HighCardinality.py +19 -12
- validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
- validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
- validmind/tests/data_validation/IQROutliersTable.py +40 -36
- validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
- validmind/tests/data_validation/KPSS.py +34 -29
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
- validmind/tests/data_validation/MissingValues.py +32 -27
- validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
- validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
- validmind/tests/data_validation/RollingStatsPlot.py +31 -23
- validmind/tests/data_validation/ScatterPlot.py +63 -78
- validmind/tests/data_validation/SeasonalDecompose.py +38 -34
- validmind/tests/data_validation/Skewness.py +35 -37
- validmind/tests/data_validation/SpreadPlot.py +35 -35
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
- validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
- validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
- validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
- validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
- validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
- validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
- validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
- validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
- validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
- validmind/tests/data_validation/TooManyZeroValues.py +16 -11
- validmind/tests/data_validation/UniqueRows.py +11 -6
- validmind/tests/data_validation/WOEBinPlots.py +23 -16
- validmind/tests/data_validation/WOEBinTable.py +35 -30
- validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
- validmind/tests/data_validation/nlp/CommonWords.py +21 -14
- validmind/tests/data_validation/nlp/Hashtags.py +27 -20
- validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
- validmind/tests/data_validation/nlp/Mentions.py +21 -15
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
- validmind/tests/data_validation/nlp/Punctuations.py +24 -20
- validmind/tests/data_validation/nlp/Sentiment.py +27 -8
- validmind/tests/data_validation/nlp/StopWords.py +26 -19
- validmind/tests/data_validation/nlp/TextDescription.py +36 -35
- validmind/tests/data_validation/nlp/Toxicity.py +32 -9
- validmind/tests/decorator.py +81 -42
- validmind/tests/model_validation/BertScore.py +36 -27
- validmind/tests/model_validation/BleuScore.py +25 -19
- validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
- validmind/tests/model_validation/ContextualRecall.py +35 -13
- validmind/tests/model_validation/FeaturesAUC.py +32 -13
- validmind/tests/model_validation/MeteorScore.py +46 -33
- validmind/tests/model_validation/ModelMetadata.py +32 -64
- validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
- validmind/tests/model_validation/RegardScore.py +30 -14
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
- validmind/tests/model_validation/RougeScore.py +36 -30
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
- validmind/tests/model_validation/TokenDisparity.py +31 -23
- validmind/tests/model_validation/ToxicityScore.py +26 -17
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
- validmind/tests/model_validation/ragas/utils.py +6 -0
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
- validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
- validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
- validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
- validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
- validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
- validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
- validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
- validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
- validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
- validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
- validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
- validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
- validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +16 -14
- validmind/tests/prompt_validation/Conciseness.py +7 -5
- validmind/tests/prompt_validation/Delimitation.py +23 -22
- validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
- validmind/tests/prompt_validation/Robustness.py +12 -10
- validmind/tests/prompt_validation/Specificity.py +13 -11
- validmind/tests/prompt_validation/ai_powered_test.py +6 -0
- validmind/tests/run.py +68 -23
- validmind/unit_metrics/__init__.py +81 -144
- validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
- validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
- validmind/unit_metrics/regression/HuberLoss.py +1 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
- validmind/vm_models/dataset/dataset.py +2 -0
- validmind/vm_models/figure.py +5 -0
- validmind/vm_models/test/result_wrapper.py +93 -132
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
- validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
- validmind/tests/data_validation/BivariateHistograms.py +0 -117
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
- validmind/tests/data_validation/MissingValuesRisk.py +0 -88
- validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
@@ -28,6 +28,6 @@ class RegressionModelsEvaluation(TestSuite):
|
|
28
28
|
|
29
29
|
suite_id = "regression_models_evaluation"
|
30
30
|
tests = [
|
31
|
-
"validmind.model_validation.statsmodels.
|
31
|
+
"validmind.model_validation.statsmodels.RegressionModelCoeffs",
|
32
32
|
"validmind.model_validation.sklearn.RegressionModelsPerformanceComparison",
|
33
33
|
]
|
@@ -16,7 +16,6 @@ class SummarizationMetrics(TestSuite):
|
|
16
16
|
|
17
17
|
suite_id = "summarization_metrics"
|
18
18
|
tests = [
|
19
|
-
"validmind.model_validation.RougeMetrics",
|
20
19
|
"validmind.model_validation.TokenDisparity",
|
21
20
|
"validmind.model_validation.BleuScore",
|
22
21
|
"validmind.model_validation.BertScore",
|
@@ -77,39 +77,6 @@ class TimeSeriesMultivariate(TestSuite):
|
|
77
77
|
]
|
78
78
|
|
79
79
|
|
80
|
-
class TimeSeriesForecast(TestSuite):
|
81
|
-
"""
|
82
|
-
This test suite computes predictions from statsmodels OLS linear regression models
|
83
|
-
against a list of models and plots the historical data alongside the forecasted data.
|
84
|
-
The purpose of this test suite is to evaluate the performance of each model in predicting
|
85
|
-
future values of a time series based on historical data. By comparing the historical
|
86
|
-
values with the forecasted values, users can visually assess the accuracy of each model
|
87
|
-
and determine which one best fits the data. In addition, this test suite can help users
|
88
|
-
identify any discrepancies between the models and the actual data, allowing for potential
|
89
|
-
improvements in model selection and parameter tuning.
|
90
|
-
"""
|
91
|
-
|
92
|
-
suite_id = "time_series_forecast"
|
93
|
-
tests = ["validmind.model_validation.statsmodels.RegressionModelForecastPlotLevels"]
|
94
|
-
|
95
|
-
|
96
|
-
class TimeSeriesSensitivity(TestSuite):
|
97
|
-
"""
|
98
|
-
This test suite performs sensitivity analysis on a statsmodels OLS linear regression model
|
99
|
-
by applying distinct shocks to each input variable individually and then computing the
|
100
|
-
model's predictions. The aim of this test suite is to investigate the model's responsiveness
|
101
|
-
to variations in its inputs. By juxtaposing the model's predictions under baseline and shocked
|
102
|
-
conditions, users can visually evaluate the sensitivity of the model to changes in each
|
103
|
-
variable. This kind of analysis can also shed light on potential model limitations, including
|
104
|
-
over-reliance on specific variables or insufficient responsiveness to changes in inputs. As a
|
105
|
-
result, this test suite can provide insights that may be beneficial for refining the model
|
106
|
-
structure, improving its robustness, and ensuring a more reliable prediction performance.
|
107
|
-
"""
|
108
|
-
|
109
|
-
suite_id = "time_series_sensitivity"
|
110
|
-
tests = ["validmind.model_validation.statsmodels.RegressionModelSensitivityPlot"]
|
111
|
-
|
112
|
-
|
113
80
|
class TimeSeriesDataset(TestSuite):
|
114
81
|
"""
|
115
82
|
Test suite for time series datasets.
|
@@ -152,14 +119,4 @@ class TimeSeriesModelValidation(TestSuite):
|
|
152
119
|
"section_description": RegressionModelsEvaluation.__doc__,
|
153
120
|
"section_tests": RegressionModelsEvaluation.tests,
|
154
121
|
},
|
155
|
-
{
|
156
|
-
"section_id": TimeSeriesForecast.suite_id,
|
157
|
-
"section_description": TimeSeriesForecast.__doc__,
|
158
|
-
"section_tests": TimeSeriesForecast.tests,
|
159
|
-
},
|
160
|
-
{
|
161
|
-
"section_id": TimeSeriesSensitivity.suite_id,
|
162
|
-
"section_description": TimeSeriesSensitivity.__doc__,
|
163
|
-
"section_tests": TimeSeriesSensitivity.tests,
|
164
|
-
},
|
165
122
|
]
|
validmind/tests/__types__.py
CHANGED
@@ -33,7 +33,7 @@ TestID = Literal[
|
|
33
33
|
"validmind.model_validation.ClusterSizeDistribution",
|
34
34
|
"validmind.model_validation.TokenDisparity",
|
35
35
|
"validmind.model_validation.ToxicityScore",
|
36
|
-
"validmind.model_validation.
|
36
|
+
"validmind.model_validation.ModelMetadata",
|
37
37
|
"validmind.model_validation.TimeSeriesR2SquareBySegments",
|
38
38
|
"validmind.model_validation.embeddings.CosineSimilarityComparison",
|
39
39
|
"validmind.model_validation.embeddings.EmbeddingsVisualization2D",
|
@@ -81,21 +81,17 @@ TestID = Literal[
|
|
81
81
|
"validmind.model_validation.sklearn.RegressionR2Square",
|
82
82
|
"validmind.model_validation.sklearn.RegressionErrors",
|
83
83
|
"validmind.model_validation.sklearn.ClusterPerformance",
|
84
|
-
"validmind.model_validation.sklearn.
|
84
|
+
"validmind.model_validation.sklearn.FeatureImportance",
|
85
85
|
"validmind.model_validation.sklearn.TrainingTestDegradation",
|
86
|
-
"validmind.model_validation.sklearn.RegressionErrorsComparison",
|
87
86
|
"validmind.model_validation.sklearn.HyperParametersTuning",
|
88
87
|
"validmind.model_validation.sklearn.KMeansClustersOptimization",
|
89
88
|
"validmind.model_validation.sklearn.ModelsPerformanceComparison",
|
90
89
|
"validmind.model_validation.sklearn.WeakspotsDiagnosis",
|
91
|
-
"validmind.model_validation.sklearn.RegressionR2SquareComparison",
|
92
90
|
"validmind.model_validation.sklearn.PopulationStabilityIndex",
|
93
91
|
"validmind.model_validation.sklearn.MinimumAccuracy",
|
94
|
-
"validmind.model_validation.statsmodels.
|
92
|
+
"validmind.model_validation.statsmodels.RegressionModelCoeffs",
|
95
93
|
"validmind.model_validation.statsmodels.BoxPierce",
|
96
94
|
"validmind.model_validation.statsmodels.RegressionCoeffsPlot",
|
97
|
-
"validmind.model_validation.statsmodels.RegressionModelSensitivityPlot",
|
98
|
-
"validmind.model_validation.statsmodels.RegressionModelForecastPlotLevels",
|
99
95
|
"validmind.model_validation.statsmodels.ScorecardHistogram",
|
100
96
|
"validmind.model_validation.statsmodels.LJungBox",
|
101
97
|
"validmind.model_validation.statsmodels.JarqueBera",
|
@@ -110,15 +106,12 @@ TestID = Literal[
|
|
110
106
|
"validmind.model_validation.statsmodels.PredictionProbabilitiesHistogram",
|
111
107
|
"validmind.model_validation.statsmodels.AutoARIMA",
|
112
108
|
"validmind.model_validation.statsmodels.GINITable",
|
113
|
-
"validmind.model_validation.statsmodels.RegressionModelForecastPlot",
|
114
109
|
"validmind.model_validation.statsmodels.DurbinWatsonTest",
|
115
110
|
"validmind.ongoing_monitoring.PredictionCorrelation",
|
116
111
|
"validmind.ongoing_monitoring.PredictionAcrossEachFeature",
|
117
112
|
"validmind.ongoing_monitoring.FeatureDrift",
|
118
113
|
"validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
|
119
|
-
"validmind.data_validation.MissingValuesRisk",
|
120
114
|
"validmind.data_validation.IQROutliersTable",
|
121
|
-
"validmind.data_validation.BivariateFeaturesBarPlots",
|
122
115
|
"validmind.data_validation.Skewness",
|
123
116
|
"validmind.data_validation.Duplicates",
|
124
117
|
"validmind.data_validation.MissingValuesBarPlot",
|
@@ -130,7 +123,6 @@ TestID = Literal[
|
|
130
123
|
"validmind.data_validation.AutoStationarity",
|
131
124
|
"validmind.data_validation.DescriptiveStatistics",
|
132
125
|
"validmind.data_validation.TimeSeriesDescription",
|
133
|
-
"validmind.data_validation.ANOVAOneWayTable",
|
134
126
|
"validmind.data_validation.TargetRateBarPlots",
|
135
127
|
"validmind.data_validation.PearsonCorrelationMatrix",
|
136
128
|
"validmind.data_validation.FeatureTargetCorrelationPlot",
|
@@ -147,9 +139,7 @@ TestID = Literal[
|
|
147
139
|
"validmind.data_validation.TooManyZeroValues",
|
148
140
|
"validmind.data_validation.HighPearsonCorrelation",
|
149
141
|
"validmind.data_validation.ACFandPACFPlot",
|
150
|
-
"validmind.data_validation.BivariateHistograms",
|
151
142
|
"validmind.data_validation.WOEBinTable",
|
152
|
-
"validmind.data_validation.HeatmapFeatureCorrelations",
|
153
143
|
"validmind.data_validation.TimeSeriesFrequency",
|
154
144
|
"validmind.data_validation.DatasetSplit",
|
155
145
|
"validmind.data_validation.SpreadPlot",
|
@@ -14,25 +14,27 @@ class ACFandPACFPlot(Metric):
|
|
14
14
|
Analyzes time series data using Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots to
|
15
15
|
reveal trends and correlations.
|
16
16
|
|
17
|
-
|
18
|
-
to analyze time series data in machine learning models. It illuminates the correlation of the data over time by
|
19
|
-
plotting the correlation of the series with its own lags (ACF), and the correlations after removing effects already
|
20
|
-
accounted for by earlier lags (PACF). This information can identify trends, such as seasonality, degrees of
|
21
|
-
autocorrelation, and inform the selection of order parameters for AutoRegressive Integrated Moving Average (ARIMA)
|
22
|
-
models.
|
17
|
+
### Purpose
|
23
18
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
19
|
+
The ACF (Autocorrelation Function) and PACF (Partial Autocorrelation Function) plot test is employed to analyze
|
20
|
+
time series data in machine learning models. It illuminates the correlation of the data over time by plotting the
|
21
|
+
correlation of the series with its own lags (ACF), and the correlations after removing effects already accounted
|
22
|
+
for by earlier lags (PACF). This information can identify trends, such as seasonality, degrees of autocorrelation,
|
23
|
+
and inform the selection of order parameters for AutoRegressive Integrated Moving Average (ARIMA) models.
|
28
24
|
|
29
|
-
|
25
|
+
### Test Mechanism
|
26
|
+
|
27
|
+
The `ACFandPACFPlot` test accepts a dataset with a time-based index. It first confirms the index is of a datetime
|
28
|
+
type, then handles any NaN values. The test subsequently generates ACF and PACF plots for each column in the
|
29
|
+
dataset, producing a subplot for each. If the dataset doesn't include key columns, an error is returned.
|
30
|
+
|
31
|
+
### Signs of High Risk
|
30
32
|
|
31
33
|
- Sudden drops in the correlation at a specific lag might signal a model at high risk.
|
32
34
|
- Consistent high correlation across multiple lags could also indicate non-stationarity in the data, which may
|
33
35
|
suggest that a model estimated on this data won't generalize well to future, unknown data.
|
34
36
|
|
35
|
-
|
37
|
+
### Strengths
|
36
38
|
|
37
39
|
- ACF and PACF plots offer clear graphical representations of the correlations in time series data.
|
38
40
|
- These plots are effective at revealing important data characteristics such as seasonality, trends, and
|
@@ -40,7 +42,7 @@ class ACFandPACFPlot(Metric):
|
|
40
42
|
- The insights from these plots aid in better model configuration, particularly in the selection of ARIMA model
|
41
43
|
parameters.
|
42
44
|
|
43
|
-
|
45
|
+
### Limitations
|
44
46
|
|
45
47
|
- ACF and PACF plots are exclusively for time series data and hence, can't be applied to all ML models.
|
46
48
|
- These plots require large, consistent datasets as gaps could lead to misleading results.
|
@@ -18,31 +18,38 @@ class ADF(Metric):
|
|
18
18
|
"""
|
19
19
|
Assesses the stationarity of a time series dataset using the Augmented Dickey-Fuller (ADF) test.
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
-
|
37
|
-
|
38
|
-
-
|
39
|
-
|
40
|
-
|
41
|
-
|
21
|
+
### Purpose
|
22
|
+
|
23
|
+
The Augmented Dickey-Fuller (ADF) test metric is used to determine the order of integration, i.e., the stationarity
|
24
|
+
of a given time series dataset. The stationary property of data is pivotal in many machine learning models as it
|
25
|
+
impacts the reliability and effectiveness of predictions and forecasts.
|
26
|
+
|
27
|
+
### Test Mechanism
|
28
|
+
|
29
|
+
The ADF test is executed using the `adfuller` function from the `statsmodels` library on each feature of the
|
30
|
+
dataset. Multiple outputs are generated for each run, including the ADF test statistic and p-value, count of lags
|
31
|
+
used, the number of observations considered in the test, critical values at various confidence levels, and the
|
32
|
+
information criterion. These results are stored for each feature for subsequent analysis.
|
33
|
+
|
34
|
+
### Signs of High Risk
|
35
|
+
|
36
|
+
- An inflated ADF statistic and high p-value (generally above 0.05) indicate a high risk to the model's performance
|
37
|
+
due to the presence of a unit root indicating non-stationarity.
|
38
|
+
- Non-stationarity might result in untrustworthy or insufficient forecasts.
|
39
|
+
|
40
|
+
### Strengths
|
41
|
+
|
42
|
+
- The ADF test is robust to sophisticated correlations within the data, making it suitable for settings where data
|
43
|
+
displays complex stochastic behavior.
|
44
|
+
- It provides explicit outputs like test statistics, critical values, and information criterion, enhancing
|
45
|
+
understanding and transparency in the model validation process.
|
46
|
+
|
47
|
+
### Limitations
|
48
|
+
|
42
49
|
- The ADF test might demonstrate low statistical power, making it challenging to differentiate between a unit root
|
43
|
-
and near-unit-root processes causing false negatives.
|
44
|
-
-
|
45
|
-
- The
|
50
|
+
and near-unit-root processes, potentially causing false negatives.
|
51
|
+
- It assumes the data follows an autoregressive process, which might not always be the case.
|
52
|
+
- The test struggles with time series data that have structural breaks.
|
46
53
|
"""
|
47
54
|
|
48
55
|
name = "adf"
|
@@ -16,7 +16,7 @@ class AutoAR(Metric):
|
|
16
16
|
"""
|
17
17
|
Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria.
|
18
18
|
|
19
|
-
|
19
|
+
### Purpose
|
20
20
|
|
21
21
|
The AutoAR test is intended to automatically identify the Autoregressive (AR) order of a time series by utilizing
|
22
22
|
the Bayesian Information Criterion (BIC) and Akaike Information Criterion (AIC). AR order is crucial in forecasting
|
@@ -24,30 +24,30 @@ class AutoAR(Metric):
|
|
24
24
|
objective is to select the most fitting AR model that encapsulates the trend and seasonality in the time series
|
25
25
|
data.
|
26
26
|
|
27
|
-
|
27
|
+
### Test Mechanism
|
28
28
|
|
29
29
|
The test mechanism operates by iterating through a possible range of AR orders up to a defined maximum. An AR model
|
30
30
|
is fitted for each order, and the corresponding BIC and AIC are computed. BIC and AIC statistical measures are
|
31
31
|
designed to penalize models for complexity, preferring simpler models that fit the data proficiently. To verify the
|
32
|
-
stationarity of the time series, the Augmented Dickey-Fuller test is executed. The AR order, BIC, and AIC findings
|
32
|
+
stationarity of the time series, the Augmented Dickey-Fuller test is executed. The AR order, BIC, and AIC findings
|
33
33
|
are compiled into a dataframe for effortless comparison. Then, the AR order with the smallest BIC is established as
|
34
34
|
the desirable order for each variable.
|
35
35
|
|
36
|
-
|
36
|
+
### Signs of High Risk
|
37
37
|
|
38
38
|
- An augmented Dickey Fuller test p-value > 0.05, indicating the time series isn't stationary, may lead to
|
39
39
|
inaccurate results.
|
40
40
|
- Problems with the model fitting procedure, such as computational or convergence issues.
|
41
|
-
- Continuous selection of the maximum specified AR order may suggest insufficient set limit.
|
41
|
+
- Continuous selection of the maximum specified AR order may suggest an insufficient set limit.
|
42
42
|
|
43
|
-
|
43
|
+
### Strengths
|
44
44
|
|
45
45
|
- The test independently pinpoints the optimal AR order, thereby reducing potential human bias.
|
46
46
|
- It strikes a balance between model simplicity and goodness-of-fit to avoid overfitting.
|
47
|
-
- Has the capability to account for stationarity in a time series, an essential aspect for dependable AR
|
48
|
-
- The results are aggregated into
|
47
|
+
- Has the capability to account for stationarity in a time series, an essential aspect for dependable AR modeling.
|
48
|
+
- The results are aggregated into a comprehensive table, enabling an easy interpretation.
|
49
49
|
|
50
|
-
|
50
|
+
### Limitations
|
51
51
|
|
52
52
|
- The tests need a stationary time series input.
|
53
53
|
- They presume a linear relationship between the series and its lags.
|
@@ -17,32 +17,39 @@ class AutoMA(Metric):
|
|
17
17
|
Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on
|
18
18
|
minimal BIC and AIC values.
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
20
|
+
### Purpose
|
21
|
+
|
22
|
+
The `AutoMA` metric serves an essential role of automated decision-making for selecting the optimal Moving Average
|
23
|
+
(MA) order for every variable in a given time series dataset. The selection is dependent on the minimization of
|
24
|
+
BIC (Bayesian Information Criterion) and AIC (Akaike Information Criterion); these are established statistical
|
25
|
+
tools used for model selection. Furthermore, prior to the commencement of the model fitting process, the algorithm
|
26
|
+
conducts a stationarity test (Augmented Dickey-Fuller test) on each series.
|
27
|
+
|
28
|
+
### Test Mechanism
|
29
|
+
|
30
|
+
Starting off, the `AutoMA` algorithm checks whether the `max_ma_order` parameter has been provided. It consequently
|
31
|
+
loops through all variables in the dataset, carrying out the Dickey-Fuller test for stationarity. For each
|
32
|
+
stationary variable, it fits an ARIMA model for orders running from 0 to `max_ma_order`. The result is a list
|
33
|
+
showcasing the BIC and AIC values of the ARIMA models based on different orders. The MA order, which yields the
|
34
|
+
smallest BIC, is chosen as the 'best MA order' for every single variable. The final results include a table
|
35
|
+
summarizing the auto MA analysis and another table listing the best MA order for each variable.
|
36
|
+
|
37
|
+
### Signs of High Risk
|
38
|
+
|
34
39
|
- When a series is non-stationary (p-value > 0.05 in the Dickey-Fuller test), the produced result could be inaccurate.
|
35
40
|
- Any error that arises in the process of fitting the ARIMA models, especially with a higher MA order, can
|
36
41
|
potentially indicate risks and might need further investigation.
|
37
42
|
|
38
|
-
|
43
|
+
### Strengths
|
44
|
+
|
39
45
|
- The metric facilitates automation in the process of selecting the MA order for time series forecasting. This
|
40
46
|
significantly saves time and reduces efforts conventionally necessary for manual hyperparameter tuning.
|
41
47
|
- The use of both BIC and AIC enhances the likelihood of selecting the most suitable model.
|
42
48
|
- The metric ascertains the stationarity of the series prior to model fitting, thus ensuring that the underlying
|
43
49
|
assumptions of the MA model are fulfilled.
|
44
50
|
|
45
|
-
|
51
|
+
### Limitations
|
52
|
+
|
46
53
|
- If the time series fails to be stationary, the metric may yield inaccurate results. Consequently, it necessitates
|
47
54
|
pre-processing steps to stabilize the series before fitting the ARIMA model.
|
48
55
|
- The metric adopts a rudimentary model selection process based on BIC and doesn't consider other potential model
|
@@ -17,28 +17,30 @@ class AutoSeasonality(Metric):
|
|
17
17
|
Automatically identifies and quantifies optimal seasonality in time series data to improve forecasting model
|
18
18
|
performance.
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
20
|
+
### Purpose
|
21
|
+
|
22
|
+
The AutoSeasonality test aims to automatically detect and identify the best seasonal order or period for each
|
23
|
+
variable in a time series dataset. This detection helps to quantify periodic patterns and seasonality that reoccur
|
24
|
+
at fixed intervals in the data. Understanding the seasonality component can drastically improve prediction
|
25
|
+
accuracy, which is especially significant for forecasting-based models.
|
26
|
+
|
27
|
+
### Test Mechanism
|
28
|
+
|
29
|
+
This test uses the seasonal decomposition method from the Statsmodels Python library. The function takes the
|
28
30
|
'additive' model type for each variable and applies it within the prescribed range of 'min_period' and
|
29
|
-
'max_period'.
|
30
|
-
|
31
|
-
|
32
|
-
|
31
|
+
'max_period'. It decomposes the seasonality for each period in the range and calculates the mean residual error for
|
32
|
+
each period. The seasonal period that results in the minimum residuals is marked as the 'Best Period'. The test
|
33
|
+
results include the 'Best Period', the calculated residual errors, and a determination of 'Seasonality' or 'No
|
34
|
+
Seasonality'.
|
33
35
|
|
34
|
-
|
36
|
+
### Signs of High Risk
|
35
37
|
|
36
38
|
- If the optimal seasonal period (or 'Best Period') is consistently at the maximum or minimum limit of the offered
|
37
39
|
range for a majority of variables, it may suggest that the range set does not adequately capture the true seasonal
|
38
40
|
pattern in the series.
|
39
41
|
- A high average 'Residual Error' for the selected 'Best Period' could indicate issues with the model's performance.
|
40
42
|
|
41
|
-
|
43
|
+
### Strengths
|
42
44
|
|
43
45
|
- The metric offers an automatic approach to identifying and quantifying the optimal seasonality, providing a
|
44
46
|
robust method for analyzing time series datasets.
|
@@ -46,9 +48,9 @@ class AutoSeasonality(Metric):
|
|
46
48
|
seasonality.
|
47
49
|
- The use of concrete and measurable statistical methods improves the objectivity and reproducibility of the model.
|
48
50
|
|
49
|
-
|
51
|
+
### Limitations
|
50
52
|
|
51
|
-
- This AutoSeasonality metric may not be suitable if the time series data exhibits random walk
|
53
|
+
- This AutoSeasonality metric may not be suitable if the time series data exhibits random walk behavior or lacks
|
52
54
|
clear seasonality, as the seasonal decomposition model may not be appropriate.
|
53
55
|
- The defined range for the seasonal period (min_period and max_period) can influence the outcomes. If the actual
|
54
56
|
seasonality period lies outside this range, this method will not be able to identify the true seasonal order.
|
@@ -13,26 +13,30 @@ class AutoStationarity(Metric):
|
|
13
13
|
"""
|
14
14
|
Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame.
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
16
|
+
### Purpose
|
17
|
+
|
18
|
+
The AutoStationarity metric is intended to automatically detect and evaluate the stationary nature of each time
|
19
|
+
series in a DataFrame. It incorporates the Augmented Dickey-Fuller (ADF) test, a statistical approach used to
|
20
|
+
assess stationarity. Stationarity is a fundamental property suggesting that statistical features like mean and
|
21
|
+
variance remain unchanged over time. This is necessary for many time-series models.
|
22
|
+
|
23
|
+
### Test Mechanism
|
24
|
+
|
25
|
+
The mechanism for the AutoStationarity test involves applying the Augmented Dickey-Fuller test to each time series
|
26
|
+
within the given dataframe to assess if they are stationary. Each series in the dataframe is iterated over, using the ADF
|
27
|
+
test up to a defined maximum order (configurable and by default set to 5). The p-value resulting from the ADF test
|
28
|
+
is compared against a predetermined threshold (also configurable and by default set to 0.05). The time series is
|
29
|
+
deemed stationary at its current differencing order if the p-value is less than the threshold.
|
30
|
+
|
31
|
+
### Signs of High Risk
|
32
|
+
|
29
33
|
- A significant number of series not achieving stationarity even at the maximum order of differencing can indicate
|
30
34
|
high risk or potential failure in the model.
|
31
35
|
- This could suggest the series may not be appropriately modeled by a stationary process, hence other modeling
|
32
36
|
approaches might be required.
|
33
37
|
|
38
|
+
### Strengths
|
34
39
|
|
35
|
-
**Strengths**:
|
36
40
|
- The key strength in this metric lies in the automation of the ADF test, enabling mass stationarity analysis
|
37
41
|
across various time series and boosting the efficiency and credibility of the analysis.
|
38
42
|
- The utilization of the ADF test, a widely accepted method for testing stationarity, lends authenticity to the
|
@@ -40,8 +44,9 @@ class AutoStationarity(Metric):
|
|
40
44
|
- The introduction of the max order and threshold parameters gives users the autonomy to determine their preferred
|
41
45
|
levels of stringency in the tests.
|
42
46
|
|
43
|
-
|
44
|
-
|
47
|
+
### Limitations
|
48
|
+
|
49
|
+
- The Augmented Dickey-Fuller test and the stationarity test are not without their limitations. These tests are
|
45
50
|
premised on the assumption that the series can be modeled by an autoregressive process, which may not always hold
|
46
51
|
true.
|
47
52
|
- The stationarity check is highly sensitive to the choice of threshold for the significance level; an extremely
|