validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +80 -119
- validmind/ai/test_result_description/config.yaml +29 -0
- validmind/ai/test_result_description/context.py +73 -0
- validmind/ai/test_result_description/image_processing.py +124 -0
- validmind/ai/test_result_description/system.jinja +39 -0
- validmind/ai/test_result_description/user.jinja +25 -0
- validmind/api_client.py +89 -43
- validmind/client.py +2 -2
- validmind/client_config.py +11 -14
- validmind/datasets/credit_risk/__init__.py +1 -0
- validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club_bias.py +142 -0
- validmind/datasets/regression/fred_timeseries.py +67 -138
- validmind/template.py +1 -0
- validmind/test_suites/__init__.py +0 -2
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/test_suites/summarization.py +0 -1
- validmind/test_suites/time_series.py +0 -43
- validmind/tests/__types__.py +14 -15
- validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
- validmind/tests/data_validation/ADF.py +31 -24
- validmind/tests/data_validation/AutoAR.py +9 -9
- validmind/tests/data_validation/AutoMA.py +23 -16
- validmind/tests/data_validation/AutoSeasonality.py +18 -16
- validmind/tests/data_validation/AutoStationarity.py +21 -16
- validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
- validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
- validmind/tests/data_validation/ClassImbalance.py +15 -12
- validmind/tests/data_validation/DFGLSArch.py +19 -13
- validmind/tests/data_validation/DatasetDescription.py +17 -11
- validmind/tests/data_validation/DatasetSplit.py +7 -5
- validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
- validmind/tests/data_validation/Duplicates.py +33 -25
- validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
- validmind/tests/data_validation/HighCardinality.py +19 -12
- validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
- validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
- validmind/tests/data_validation/IQROutliersTable.py +40 -36
- validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
- validmind/tests/data_validation/JarqueBera.py +70 -0
- validmind/tests/data_validation/KPSS.py +34 -29
- validmind/tests/data_validation/LJungBox.py +66 -0
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
- validmind/tests/data_validation/MissingValues.py +32 -27
- validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
- validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
- validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
- validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
- validmind/tests/data_validation/RollingStatsPlot.py +31 -23
- validmind/tests/data_validation/RunsTest.py +72 -0
- validmind/tests/data_validation/ScatterPlot.py +63 -78
- validmind/tests/data_validation/SeasonalDecompose.py +38 -34
- validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
- validmind/tests/data_validation/Skewness.py +35 -37
- validmind/tests/data_validation/SpreadPlot.py +35 -35
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
- validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
- validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
- validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
- validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
- validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
- validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
- validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
- validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
- validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
- validmind/tests/data_validation/TooManyZeroValues.py +16 -11
- validmind/tests/data_validation/UniqueRows.py +11 -6
- validmind/tests/data_validation/WOEBinPlots.py +23 -16
- validmind/tests/data_validation/WOEBinTable.py +35 -30
- validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
- validmind/tests/data_validation/nlp/CommonWords.py +21 -14
- validmind/tests/data_validation/nlp/Hashtags.py +42 -40
- validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
- validmind/tests/data_validation/nlp/Mentions.py +21 -15
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
- validmind/tests/data_validation/nlp/Punctuations.py +24 -20
- validmind/tests/data_validation/nlp/Sentiment.py +27 -8
- validmind/tests/data_validation/nlp/StopWords.py +26 -19
- validmind/tests/data_validation/nlp/TextDescription.py +39 -36
- validmind/tests/data_validation/nlp/Toxicity.py +32 -9
- validmind/tests/decorator.py +81 -42
- validmind/tests/model_validation/BertScore.py +36 -27
- validmind/tests/model_validation/BleuScore.py +25 -19
- validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
- validmind/tests/model_validation/ContextualRecall.py +38 -13
- validmind/tests/model_validation/FeaturesAUC.py +32 -13
- validmind/tests/model_validation/MeteorScore.py +46 -33
- validmind/tests/model_validation/ModelMetadata.py +32 -64
- validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
- validmind/tests/model_validation/RegardScore.py +30 -14
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
- validmind/tests/model_validation/RougeScore.py +36 -30
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
- validmind/tests/model_validation/TokenDisparity.py +31 -23
- validmind/tests/model_validation/ToxicityScore.py +26 -17
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
- validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
- validmind/tests/model_validation/ragas/utils.py +6 -0
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
- validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
- validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
- validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
- validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
- validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
- validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
- validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
- validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
- validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +16 -14
- validmind/tests/prompt_validation/Conciseness.py +7 -5
- validmind/tests/prompt_validation/Delimitation.py +23 -22
- validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
- validmind/tests/prompt_validation/Robustness.py +12 -10
- validmind/tests/prompt_validation/Specificity.py +13 -11
- validmind/tests/prompt_validation/ai_powered_test.py +6 -0
- validmind/tests/run.py +68 -23
- validmind/unit_metrics/__init__.py +81 -144
- validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
- validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
- validmind/unit_metrics/regression/HuberLoss.py +1 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
- validmind/utils.py +4 -0
- validmind/vm_models/dataset/dataset.py +2 -0
- validmind/vm_models/figure.py +5 -0
- validmind/vm_models/test/metric.py +1 -0
- validmind/vm_models/test/result_wrapper.py +143 -158
- validmind/vm_models/test/threshold_test.py +1 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
- validmind-2.5.18.dist-info/RECORD +324 -0
- validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
- validmind/tests/data_validation/BivariateHistograms.py +0 -117
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
- validmind/tests/data_validation/MissingValuesRisk.py +0 -88
- validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
- validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
- validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
- validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
- validmind-2.5.8.dist-info/RECORD +0 -318
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
@@ -68,65 +68,56 @@ def convert_to_levels(diff_df, original_df, target_column):
|
|
68
68
|
return levels_df
|
69
69
|
|
70
70
|
|
71
|
-
def get_demo_test_config(
|
71
|
+
def get_demo_test_config():
|
72
72
|
|
73
73
|
default_config = {}
|
74
74
|
|
75
75
|
default_config["validmind.data_validation.TimeSeriesDescription"] = {
|
76
|
-
"
|
77
|
-
"dataset":
|
76
|
+
"input_grid": {
|
77
|
+
"dataset": [
|
78
|
+
"raw_ds",
|
79
|
+
"train_diff_ds",
|
80
|
+
"test_diff_ds",
|
81
|
+
"train_ds",
|
82
|
+
"test_ds",
|
83
|
+
],
|
78
84
|
}
|
79
85
|
}
|
80
86
|
default_config["validmind.data_validation.TimeSeriesLinePlot"] = {
|
81
|
-
"
|
82
|
-
"dataset": "raw_ds",
|
87
|
+
"input_grid": {
|
88
|
+
"dataset": ["raw_ds"],
|
83
89
|
}
|
84
90
|
}
|
85
91
|
default_config["validmind.data_validation.TimeSeriesMissingValues"] = {
|
86
|
-
"
|
87
|
-
"dataset":
|
92
|
+
"input_grid": {
|
93
|
+
"dataset": [
|
94
|
+
"raw_ds",
|
95
|
+
"train_diff_ds",
|
96
|
+
"test_diff_ds",
|
97
|
+
"train_ds",
|
98
|
+
"test_ds",
|
99
|
+
],
|
88
100
|
}
|
89
101
|
}
|
90
102
|
default_config["validmind.data_validation.SeasonalDecompose"] = {
|
91
|
-
"
|
92
|
-
"dataset": "raw_ds",
|
103
|
+
"input_grid": {
|
104
|
+
"dataset": ["raw_ds"],
|
93
105
|
}
|
94
106
|
}
|
95
|
-
default_config[
|
96
|
-
"
|
97
|
-
|
98
|
-
"inputs": {
|
99
|
-
"dataset": "train_diff_ds",
|
107
|
+
default_config["validmind.data_validation.TimeSeriesDescriptiveStatistics"] = {
|
108
|
+
"input_grid": {
|
109
|
+
"dataset": ["train_diff_ds", "test_diff_ds"],
|
100
110
|
}
|
101
111
|
}
|
102
|
-
default_config[
|
103
|
-
"
|
104
|
-
|
105
|
-
"inputs": {
|
106
|
-
"dataset": "test_diff_ds",
|
107
|
-
}
|
108
|
-
}
|
109
|
-
default_config["validmind.data_validation.TimeSeriesOutliers:train_diff_data"] = {
|
110
|
-
"inputs": {
|
111
|
-
"dataset": "train_diff_ds",
|
112
|
+
default_config["validmind.data_validation.TimeSeriesOutliers"] = {
|
113
|
+
"input_grid": {
|
114
|
+
"dataset": ["train_diff_ds", "test_diff_ds"],
|
112
115
|
},
|
113
116
|
"params": {"zscore_threshold": 4},
|
114
117
|
}
|
115
|
-
default_config["validmind.data_validation.
|
116
|
-
"
|
117
|
-
"dataset": "test_diff_ds",
|
118
|
-
},
|
119
|
-
"params": {"zscore_threshold": 4},
|
120
|
-
}
|
121
|
-
default_config["validmind.data_validation.TimeSeriesHistogram:train_diff_data"] = {
|
122
|
-
"inputs": {
|
123
|
-
"dataset": "train_diff_ds",
|
124
|
-
},
|
125
|
-
"params": {"nbins": 100},
|
126
|
-
}
|
127
|
-
default_config["validmind.data_validation.TimeSeriesHistogram:test_diff_data"] = {
|
128
|
-
"inputs": {
|
129
|
-
"dataset": "test_diff_ds",
|
118
|
+
default_config["validmind.data_validation.TimeSeriesHistogram"] = {
|
119
|
+
"input_grid": {
|
120
|
+
"dataset": ["train_diff_ds", "test_diff_ds"],
|
130
121
|
},
|
131
122
|
"params": {"nbins": 100},
|
132
123
|
}
|
@@ -135,57 +126,37 @@ def get_demo_test_config(test_suite=None):
|
|
135
126
|
"datasets": ["train_diff_ds", "test_diff_ds"],
|
136
127
|
}
|
137
128
|
}
|
138
|
-
default_config["validmind.model_validation.
|
139
|
-
"
|
140
|
-
"
|
141
|
-
}
|
142
|
-
}
|
143
|
-
default_config[
|
144
|
-
"validmind.model_validation.sklearn.RegressionErrorsComparison:train_data"
|
145
|
-
] = {
|
146
|
-
"inputs": {
|
147
|
-
"datasets": ["train_ds", "train_ds"],
|
148
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
129
|
+
default_config["validmind.model_validation.ModelMetadata"] = {
|
130
|
+
"input_grid": {
|
131
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
149
132
|
}
|
150
133
|
}
|
151
|
-
default_config[
|
152
|
-
"
|
153
|
-
|
154
|
-
|
155
|
-
"datasets": ["test_ds", "test_ds"],
|
156
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
134
|
+
default_config["validmind.model_validation.sklearn.RegressionErrors"] = {
|
135
|
+
"input_grid": {
|
136
|
+
"dataset": ["train_ds", "test_ds"],
|
137
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
157
138
|
}
|
158
139
|
}
|
159
|
-
default_config[
|
160
|
-
"
|
161
|
-
|
162
|
-
|
163
|
-
"datasets": ["train_ds", "train_ds"],
|
164
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
165
|
-
}
|
166
|
-
}
|
167
|
-
default_config[
|
168
|
-
"validmind.model_validation.sklearn.RegressionR2SquareComparison:test_data"
|
169
|
-
] = {
|
170
|
-
"inputs": {
|
171
|
-
"datasets": ["test_ds", "test_ds"],
|
172
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
140
|
+
default_config["validmind.model_validation.sklearn.RegressionR2Square"] = {
|
141
|
+
"input_grid": {
|
142
|
+
"dataset": ["train_ds", "test_ds"],
|
143
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
173
144
|
}
|
174
145
|
}
|
175
146
|
default_config[
|
176
147
|
"validmind.model_validation.TimeSeriesR2SquareBySegments:train_data"
|
177
148
|
] = {
|
178
|
-
"
|
179
|
-
"
|
180
|
-
"
|
149
|
+
"input_grid": {
|
150
|
+
"dataset": ["train_ds"],
|
151
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
181
152
|
}
|
182
153
|
}
|
183
154
|
default_config[
|
184
155
|
"validmind.model_validation.TimeSeriesR2SquareBySegments:test_data"
|
185
156
|
] = {
|
186
|
-
"
|
187
|
-
"
|
188
|
-
"
|
157
|
+
"input_grid": {
|
158
|
+
"dataset": ["test_ds"],
|
159
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
189
160
|
},
|
190
161
|
"params": {
|
191
162
|
"segments": {
|
@@ -194,78 +165,36 @@ def get_demo_test_config(test_suite=None):
|
|
194
165
|
}
|
195
166
|
},
|
196
167
|
}
|
197
|
-
default_config[
|
198
|
-
"
|
199
|
-
|
200
|
-
|
201
|
-
"datasets": ["train_ds", "train_ds"],
|
202
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
203
|
-
}
|
204
|
-
}
|
205
|
-
default_config["validmind.model_validation.TimeSeriesPredictionsPlot:test_data"] = {
|
206
|
-
"inputs": {
|
207
|
-
"datasets": ["test_ds", "test_ds"],
|
208
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
209
|
-
}
|
210
|
-
}
|
211
|
-
default_config[
|
212
|
-
"validmind.model_validation.TimeSeriesPredictionWithCI:random_forests_model"
|
213
|
-
] = {
|
214
|
-
"inputs": {
|
215
|
-
"dataset": "test_ds",
|
216
|
-
"model": "random_forests_model",
|
217
|
-
}
|
218
|
-
}
|
219
|
-
default_config[
|
220
|
-
"validmind.model_validation.TimeSeriesPredictionWithCI:gradient_boosting_model"
|
221
|
-
] = {
|
222
|
-
"inputs": {
|
223
|
-
"dataset": "test_ds",
|
224
|
-
"model": "gradient_boosting_model",
|
225
|
-
}
|
226
|
-
}
|
227
|
-
default_config["validmind.model_validation.ModelPredictionResiduals:train_data"] = {
|
228
|
-
"inputs": {
|
229
|
-
"datasets": ["train_ds", "train_ds"],
|
230
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
168
|
+
default_config["validmind.model_validation.TimeSeriesPredictionsPlot"] = {
|
169
|
+
"input_grid": {
|
170
|
+
"dataset": ["train_ds", "test_ds"],
|
171
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
231
172
|
}
|
232
173
|
}
|
233
|
-
default_config["validmind.model_validation.
|
234
|
-
"
|
235
|
-
"
|
236
|
-
"
|
237
|
-
}
|
238
|
-
}
|
239
|
-
default_config[
|
240
|
-
"validmind.model_validation.sklearn.FeatureImportanceComparison:train_data"
|
241
|
-
] = {
|
242
|
-
"inputs": {
|
243
|
-
"datasets": ["train_ds", "train_ds"],
|
244
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
174
|
+
default_config["validmind.model_validation.TimeSeriesPredictionWithCI"] = {
|
175
|
+
"input_grid": {
|
176
|
+
"dataset": ["train_ds", "test_ds"],
|
177
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
245
178
|
}
|
246
179
|
}
|
247
|
-
default_config[
|
248
|
-
"
|
249
|
-
|
250
|
-
|
251
|
-
"datasets": ["test_ds", "test_ds"],
|
252
|
-
"models": ["random_forests_model", "gradient_boosting_model"],
|
180
|
+
default_config["validmind.model_validation.ModelPredictionResiduals"] = {
|
181
|
+
"input_grid": {
|
182
|
+
"dataset": ["train_ds", "test_ds"],
|
183
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
253
184
|
}
|
254
185
|
}
|
255
|
-
default_config[
|
256
|
-
"
|
257
|
-
|
258
|
-
|
259
|
-
"dataset": "test_ds",
|
260
|
-
"model": "random_forests_model",
|
186
|
+
default_config["validmind.model_validation.sklearn.FeatureImportance"] = {
|
187
|
+
"input_grid": {
|
188
|
+
"dataset": ["train_ds", "test_ds"],
|
189
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
261
190
|
}
|
262
191
|
}
|
263
192
|
default_config[
|
264
|
-
"validmind.model_validation.sklearn.PermutationFeatureImportance
|
193
|
+
"validmind.model_validation.sklearn.PermutationFeatureImportance"
|
265
194
|
] = {
|
266
|
-
"
|
267
|
-
"dataset": "test_ds",
|
268
|
-
"model": "gradient_boosting_model",
|
195
|
+
"input_grid": {
|
196
|
+
"dataset": ["train_ds", "test_ds"],
|
197
|
+
"model": ["random_forests_model", "gradient_boosting_model"],
|
269
198
|
}
|
270
199
|
}
|
271
200
|
|
validmind/template.py
CHANGED
@@ -42,7 +42,6 @@ from .time_series import (
|
|
42
42
|
TimeSeriesDataset,
|
43
43
|
TimeSeriesModelValidation,
|
44
44
|
TimeSeriesMultivariate,
|
45
|
-
TimeSeriesSensitivity,
|
46
45
|
TimeSeriesUnivariate,
|
47
46
|
)
|
48
47
|
|
@@ -78,7 +77,6 @@ core_test_suites = {
|
|
78
77
|
TimeSeriesDataset.suite_id: TimeSeriesDataset,
|
79
78
|
TimeSeriesModelValidation.suite_id: TimeSeriesModelValidation,
|
80
79
|
TimeSeriesMultivariate.suite_id: TimeSeriesMultivariate,
|
81
|
-
TimeSeriesSensitivity.suite_id: TimeSeriesSensitivity,
|
82
80
|
TimeSeriesUnivariate.suite_id: TimeSeriesUnivariate,
|
83
81
|
}
|
84
82
|
|
@@ -28,6 +28,6 @@ class RegressionModelsEvaluation(TestSuite):
|
|
28
28
|
|
29
29
|
suite_id = "regression_models_evaluation"
|
30
30
|
tests = [
|
31
|
-
"validmind.model_validation.statsmodels.
|
31
|
+
"validmind.model_validation.statsmodels.RegressionModelCoeffs",
|
32
32
|
"validmind.model_validation.sklearn.RegressionModelsPerformanceComparison",
|
33
33
|
]
|
@@ -16,7 +16,6 @@ class SummarizationMetrics(TestSuite):
|
|
16
16
|
|
17
17
|
suite_id = "summarization_metrics"
|
18
18
|
tests = [
|
19
|
-
"validmind.model_validation.RougeMetrics",
|
20
19
|
"validmind.model_validation.TokenDisparity",
|
21
20
|
"validmind.model_validation.BleuScore",
|
22
21
|
"validmind.model_validation.BertScore",
|
@@ -77,39 +77,6 @@ class TimeSeriesMultivariate(TestSuite):
|
|
77
77
|
]
|
78
78
|
|
79
79
|
|
80
|
-
class TimeSeriesForecast(TestSuite):
|
81
|
-
"""
|
82
|
-
This test suite computes predictions from statsmodels OLS linear regression models
|
83
|
-
against a list of models and plots the historical data alongside the forecasted data.
|
84
|
-
The purpose of this test suite is to evaluate the performance of each model in predicting
|
85
|
-
future values of a time series based on historical data. By comparing the historical
|
86
|
-
values with the forecasted values, users can visually assess the accuracy of each model
|
87
|
-
and determine which one best fits the data. In addition, this test suite can help users
|
88
|
-
identify any discrepancies between the models and the actual data, allowing for potential
|
89
|
-
improvements in model selection and parameter tuning.
|
90
|
-
"""
|
91
|
-
|
92
|
-
suite_id = "time_series_forecast"
|
93
|
-
tests = ["validmind.model_validation.statsmodels.RegressionModelForecastPlotLevels"]
|
94
|
-
|
95
|
-
|
96
|
-
class TimeSeriesSensitivity(TestSuite):
|
97
|
-
"""
|
98
|
-
This test suite performs sensitivity analysis on a statsmodels OLS linear regression model
|
99
|
-
by applying distinct shocks to each input variable individually and then computing the
|
100
|
-
model's predictions. The aim of this test suite is to investigate the model's responsiveness
|
101
|
-
to variations in its inputs. By juxtaposing the model's predictions under baseline and shocked
|
102
|
-
conditions, users can visually evaluate the sensitivity of the model to changes in each
|
103
|
-
variable. This kind of analysis can also shed light on potential model limitations, including
|
104
|
-
over-reliance on specific variables or insufficient responsiveness to changes in inputs. As a
|
105
|
-
result, this test suite can provide insights that may be beneficial for refining the model
|
106
|
-
structure, improving its robustness, and ensuring a more reliable prediction performance.
|
107
|
-
"""
|
108
|
-
|
109
|
-
suite_id = "time_series_sensitivity"
|
110
|
-
tests = ["validmind.model_validation.statsmodels.RegressionModelSensitivityPlot"]
|
111
|
-
|
112
|
-
|
113
80
|
class TimeSeriesDataset(TestSuite):
|
114
81
|
"""
|
115
82
|
Test suite for time series datasets.
|
@@ -152,14 +119,4 @@ class TimeSeriesModelValidation(TestSuite):
|
|
152
119
|
"section_description": RegressionModelsEvaluation.__doc__,
|
153
120
|
"section_tests": RegressionModelsEvaluation.tests,
|
154
121
|
},
|
155
|
-
{
|
156
|
-
"section_id": TimeSeriesForecast.suite_id,
|
157
|
-
"section_description": TimeSeriesForecast.__doc__,
|
158
|
-
"section_tests": TimeSeriesForecast.tests,
|
159
|
-
},
|
160
|
-
{
|
161
|
-
"section_id": TimeSeriesSensitivity.suite_id,
|
162
|
-
"section_description": TimeSeriesSensitivity.__doc__,
|
163
|
-
"section_tests": TimeSeriesSensitivity.tests,
|
164
|
-
},
|
165
122
|
]
|
validmind/tests/__types__.py
CHANGED
@@ -33,7 +33,6 @@ TestID = Literal[
|
|
33
33
|
"validmind.model_validation.ClusterSizeDistribution",
|
34
34
|
"validmind.model_validation.TokenDisparity",
|
35
35
|
"validmind.model_validation.ToxicityScore",
|
36
|
-
"validmind.model_validation.ModelMetadataComparison",
|
37
36
|
"validmind.model_validation.TimeSeriesR2SquareBySegments",
|
38
37
|
"validmind.model_validation.embeddings.CosineSimilarityComparison",
|
39
38
|
"validmind.model_validation.embeddings.EmbeddingsVisualization2D",
|
@@ -53,12 +52,13 @@ TestID = Literal[
|
|
53
52
|
"validmind.model_validation.ragas.ContextEntityRecall",
|
54
53
|
"validmind.model_validation.ragas.Faithfulness",
|
55
54
|
"validmind.model_validation.ragas.AspectCritique",
|
55
|
+
"validmind.model_validation.ragas.NoiseSensitivity",
|
56
56
|
"validmind.model_validation.ragas.AnswerSimilarity",
|
57
57
|
"validmind.model_validation.ragas.AnswerCorrectness",
|
58
58
|
"validmind.model_validation.ragas.ContextRecall",
|
59
59
|
"validmind.model_validation.ragas.ContextPrecision",
|
60
60
|
"validmind.model_validation.ragas.AnswerRelevance",
|
61
|
-
"validmind.model_validation.
|
61
|
+
"validmind.model_validation.ragas.ContextUtilization",
|
62
62
|
"validmind.model_validation.sklearn.AdjustedMutualInformation",
|
63
63
|
"validmind.model_validation.sklearn.SilhouettePlot",
|
64
64
|
"validmind.model_validation.sklearn.RobustnessDiagnosis",
|
@@ -77,13 +77,14 @@ TestID = Literal[
|
|
77
77
|
"validmind.model_validation.sklearn.ClassifierPerformance",
|
78
78
|
"validmind.model_validation.sklearn.VMeasure",
|
79
79
|
"validmind.model_validation.sklearn.MinimumF1Score",
|
80
|
+
"validmind.model_validation.sklearn.RegressionPerformance",
|
80
81
|
"validmind.model_validation.sklearn.ROCCurve",
|
81
82
|
"validmind.model_validation.sklearn.RegressionR2Square",
|
82
83
|
"validmind.model_validation.sklearn.RegressionErrors",
|
83
84
|
"validmind.model_validation.sklearn.ClusterPerformance",
|
84
|
-
"validmind.model_validation.sklearn.FeatureImportanceComparison",
|
85
85
|
"validmind.model_validation.sklearn.TrainingTestDegradation",
|
86
86
|
"validmind.model_validation.sklearn.RegressionErrorsComparison",
|
87
|
+
"validmind.model_validation.sklearn.FeatureImportance",
|
87
88
|
"validmind.model_validation.sklearn.HyperParametersTuning",
|
88
89
|
"validmind.model_validation.sklearn.KMeansClustersOptimization",
|
89
90
|
"validmind.model_validation.sklearn.ModelsPerformanceComparison",
|
@@ -91,21 +92,15 @@ TestID = Literal[
|
|
91
92
|
"validmind.model_validation.sklearn.RegressionR2SquareComparison",
|
92
93
|
"validmind.model_validation.sklearn.PopulationStabilityIndex",
|
93
94
|
"validmind.model_validation.sklearn.MinimumAccuracy",
|
94
|
-
"validmind.model_validation.statsmodels.RegressionModelsCoeffs",
|
95
|
-
"validmind.model_validation.statsmodels.BoxPierce",
|
96
|
-
"validmind.model_validation.statsmodels.RegressionCoeffsPlot",
|
97
95
|
"validmind.model_validation.statsmodels.RegressionModelSensitivityPlot",
|
98
96
|
"validmind.model_validation.statsmodels.RegressionModelForecastPlotLevels",
|
99
97
|
"validmind.model_validation.statsmodels.ScorecardHistogram",
|
100
|
-
"validmind.model_validation.statsmodels.LJungBox",
|
101
|
-
"validmind.model_validation.statsmodels.JarqueBera",
|
102
98
|
"validmind.model_validation.statsmodels.KolmogorovSmirnov",
|
103
|
-
"validmind.model_validation.statsmodels.ShapiroWilk",
|
104
99
|
"validmind.model_validation.statsmodels.CumulativePredictionProbabilities",
|
105
100
|
"validmind.model_validation.statsmodels.RegressionFeatureSignificance",
|
106
101
|
"validmind.model_validation.statsmodels.RegressionModelSummary",
|
102
|
+
"validmind.model_validation.statsmodels.RegressionCoeffs",
|
107
103
|
"validmind.model_validation.statsmodels.Lilliefors",
|
108
|
-
"validmind.model_validation.statsmodels.RunsTest",
|
109
104
|
"validmind.model_validation.statsmodels.RegressionPermutationFeatureImportance",
|
110
105
|
"validmind.model_validation.statsmodels.PredictionProbabilitiesHistogram",
|
111
106
|
"validmind.model_validation.statsmodels.AutoARIMA",
|
@@ -116,13 +111,13 @@ TestID = Literal[
|
|
116
111
|
"validmind.ongoing_monitoring.PredictionAcrossEachFeature",
|
117
112
|
"validmind.ongoing_monitoring.FeatureDrift",
|
118
113
|
"validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
|
119
|
-
"validmind.data_validation.MissingValuesRisk",
|
120
114
|
"validmind.data_validation.IQROutliersTable",
|
121
|
-
"validmind.data_validation.BivariateFeaturesBarPlots",
|
122
115
|
"validmind.data_validation.Skewness",
|
116
|
+
"validmind.data_validation.BoxPierce",
|
123
117
|
"validmind.data_validation.Duplicates",
|
124
118
|
"validmind.data_validation.MissingValuesBarPlot",
|
125
119
|
"validmind.data_validation.DatasetDescription",
|
120
|
+
"validmind.data_validation.ProtectedClassesCombination",
|
126
121
|
"validmind.data_validation.ZivotAndrewsArch",
|
127
122
|
"validmind.data_validation.ScatterPlot",
|
128
123
|
"validmind.data_validation.TimeSeriesOutliers",
|
@@ -130,8 +125,9 @@ TestID = Literal[
|
|
130
125
|
"validmind.data_validation.AutoStationarity",
|
131
126
|
"validmind.data_validation.DescriptiveStatistics",
|
132
127
|
"validmind.data_validation.TimeSeriesDescription",
|
133
|
-
"validmind.data_validation.
|
128
|
+
"validmind.data_validation.LJungBox",
|
134
129
|
"validmind.data_validation.TargetRateBarPlots",
|
130
|
+
"validmind.data_validation.JarqueBera",
|
135
131
|
"validmind.data_validation.PearsonCorrelationMatrix",
|
136
132
|
"validmind.data_validation.FeatureTargetCorrelationPlot",
|
137
133
|
"validmind.data_validation.TabularNumericalHistograms",
|
@@ -141,25 +137,27 @@ TestID = Literal[
|
|
141
137
|
"validmind.data_validation.MissingValues",
|
142
138
|
"validmind.data_validation.PhillipsPerronArch",
|
143
139
|
"validmind.data_validation.RollingStatsPlot",
|
140
|
+
"validmind.data_validation.ProtectedClassesDisparity",
|
144
141
|
"validmind.data_validation.TabularDescriptionTables",
|
145
142
|
"validmind.data_validation.AutoMA",
|
146
143
|
"validmind.data_validation.UniqueRows",
|
144
|
+
"validmind.data_validation.ShapiroWilk",
|
147
145
|
"validmind.data_validation.TooManyZeroValues",
|
148
146
|
"validmind.data_validation.HighPearsonCorrelation",
|
149
147
|
"validmind.data_validation.ACFandPACFPlot",
|
150
|
-
"validmind.data_validation.BivariateHistograms",
|
151
148
|
"validmind.data_validation.WOEBinTable",
|
152
|
-
"validmind.data_validation.HeatmapFeatureCorrelations",
|
153
149
|
"validmind.data_validation.TimeSeriesFrequency",
|
154
150
|
"validmind.data_validation.DatasetSplit",
|
155
151
|
"validmind.data_validation.SpreadPlot",
|
156
152
|
"validmind.data_validation.TimeSeriesLinePlot",
|
157
153
|
"validmind.data_validation.KPSS",
|
158
154
|
"validmind.data_validation.AutoSeasonality",
|
155
|
+
"validmind.data_validation.ProtectedClassesDescription",
|
159
156
|
"validmind.data_validation.BivariateScatterPlots",
|
160
157
|
"validmind.data_validation.EngleGrangerCoint",
|
161
158
|
"validmind.data_validation.TimeSeriesMissingValues",
|
162
159
|
"validmind.data_validation.TimeSeriesHistogram",
|
160
|
+
"validmind.data_validation.RunsTest",
|
163
161
|
"validmind.data_validation.LaggedCorrelationHeatmap",
|
164
162
|
"validmind.data_validation.SeasonalDecompose",
|
165
163
|
"validmind.data_validation.WOEBinPlots",
|
@@ -169,6 +167,7 @@ TestID = Literal[
|
|
169
167
|
"validmind.data_validation.TimeSeriesDescriptiveStatistics",
|
170
168
|
"validmind.data_validation.AutoAR",
|
171
169
|
"validmind.data_validation.TabularDateTimeHistograms",
|
170
|
+
"validmind.data_validation.ProtectedClassesThresholdOptimizer",
|
172
171
|
"validmind.data_validation.ADF",
|
173
172
|
"validmind.data_validation.nlp.Toxicity",
|
174
173
|
"validmind.data_validation.nlp.PolarityAndSubjectivity",
|
@@ -14,25 +14,27 @@ class ACFandPACFPlot(Metric):
|
|
14
14
|
Analyzes time series data using Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots to
|
15
15
|
reveal trends and correlations.
|
16
16
|
|
17
|
-
|
18
|
-
to analyze time series data in machine learning models. It illuminates the correlation of the data over time by
|
19
|
-
plotting the correlation of the series with its own lags (ACF), and the correlations after removing effects already
|
20
|
-
accounted for by earlier lags (PACF). This information can identify trends, such as seasonality, degrees of
|
21
|
-
autocorrelation, and inform the selection of order parameters for AutoRegressive Integrated Moving Average (ARIMA)
|
22
|
-
models.
|
17
|
+
### Purpose
|
23
18
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
19
|
+
The ACF (Autocorrelation Function) and PACF (Partial Autocorrelation Function) plot test is employed to analyze
|
20
|
+
time series data in machine learning models. It illuminates the correlation of the data over time by plotting the
|
21
|
+
correlation of the series with its own lags (ACF), and the correlations after removing effects already accounted
|
22
|
+
for by earlier lags (PACF). This information can identify trends, such as seasonality, degrees of autocorrelation,
|
23
|
+
and inform the selection of order parameters for AutoRegressive Integrated Moving Average (ARIMA) models.
|
28
24
|
|
29
|
-
|
25
|
+
### Test Mechanism
|
26
|
+
|
27
|
+
The `ACFandPACFPlot` test accepts a dataset with a time-based index. It first confirms the index is of a datetime
|
28
|
+
type, then handles any NaN values. The test subsequently generates ACF and PACF plots for each column in the
|
29
|
+
dataset, producing a subplot for each. If the dataset doesn't include key columns, an error is returned.
|
30
|
+
|
31
|
+
### Signs of High Risk
|
30
32
|
|
31
33
|
- Sudden drops in the correlation at a specific lag might signal a model at high risk.
|
32
34
|
- Consistent high correlation across multiple lags could also indicate non-stationarity in the data, which may
|
33
35
|
suggest that a model estimated on this data won't generalize well to future, unknown data.
|
34
36
|
|
35
|
-
|
37
|
+
### Strengths
|
36
38
|
|
37
39
|
- ACF and PACF plots offer clear graphical representations of the correlations in time series data.
|
38
40
|
- These plots are effective at revealing important data characteristics such as seasonality, trends, and
|
@@ -40,7 +42,7 @@ class ACFandPACFPlot(Metric):
|
|
40
42
|
- The insights from these plots aid in better model configuration, particularly in the selection of ARIMA model
|
41
43
|
parameters.
|
42
44
|
|
43
|
-
|
45
|
+
### Limitations
|
44
46
|
|
45
47
|
- ACF and PACF plots are exclusively for time series data and hence, can't be applied to all ML models.
|
46
48
|
- These plots require large, consistent datasets as gaps could lead to misleading results.
|
@@ -18,31 +18,38 @@ class ADF(Metric):
|
|
18
18
|
"""
|
19
19
|
Assesses the stationarity of a time series dataset using the Augmented Dickey-Fuller (ADF) test.
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
-
|
37
|
-
|
38
|
-
-
|
39
|
-
|
40
|
-
|
41
|
-
|
21
|
+
### Purpose
|
22
|
+
|
23
|
+
The Augmented Dickey-Fuller (ADF) test metric is used to determine the order of integration, i.e., the stationarity
|
24
|
+
of a given time series dataset. The stationary property of data is pivotal in many machine learning models as it
|
25
|
+
impacts the reliability and effectiveness of predictions and forecasts.
|
26
|
+
|
27
|
+
### Test Mechanism
|
28
|
+
|
29
|
+
The ADF test is executed using the `adfuller` function from the `statsmodels` library on each feature of the
|
30
|
+
dataset. Multiple outputs are generated for each run, including the ADF test statistic and p-value, count of lags
|
31
|
+
used, the number of observations considered in the test, critical values at various confidence levels, and the
|
32
|
+
information criterion. These results are stored for each feature for subsequent analysis.
|
33
|
+
|
34
|
+
### Signs of High Risk
|
35
|
+
|
36
|
+
- An inflated ADF statistic and high p-value (generally above 0.05) indicate a high risk to the model's performance
|
37
|
+
due to the presence of a unit root indicating non-stationarity.
|
38
|
+
- Non-stationarity might result in untrustworthy or insufficient forecasts.
|
39
|
+
|
40
|
+
### Strengths
|
41
|
+
|
42
|
+
- The ADF test is robust to sophisticated correlations within the data, making it suitable for settings where data
|
43
|
+
displays complex stochastic behavior.
|
44
|
+
- It provides explicit outputs like test statistics, critical values, and information criterion, enhancing
|
45
|
+
understanding and transparency in the model validation process.
|
46
|
+
|
47
|
+
### Limitations
|
48
|
+
|
42
49
|
- The ADF test might demonstrate low statistical power, making it challenging to differentiate between a unit root
|
43
|
-
and near-unit-root processes causing false negatives.
|
44
|
-
-
|
45
|
-
- The
|
50
|
+
and near-unit-root processes, potentially causing false negatives.
|
51
|
+
- It assumes the data follows an autoregressive process, which might not always be the case.
|
52
|
+
- The test struggles with time series data that have structural breaks.
|
46
53
|
"""
|
47
54
|
|
48
55
|
name = "adf"
|