validmind 2.5.24__py3-none-any.whl → 2.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.7.dist-info/METADATA +137 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.24.dist-info/METADATA +0 -118
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
validmind/tests/data_validation/AutoAR.py
@@ -6,13 +6,16 @@ import pandas as pd
 from statsmodels.tsa.ar_model import AutoReg
 from statsmodels.tsa.stattools import adfuller
 
+from validmind import tags, tasks
 from validmind.logging import get_logger
-from validmind.vm_models import
+from validmind.vm_models import VMDataset
 
 logger = get_logger(__name__)
 
 
-class AutoAR(Metric):
+@tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
+@tasks("regression")
+def AutoAR(dataset: VMDataset, max_ar_order: int = 3):
     """
     Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria.
 
@@ -56,98 +59,58 @@ class AutoAR(Metric):
     - AIC and BIC may not always agree on the selection of the best model. This potentially requires the user to juggle
     interpretational choices.
     """
-    … (28 lines removed; content not shown in this view)
+    df = dataset.df
+
+    summary_ar_analysis = pd.DataFrame()
+    best_ar_order = pd.DataFrame()
+
+    for col in df.columns:
+        series = df[col].dropna()
+
+        # Check for stationarity using the Augmented Dickey-Fuller test
+        adf_test = adfuller(series)
+        if adf_test[1] > 0.05:
+            logger.warning(
+                f"Warning: {col} is not stationary. Results may be inaccurate."
+            )
+
+        for ar_order in range(0, max_ar_order + 1):
+            try:
+                model = AutoReg(series, lags=ar_order, old_names=False)
+                model_fit = model.fit()
+
+                # Append the result of each AR order directly into the DataFrame
+                summary_ar_analysis = pd.concat(
+                    [
+                        summary_ar_analysis,
+                        pd.DataFrame(
+                            [
+                                {
+                                    "Variable": col,
+                                    "AR Order": ar_order,
+                                    "BIC": model_fit.bic,
+                                    "AIC": model_fit.aic,
+                                }
+                            ]
+                        ),
+                    ],
+                    ignore_index=True,
                )
-    … (18 lines removed; content not shown in this view)
-                            ]
-                        ),
-                    ],
-                    ignore_index=True,
-                )
-            except Exception as e:
-                logger.error(f"Error fitting AR({ar_order}) model for {col}: {e}")
-
-            # Find the best AR Order for this variable based on the minimum BIC
-            variable_summary = summary_ar_analysis[
-                summary_ar_analysis["Variable"] == col
-            ]
-            best_bic_row = variable_summary[
-                variable_summary["BIC"] == variable_summary["BIC"].min()
-            ]
-            best_ar_order = pd.concat([best_ar_order, best_bic_row])
-
-        # Convert the 'AR Order' column to integer
-        summary_ar_analysis["AR Order"] = summary_ar_analysis["AR Order"].astype(int)
-        best_ar_order["AR Order"] = best_ar_order["AR Order"].astype(int)
-
-        return self.cache_results(
-            {
-                "auto_ar_analysis": summary_ar_analysis.to_dict(orient="records"),
-                "best_ar_order": best_ar_order.to_dict(orient="records"),
-            }
-        )
-
-    def summary(self, metric_value):
-        """
-        Build one table for summarizing the auto AR results
-        and another for the best AR Order results
-        """
-        summary_ar_analysis = metric_value["auto_ar_analysis"]
-        best_ar_order = metric_value["best_ar_order"]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=summary_ar_analysis,
-                    metadata=ResultTableMetadata(title="Auto AR Analysis Results"),
-                ),
-                ResultTable(
-                    data=best_ar_order,
-                    metadata=ResultTableMetadata(title="Best AR Order Results"),
-                ),
-            ]
-        )
+            except Exception as e:
+                logger.error(f"Error fitting AR({ar_order}) model for {col}: {e}")
+
+        # Find the best AR Order for this variable based on the minimum BIC
+        variable_summary = summary_ar_analysis[summary_ar_analysis["Variable"] == col]
+        best_bic_row = variable_summary[
+            variable_summary["BIC"] == variable_summary["BIC"].min()
+        ]
+        best_ar_order = pd.concat([best_ar_order, best_bic_row])
+
+    # Convert the 'AR Order' column to integer
+    summary_ar_analysis["AR Order"] = summary_ar_analysis["AR Order"].astype(int)
+    best_ar_order["AR Order"] = best_ar_order["AR Order"].astype(int)
+
+    return {
+        "Auto AR Analysis Results": summary_ar_analysis,
+        "Best AR Order Results": best_ar_order,
+    }
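The change above is representative of the 2.6.x test refactor: the class-based `Metric` subclass with its `run()`/`summary()` methods and the `cache_results`/`ResultSummary`/`ResultTable` plumbing is replaced by a plain function registered with `@tags`/`@tasks` that returns a dict of DataFrames keyed by table title. The underlying computation is unchanged: fit AR(p) candidates for p = 0..`max_ar_order` and rank them by BIC and AIC. Below is a minimal standalone sketch of that order search using only pandas and statsmodels; the helper name `select_ar_order` and the simulated AR(2) series are illustrative and not part of the validmind package.

```python
# Standalone sketch of the BIC/AIC-based AR order search performed by the new
# AutoAR function. `select_ar_order` and the simulated data are illustrative
# names/data only, not validmind APIs.
import numpy as np
import pandas as pd
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.stattools import adfuller


def select_ar_order(series: pd.Series, max_ar_order: int = 3) -> pd.DataFrame:
    series = series.dropna()
    if adfuller(series)[1] > 0.05:
        print(f"Warning: {series.name} may be non-stationary; AR fits can mislead.")

    rows = []
    for p in range(max_ar_order + 1):
        try:
            fit = AutoReg(series, lags=p).fit()
            rows.append({"AR Order": p, "BIC": fit.bic, "AIC": fit.aic})
        except Exception as exc:  # mirror the test's tolerance of failed fits
            print(f"AR({p}) fit failed: {exc}")

    # Lowest BIC first, matching how the test picks its "Best AR Order"
    return pd.DataFrame(rows).sort_values("BIC").reset_index(drop=True)


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    y = np.zeros(500)
    for t in range(2, 500):  # simulate an AR(2) process
        y[t] = 0.6 * y[t - 1] - 0.3 * y[t - 2] + rng.normal()
    print(select_ar_order(pd.Series(y, name="y"), max_ar_order=4))
```

Returning plain DataFrames keyed by their display titles is what lets the new result pipeline render the "Auto AR Analysis Results" and "Best AR Order Results" tables without the removed summary boilerplate.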
validmind/tests/data_validation/AutoMA.py
@@ -6,13 +6,16 @@ import pandas as pd
 from statsmodels.tsa.arima.model import ARIMA
 from statsmodels.tsa.stattools import adfuller
 
+from validmind import tags, tasks
 from validmind.logging import get_logger
-from validmind.vm_models import
+from validmind.vm_models import VMDataset
 
 logger = get_logger(__name__)
 
 
-class AutoMA(Metric):
+@tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
+@tasks("regression")
+def AutoMA(dataset: VMDataset, max_ma_order: int = 3):
     """
     Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on
     minimal BIC and AIC values.
@@ -59,98 +62,58 @@ class AutoMA(Metric):
     - The computation time increases with the rise in `max_ma_order`, hence, the metric may become computationally
     costly for larger values.
     """
-    … (28 lines removed; content not shown in this view)
+    df = dataset.df
+
+    summary_ma_analysis = pd.DataFrame()
+    best_ma_order = pd.DataFrame()
+
+    for col in df.columns:
+        series = df[col].dropna()
+
+        # Check for stationarity using the Augmented Dickey-Fuller test
+        adf_test = adfuller(series)
+        if adf_test[1] > 0.05:
+            logger.warning(
+                f"Warning: {col} is not stationary. Results may be inaccurate."
+            )
+
+        for ma_order in range(0, max_ma_order + 1):
+            try:
+                model = ARIMA(series, order=(0, 0, ma_order))
+                model_fit = model.fit()
+
+                # Append the result of each MA order directly into the DataFrame
+                summary_ma_analysis = pd.concat(
+                    [
+                        summary_ma_analysis,
+                        pd.DataFrame(
+                            [
+                                {
+                                    "Variable": col,
+                                    "MA Order": ma_order,
+                                    "BIC": model_fit.bic,
+                                    "AIC": model_fit.aic,
+                                }
+                            ]
+                        ),
+                    ],
+                    ignore_index=True,
                )
-    … (18 lines removed; content not shown in this view)
-                            ]
-                        ),
-                    ],
-                    ignore_index=True,
-                )
-            except Exception as e:
-                logger.error(f"Error fitting MA({ma_order}) model for {col}: {e}")
-
-            # Find the best MA Order for this variable based on the minimum BIC
-            variable_summary = summary_ma_analysis[
-                summary_ma_analysis["Variable"] == col
-            ]
-            best_bic_row = variable_summary[
-                variable_summary["BIC"] == variable_summary["BIC"].min()
-            ]
-            best_ma_order = pd.concat([best_ma_order, best_bic_row])
-
-        # Convert the 'MA Order' column to integer
-        summary_ma_analysis["MA Order"] = summary_ma_analysis["MA Order"].astype(int)
-        best_ma_order["MA Order"] = best_ma_order["MA Order"].astype(int)
-
-        return self.cache_results(
-            {
-                "auto_ma_analysis": summary_ma_analysis.to_dict(orient="records"),
-                "best_ma_order": best_ma_order.to_dict(orient="records"),
-            }
-        )
-
-    def summary(self, metric_value):
-        """
-        Build one table for summarizing the auto MA results
-        and another for the best MA Order results
-        """
-        summary_ma_analysis = metric_value["auto_ma_analysis"]
-        best_ma_order = metric_value["best_ma_order"]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=summary_ma_analysis,
-                    metadata=ResultTableMetadata(title="Auto MA Analysis Results"),
-                ),
-                ResultTable(
-                    data=best_ma_order,
-                    metadata=ResultTableMetadata(title="Best MA Order Results"),
-                ),
-            ]
-        )
+            except Exception as e:
+                logger.error(f"Error fitting MA({ma_order}) model for {col}: {e}")
+
+        # Find the best MA Order for this variable based on the minimum BIC
+        variable_summary = summary_ma_analysis[summary_ma_analysis["Variable"] == col]
+        best_bic_row = variable_summary[
+            variable_summary["BIC"] == variable_summary["BIC"].min()
+        ]
+        best_ma_order = pd.concat([best_ma_order, best_bic_row])
+
+    # Convert the 'MA Order' column to integer
+    summary_ma_analysis["MA Order"] = summary_ma_analysis["MA Order"].astype(int)
+    best_ma_order["MA Order"] = best_ma_order["MA Order"].astype(int)
+
+    return {
+        "Auto MA Analysis Results": summary_ma_analysis,
+        "Best MA Order Results": best_ma_order,
+    }
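AutoMA mirrors the same conversion, with `ARIMA(0, 0, q)` fits standing in for the MA(q) candidates. A comparable standalone sketch of the BIC-based MA order search follows; `select_ma_order` and the simulated MA(1) series are illustrative only, not validmind APIs.

```python
# Companion sketch for the MA side: rank ARIMA(0, 0, q) fits by BIC
# for q = 0..max_ma_order, as the refactored AutoMA test does.
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA


def select_ma_order(series: pd.Series, max_ma_order: int = 3) -> pd.DataFrame:
    series = series.dropna()
    rows = []
    for q in range(max_ma_order + 1):
        try:
            fit = ARIMA(series, order=(0, 0, q)).fit()
            rows.append({"MA Order": q, "BIC": fit.bic, "AIC": fit.aic})
        except Exception as exc:  # tolerate failed fits, like the test
            print(f"MA({q}) fit failed: {exc}")
    # Lowest BIC first, matching the "Best MA Order" selection
    return pd.DataFrame(rows).sort_values("BIC").reset_index(drop=True)


if __name__ == "__main__":
    rng = np.random.default_rng(1)
    e = rng.normal(size=501)
    y = e[1:] + 0.7 * e[:-1]  # MA(1) process, so q = 1 should minimize BIC
    print(select_ma_order(pd.Series(y, name="y")))
```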
validmind/tests/data_validation/AutoStationarity.py
@@ -6,10 +6,13 @@ import numpy as np
 import pandas as pd
 from statsmodels.tsa.stattools import adfuller
 
-from validmind
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset
 
 
-class AutoStationarity(Metric):
+@tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
+@tasks("regression")
+def AutoStationarity(dataset: VMDataset, max_order: int = 5, threshold: float = 0.05):
     """
     Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame.
 
@@ -54,65 +57,62 @@ class AutoStationarity(Metric):
     - There's also a risk of over-differencing if the maximum order is set too high, which could induce unnecessary
     cycles.
     """
+    df = dataset.df.dropna()
 
-    … (45 lines removed; content not shown in this view)
-            # Append the result of each test directly into the DataFrame
-            summary_stationarity = pd.concat(
+    summary_stationarity = pd.DataFrame()
+    best_integration_order = pd.DataFrame()
+
+    # Loop over each column in the input DataFrame and perform stationarity tests
+    for col in df.columns:
+        is_stationary = False
+        order = 0
+
+        while not is_stationary and order <= max_order:
+            series = df[col]
+
+            if order == 0:
+                adf_result = adfuller(series)
+            else:
+                adf_result = adfuller(np.diff(series, n=order))
+
+            adf_pvalue = adf_result[1]
+            adf_pass_fail = adf_pvalue < threshold
+            adf_decision = "Stationary" if adf_pass_fail else "Non-stationary"
+
+            # Append the result of each test directly into the DataFrame
+            summary_stationarity = pd.concat(
+                [
+                    summary_stationarity,
+                    pd.DataFrame(
+                        [
+                            {
+                                "Variable": col,
+                                "Integration Order": order,
+                                "Test": "ADF",
+                                "p-value": adf_pvalue,
+                                "Threshold": threshold,
+                                "Pass/Fail": "Pass" if adf_pass_fail else "Fail",
+                                "Decision": adf_decision,
+                            }
+                        ]
+                    ),
+                ],
+                ignore_index=True,
+            )
+
+            if adf_pass_fail:
+                is_stationary = True
+                best_integration_order = pd.concat(
                     [
-                        summary_stationarity,
+                        best_integration_order,
                         pd.DataFrame(
                             [
                                 {
                                     "Variable": col,
-                                    "Integration Order": order,
+                                    "Best Integration Order": order,
                                     "Test": "ADF",
                                     "p-value": adf_pvalue,
                                     "Threshold": threshold,
-                                    "Pass/Fail": "Pass" if adf_pass_fail else "Fail",
                                     "Decision": adf_decision,
                                 }
                             ]
@@ -121,65 +121,17 @@ class AutoStationarity(Metric):
                     ignore_index=True,
                 )
 
-    … (14 lines removed; content not shown in this view)
-                                }
-                            ]
-                        ),
-                    ],
-                    ignore_index=True,
-                )
-
-            order += 1
-
-        # Convert the 'Integration Order' and 'Best Integration Order' column to integer
-        summary_stationarity["Integration Order"] = summary_stationarity[
-            "Integration Order"
-        ].astype(int)
-        best_integration_order["Best Integration Order"] = best_integration_order[
-            "Best Integration Order"
-        ].astype(int)
-
-        return self.cache_results(
-            {
-                "stationarity_analysis": summary_stationarity.to_dict(orient="records"),
-                "best_integration_order": best_integration_order.to_dict(
-                    orient="records"
-                ),
-            }
-        )
-
-    def summary(self, metric_value):
-        """
-        Build one table for summarizing the stationarity results
-        and another for the best integration order results
-        """
-        summary_stationarity = metric_value["stationarity_analysis"]
-        best_integration_order = metric_value["best_integration_order"]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=summary_stationarity,
-                    metadata=ResultTableMetadata(title="Stationarity Analysis Results"),
-                ),
-                ResultTable(
-                    data=best_integration_order,
-                    metadata=ResultTableMetadata(
-                        title="Best Integration Order Results"
-                    ),
-                ),
-            ]
-        )
+            order += 1
+
+    # Convert the 'Integration Order' and 'Best Integration Order' column to integer
+    summary_stationarity["Integration Order"] = summary_stationarity[
+        "Integration Order"
+    ].astype(int)
+    best_integration_order["Best Integration Order"] = best_integration_order[
+        "Best Integration Order"
+    ].astype(int)
+
+    return {
+        "Stationarity Analysis Results": summary_stationarity,
+        "Best Integration Order Results": best_integration_order,
+    }
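AutoStationarity keeps its original search strategy through the rewrite: difference each series up to `max_order` times and record the first order at which the ADF p-value drops below `threshold`. The sketch below isolates that loop; the helper name `find_integration_order` and the random-walk example are illustrative, not part of the package.

```python
# Sketch of the integration-order search in the refactored AutoStationarity test.
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller


def find_integration_order(series: pd.Series, max_order: int = 5, threshold: float = 0.05):
    """Return (order, p-value) for the first differencing order that tests stationary."""
    values = series.dropna().to_numpy()
    p_value = None
    for order in range(max_order + 1):
        # Mirror the test: ADF on the raw series at order 0, on the n-th difference otherwise
        tested = values if order == 0 else np.diff(values, n=order)
        p_value = adfuller(tested)[1]
        if p_value < threshold:
            return order, p_value
    return None, p_value  # never passed within max_order differences


if __name__ == "__main__":
    rng = np.random.default_rng(2)
    random_walk = pd.Series(np.cumsum(rng.normal(size=400)), name="rw")
    print(find_integration_order(random_walk))  # typically (1, <small p-value>)
```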