validmind 2.5.25__py3-none-any.whl → 2.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. validmind/__init__.py +8 -17
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +66 -85
  4. validmind/ai/test_result_description/context.py +2 -2
  5. validmind/ai/utils.py +26 -1
  6. validmind/api_client.py +43 -79
  7. validmind/client.py +5 -7
  8. validmind/client_config.py +1 -1
  9. validmind/datasets/__init__.py +1 -1
  10. validmind/datasets/classification/customer_churn.py +7 -5
  11. validmind/datasets/nlp/__init__.py +2 -2
  12. validmind/errors.py +6 -10
  13. validmind/html_templates/content_blocks.py +18 -16
  14. validmind/logging.py +21 -16
  15. validmind/tests/__init__.py +28 -5
  16. validmind/tests/__types__.py +186 -170
  17. validmind/tests/_store.py +7 -21
  18. validmind/tests/comparison.py +362 -0
  19. validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
  20. validmind/tests/data_validation/ADF.py +49 -83
  21. validmind/tests/data_validation/AutoAR.py +59 -96
  22. validmind/tests/data_validation/AutoMA.py +59 -96
  23. validmind/tests/data_validation/AutoStationarity.py +66 -114
  24. validmind/tests/data_validation/ClassImbalance.py +48 -117
  25. validmind/tests/data_validation/DatasetDescription.py +180 -209
  26. validmind/tests/data_validation/DatasetSplit.py +50 -75
  27. validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
  28. validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
  29. validmind/tests/data_validation/Duplicates.py +21 -90
  30. validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
  31. validmind/tests/data_validation/HighCardinality.py +32 -80
  32. validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
  33. validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
  34. validmind/tests/data_validation/IQROutliersTable.py +40 -80
  35. validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
  36. validmind/tests/data_validation/KPSS.py +33 -81
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
  38. validmind/tests/data_validation/MissingValues.py +17 -58
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
  40. validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
  41. validmind/tests/data_validation/RollingStatsPlot.py +50 -81
  42. validmind/tests/data_validation/SeasonalDecompose.py +102 -184
  43. validmind/tests/data_validation/Skewness.py +27 -64
  44. validmind/tests/data_validation/SpreadPlot.py +34 -57
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
  48. validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
  49. validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
  50. validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
  54. validmind/tests/data_validation/TooManyZeroValues.py +21 -70
  55. validmind/tests/data_validation/UniqueRows.py +23 -62
  56. validmind/tests/data_validation/WOEBinPlots.py +83 -109
  57. validmind/tests/data_validation/WOEBinTable.py +28 -69
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
  59. validmind/tests/data_validation/nlp/CommonWords.py +49 -57
  60. validmind/tests/data_validation/nlp/Hashtags.py +27 -49
  61. validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
  62. validmind/tests/data_validation/nlp/Mentions.py +32 -63
  63. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
  64. validmind/tests/data_validation/nlp/Punctuations.py +63 -47
  65. validmind/tests/data_validation/nlp/Sentiment.py +4 -0
  66. validmind/tests/data_validation/nlp/StopWords.py +62 -91
  67. validmind/tests/data_validation/nlp/TextDescription.py +116 -159
  68. validmind/tests/data_validation/nlp/Toxicity.py +12 -4
  69. validmind/tests/decorator.py +33 -242
  70. validmind/tests/load.py +212 -153
  71. validmind/tests/model_validation/BertScore.py +13 -7
  72. validmind/tests/model_validation/BleuScore.py +4 -0
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
  74. validmind/tests/model_validation/ContextualRecall.py +3 -0
  75. validmind/tests/model_validation/FeaturesAUC.py +43 -74
  76. validmind/tests/model_validation/MeteorScore.py +3 -0
  77. validmind/tests/model_validation/RegardScore.py +5 -1
  78. validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
  79. validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
  80. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
  81. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
  82. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
  83. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
  84. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
  85. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
  86. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
  87. validmind/tests/model_validation/embeddings/utils.py +53 -0
  88. validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
  89. validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
  90. validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
  91. validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
  92. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
  93. validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
  94. validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
  95. validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
  96. validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
  97. validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
  98. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
  99. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
  100. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
  101. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
  102. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
  103. validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
  104. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
  105. validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
  106. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
  107. validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
  108. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
  109. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
  110. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
  111. validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
  112. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
  113. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
  114. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
  115. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
  116. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
  117. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
  118. validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
  119. validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
  120. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
  121. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
  122. validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
  123. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
  124. validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
  125. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
  126. validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
  127. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
  128. validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
  129. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
  130. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
  132. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
  133. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
  134. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
  136. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
  137. validmind/tests/output.py +120 -0
  138. validmind/tests/prompt_validation/Bias.py +55 -98
  139. validmind/tests/prompt_validation/Clarity.py +56 -99
  140. validmind/tests/prompt_validation/Conciseness.py +63 -101
  141. validmind/tests/prompt_validation/Delimitation.py +48 -89
  142. validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
  143. validmind/tests/prompt_validation/Robustness.py +80 -121
  144. validmind/tests/prompt_validation/Specificity.py +61 -95
  145. validmind/tests/prompt_validation/ai_powered_test.py +2 -2
  146. validmind/tests/run.py +314 -496
  147. validmind/tests/test_providers.py +109 -79
  148. validmind/tests/utils.py +91 -0
  149. validmind/unit_metrics/__init__.py +16 -155
  150. validmind/unit_metrics/classification/F1.py +1 -0
  151. validmind/unit_metrics/classification/Precision.py +1 -0
  152. validmind/unit_metrics/classification/ROC_AUC.py +1 -0
  153. validmind/unit_metrics/classification/Recall.py +1 -0
  154. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
  155. validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
  156. validmind/unit_metrics/regression/HuberLoss.py +1 -0
  157. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
  158. validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
  159. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
  160. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
  161. validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
  162. validmind/unit_metrics/regression/QuantileLoss.py +1 -0
  163. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  164. validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
  165. validmind/utils.py +66 -17
  166. validmind/vm_models/__init__.py +2 -17
  167. validmind/vm_models/dataset/dataset.py +31 -4
  168. validmind/vm_models/figure.py +7 -37
  169. validmind/vm_models/model.py +3 -0
  170. validmind/vm_models/result/__init__.py +7 -0
  171. validmind/vm_models/result/result.jinja +21 -0
  172. validmind/vm_models/result/result.py +337 -0
  173. validmind/vm_models/result/utils.py +160 -0
  174. validmind/vm_models/test_suite/runner.py +16 -54
  175. validmind/vm_models/test_suite/summary.py +3 -3
  176. validmind/vm_models/test_suite/test.py +43 -77
  177. validmind/vm_models/test_suite/test_suite.py +8 -40
  178. validmind-2.6.8.dist-info/METADATA +137 -0
  179. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/RECORD +182 -189
  180. validmind/tests/data_validation/AutoSeasonality.py +0 -190
  181. validmind/tests/metadata.py +0 -59
  182. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
  183. validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
  184. validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
  185. validmind/unit_metrics/composite.py +0 -238
  186. validmind/vm_models/test/metric.py +0 -98
  187. validmind/vm_models/test/metric_result.py +0 -61
  188. validmind/vm_models/test/output_template.py +0 -55
  189. validmind/vm_models/test/result_summary.py +0 -76
  190. validmind/vm_models/test/result_wrapper.py +0 -488
  191. validmind/vm_models/test/test.py +0 -103
  192. validmind/vm_models/test/threshold_test.py +0 -106
  193. validmind/vm_models/test/threshold_test_result.py +0 -75
  194. validmind/vm_models/test_context.py +0 -259
  195. validmind-2.5.25.dist-info/METADATA +0 -118
  196. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/LICENSE +0 -0
  197. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/WHEEL +0 -0
  198. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/entry_points.txt +0 -0
@@ -6,13 +6,16 @@ import pandas as pd
6
6
  from statsmodels.tsa.ar_model import AutoReg
7
7
  from statsmodels.tsa.stattools import adfuller
8
8
 
9
+ from validmind import tags, tasks
9
10
  from validmind.logging import get_logger
10
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
11
+ from validmind.vm_models import VMDataset
11
12
 
12
13
  logger = get_logger(__name__)
13
14
 
14
15
 
15
- class AutoAR(Metric):
16
+ @tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
17
+ @tasks("regression")
18
+ def AutoAR(dataset: VMDataset, max_ar_order: int = 3):
16
19
  """
17
20
  Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria.
18
21
 
@@ -56,98 +59,58 @@ class AutoAR(Metric):
56
59
  - AIC and BIC may not always agree on the selection of the best model. This potentially requires the user to juggle
57
60
  interpretational choices.
58
61
  """
59
-
60
- type = "dataset"
61
- name = "auto_ar"
62
- required_inputs = ["dataset"]
63
- default_params = {"max_ar_order": 3}
64
- tasks = ["regression"]
65
- tags = ["time_series_data", "statsmodels", "forecasting", "statistical_test"]
66
-
67
- def run(self):
68
- if "max_ar_order" not in self.params:
69
- raise ValueError("max_ar_order must be provided in params")
70
-
71
- max_ar_order = int(self.params["max_ar_order"])
72
-
73
- df = self.inputs.dataset.df
74
-
75
- # Create empty DataFrames to store the results
76
- summary_ar_analysis = pd.DataFrame()
77
- best_ar_order = pd.DataFrame()
78
-
79
- for col in df.columns:
80
- series = df[col].dropna()
81
-
82
- # Check for stationarity using the Augmented Dickey-Fuller test
83
- adf_test = adfuller(series)
84
- if adf_test[1] > 0.05:
85
- logger.warning(
86
- f"Warning: {col} is not stationary. Results may be inaccurate."
62
+ df = dataset.df
63
+
64
+ summary_ar_analysis = pd.DataFrame()
65
+ best_ar_order = pd.DataFrame()
66
+
67
+ for col in df.columns:
68
+ series = df[col].dropna()
69
+
70
+ # Check for stationarity using the Augmented Dickey-Fuller test
71
+ adf_test = adfuller(series)
72
+ if adf_test[1] > 0.05:
73
+ logger.warning(
74
+ f"Warning: {col} is not stationary. Results may be inaccurate."
75
+ )
76
+
77
+ for ar_order in range(0, max_ar_order + 1):
78
+ try:
79
+ model = AutoReg(series, lags=ar_order, old_names=False)
80
+ model_fit = model.fit()
81
+
82
+ # Append the result of each AR order directly into the DataFrame
83
+ summary_ar_analysis = pd.concat(
84
+ [
85
+ summary_ar_analysis,
86
+ pd.DataFrame(
87
+ [
88
+ {
89
+ "Variable": col,
90
+ "AR Order": ar_order,
91
+ "BIC": model_fit.bic,
92
+ "AIC": model_fit.aic,
93
+ }
94
+ ]
95
+ ),
96
+ ],
97
+ ignore_index=True,
87
98
  )
88
-
89
- for ar_order in range(0, max_ar_order + 1):
90
- try:
91
- model = AutoReg(series, lags=ar_order, old_names=False)
92
- model_fit = model.fit()
93
-
94
- # Append the result of each AR order directly into the DataFrame
95
- summary_ar_analysis = pd.concat(
96
- [
97
- summary_ar_analysis,
98
- pd.DataFrame(
99
- [
100
- {
101
- "Variable": col,
102
- "AR Order": ar_order,
103
- "BIC": model_fit.bic,
104
- "AIC": model_fit.aic,
105
- }
106
- ]
107
- ),
108
- ],
109
- ignore_index=True,
110
- )
111
- except Exception as e:
112
- logger.error(f"Error fitting AR({ar_order}) model for {col}: {e}")
113
-
114
- # Find the best AR Order for this variable based on the minimum BIC
115
- variable_summary = summary_ar_analysis[
116
- summary_ar_analysis["Variable"] == col
117
- ]
118
- best_bic_row = variable_summary[
119
- variable_summary["BIC"] == variable_summary["BIC"].min()
120
- ]
121
- best_ar_order = pd.concat([best_ar_order, best_bic_row])
122
-
123
- # Convert the 'AR Order' column to integer
124
- summary_ar_analysis["AR Order"] = summary_ar_analysis["AR Order"].astype(int)
125
- best_ar_order["AR Order"] = best_ar_order["AR Order"].astype(int)
126
-
127
- return self.cache_results(
128
- {
129
- "auto_ar_analysis": summary_ar_analysis.to_dict(orient="records"),
130
- "best_ar_order": best_ar_order.to_dict(orient="records"),
131
- }
132
- )
133
-
134
- def summary(self, metric_value):
135
- """
136
- Build one table for summarizing the auto AR results
137
- and another for the best AR Order results
138
- """
139
- summary_ar_analysis = metric_value["auto_ar_analysis"]
140
- best_ar_order = metric_value["best_ar_order"]
141
-
142
- return ResultSummary(
143
- results=[
144
- ResultTable(
145
- data=summary_ar_analysis,
146
- metadata=ResultTableMetadata(title="Auto AR Analysis Results"),
147
- ),
148
- ResultTable(
149
- data=best_ar_order,
150
- metadata=ResultTableMetadata(title="Best AR Order Results"),
151
- ),
152
- ]
153
- )
99
+ except Exception as e:
100
+ logger.error(f"Error fitting AR({ar_order}) model for {col}: {e}")
101
+
102
+ # Find the best AR Order for this variable based on the minimum BIC
103
+ variable_summary = summary_ar_analysis[summary_ar_analysis["Variable"] == col]
104
+ best_bic_row = variable_summary[
105
+ variable_summary["BIC"] == variable_summary["BIC"].min()
106
+ ]
107
+ best_ar_order = pd.concat([best_ar_order, best_bic_row])
108
+
109
+ # Convert the 'AR Order' column to integer
110
+ summary_ar_analysis["AR Order"] = summary_ar_analysis["AR Order"].astype(int)
111
+ best_ar_order["AR Order"] = best_ar_order["AR Order"].astype(int)
112
+
113
+ return {
114
+ "Auto AR Analysis Results": summary_ar_analysis,
115
+ "Best AR Order Results": best_ar_order,
116
+ }
@@ -6,13 +6,16 @@ import pandas as pd
6
6
  from statsmodels.tsa.arima.model import ARIMA
7
7
  from statsmodels.tsa.stattools import adfuller
8
8
 
9
+ from validmind import tags, tasks
9
10
  from validmind.logging import get_logger
10
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
11
+ from validmind.vm_models import VMDataset
11
12
 
12
13
  logger = get_logger(__name__)
13
14
 
14
15
 
15
- class AutoMA(Metric):
16
+ @tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
17
+ @tasks("regression")
18
+ def AutoMA(dataset: VMDataset, max_ma_order: int = 3):
16
19
  """
17
20
  Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on
18
21
  minimal BIC and AIC values.
@@ -59,98 +62,58 @@ class AutoMA(Metric):
59
62
  - The computation time increases with the rise in `max_ma_order`, hence, the metric may become computationally
60
63
  costly for larger values.
61
64
  """
62
-
63
- type = "dataset"
64
- name = "auto_ma"
65
- required_inputs = ["dataset"]
66
- default_params = {"max_ma_order": 3}
67
- tasks = ["regression"]
68
- tags = ["time_series_data", "statsmodels", "forecasting", "statistical_test"]
69
-
70
- def run(self):
71
- if "max_ma_order" not in self.params:
72
- raise ValueError("max_ma_order must be provided in params")
73
-
74
- max_ma_order = int(self.params["max_ma_order"])
75
-
76
- df = self.inputs.dataset.df
77
-
78
- # Create empty DataFrames to store the results
79
- summary_ma_analysis = pd.DataFrame()
80
- best_ma_order = pd.DataFrame()
81
-
82
- for col in df.columns:
83
- series = df[col].dropna()
84
-
85
- # Check for stationarity using the Augmented Dickey-Fuller test
86
- adf_test = adfuller(series)
87
- if adf_test[1] > 0.05:
88
- logger.warning(
89
- f"Warning: {col} is not stationary. Results may be inaccurate."
65
+ df = dataset.df
66
+
67
+ summary_ma_analysis = pd.DataFrame()
68
+ best_ma_order = pd.DataFrame()
69
+
70
+ for col in df.columns:
71
+ series = df[col].dropna()
72
+
73
+ # Check for stationarity using the Augmented Dickey-Fuller test
74
+ adf_test = adfuller(series)
75
+ if adf_test[1] > 0.05:
76
+ logger.warning(
77
+ f"Warning: {col} is not stationary. Results may be inaccurate."
78
+ )
79
+
80
+ for ma_order in range(0, max_ma_order + 1):
81
+ try:
82
+ model = ARIMA(series, order=(0, 0, ma_order))
83
+ model_fit = model.fit()
84
+
85
+ # Append the result of each MA order directly into the DataFrame
86
+ summary_ma_analysis = pd.concat(
87
+ [
88
+ summary_ma_analysis,
89
+ pd.DataFrame(
90
+ [
91
+ {
92
+ "Variable": col,
93
+ "MA Order": ma_order,
94
+ "BIC": model_fit.bic,
95
+ "AIC": model_fit.aic,
96
+ }
97
+ ]
98
+ ),
99
+ ],
100
+ ignore_index=True,
90
101
  )
91
-
92
- for ma_order in range(0, max_ma_order + 1):
93
- try:
94
- model = ARIMA(series, order=(0, 0, ma_order))
95
- model_fit = model.fit()
96
-
97
- # Append the result of each MA order directly into the DataFrame
98
- summary_ma_analysis = pd.concat(
99
- [
100
- summary_ma_analysis,
101
- pd.DataFrame(
102
- [
103
- {
104
- "Variable": col,
105
- "MA Order": ma_order,
106
- "BIC": model_fit.bic,
107
- "AIC": model_fit.aic,
108
- }
109
- ]
110
- ),
111
- ],
112
- ignore_index=True,
113
- )
114
- except Exception as e:
115
- logger.error(f"Error fitting MA({ma_order}) model for {col}: {e}")
116
-
117
- # Find the best MA Order for this variable based on the minimum BIC
118
- variable_summary = summary_ma_analysis[
119
- summary_ma_analysis["Variable"] == col
120
- ]
121
- best_bic_row = variable_summary[
122
- variable_summary["BIC"] == variable_summary["BIC"].min()
123
- ]
124
- best_ma_order = pd.concat([best_ma_order, best_bic_row])
125
-
126
- # Convert the 'MA Order' column to integer
127
- summary_ma_analysis["MA Order"] = summary_ma_analysis["MA Order"].astype(int)
128
- best_ma_order["MA Order"] = best_ma_order["MA Order"].astype(int)
129
-
130
- return self.cache_results(
131
- {
132
- "auto_ma_analysis": summary_ma_analysis.to_dict(orient="records"),
133
- "best_ma_order": best_ma_order.to_dict(orient="records"),
134
- }
135
- )
136
-
137
- def summary(self, metric_value):
138
- """
139
- Build one table for summarizing the auto MA results
140
- and another for the best MA Order results
141
- """
142
- summary_ma_analysis = metric_value["auto_ma_analysis"]
143
- best_ma_order = metric_value["best_ma_order"]
144
-
145
- return ResultSummary(
146
- results=[
147
- ResultTable(
148
- data=summary_ma_analysis,
149
- metadata=ResultTableMetadata(title="Auto MA Analysis Results"),
150
- ),
151
- ResultTable(
152
- data=best_ma_order,
153
- metadata=ResultTableMetadata(title="Best MA Order Results"),
154
- ),
155
- ]
156
- )
102
+ except Exception as e:
103
+ logger.error(f"Error fitting MA({ma_order}) model for {col}: {e}")
104
+
105
+ # Find the best MA Order for this variable based on the minimum BIC
106
+ variable_summary = summary_ma_analysis[summary_ma_analysis["Variable"] == col]
107
+ best_bic_row = variable_summary[
108
+ variable_summary["BIC"] == variable_summary["BIC"].min()
109
+ ]
110
+ best_ma_order = pd.concat([best_ma_order, best_bic_row])
111
+
112
+ # Convert the 'MA Order' column to integer
113
+ summary_ma_analysis["MA Order"] = summary_ma_analysis["MA Order"].astype(int)
114
+ best_ma_order["MA Order"] = best_ma_order["MA Order"].astype(int)
115
+
116
+ return {
117
+ "Auto MA Analysis Results": summary_ma_analysis,
118
+ "Best MA Order Results": best_ma_order,
119
+ }
@@ -6,10 +6,13 @@ import numpy as np
6
6
  import pandas as pd
7
7
  from statsmodels.tsa.stattools import adfuller
8
8
 
9
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
9
+ from validmind import tags, tasks
10
+ from validmind.vm_models import VMDataset
10
11
 
11
12
 
12
- class AutoStationarity(Metric):
13
+ @tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
14
+ @tasks("regression")
15
+ def AutoStationarity(dataset: VMDataset, max_order: int = 5, threshold: float = 0.05):
13
16
  """
14
17
  Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame.
15
18
 
@@ -54,65 +57,62 @@ class AutoStationarity(Metric):
54
57
  - There's also a risk of over-differencing if the maximum order is set too high, which could induce unnecessary
55
58
  cycles.
56
59
  """
60
+ df = dataset.df.dropna()
57
61
 
58
- type = "dataset"
59
- name = "auto_stationarity"
60
- required_inputs = ["dataset"]
61
- default_params = {"max_order": 5, "threshold": 0.05}
62
- tasks = ["regression"]
63
- tags = [
64
- "time_series_data",
65
- "statsmodels",
66
- "forecasting",
67
- "statistical_test",
68
- "stationarity",
69
- ]
70
-
71
- def run(self):
72
- if "max_order" not in self.params:
73
- raise ValueError("max_order must be provided in params")
74
- max_order = self.params["max_order"]
75
-
76
- if "threshold" not in self.params:
77
- raise ValueError("threshold must be provided in params")
78
- threshold = self.params["threshold"]
79
-
80
- df = self.inputs.dataset.df.dropna()
81
-
82
- # Create an empty DataFrame to store the results
83
- summary_stationarity = pd.DataFrame()
84
- best_integration_order = pd.DataFrame() # New DataFrame
85
-
86
- # Loop over each column in the input DataFrame and perform stationarity tests
87
- for col in df.columns:
88
- is_stationary = False
89
- order = 0
90
-
91
- while not is_stationary and order <= max_order:
92
- series = df[col]
93
-
94
- if order == 0:
95
- adf_result = adfuller(series)
96
- else:
97
- adf_result = adfuller(np.diff(series, n=order - 1))
98
-
99
- adf_pvalue = adf_result[1]
100
- adf_pass_fail = adf_pvalue < threshold
101
- adf_decision = "Stationary" if adf_pass_fail else "Non-stationary"
102
-
103
- # Append the result of each test directly into the DataFrame
104
- summary_stationarity = pd.concat(
62
+ summary_stationarity = pd.DataFrame()
63
+ best_integration_order = pd.DataFrame()
64
+
65
+ # Loop over each column in the input DataFrame and perform stationarity tests
66
+ for col in df.columns:
67
+ is_stationary = False
68
+ order = 0
69
+
70
+ while not is_stationary and order <= max_order:
71
+ series = df[col]
72
+
73
+ if order == 0:
74
+ adf_result = adfuller(series)
75
+ else:
76
+ adf_result = adfuller(np.diff(series, n=order))
77
+
78
+ adf_pvalue = adf_result[1]
79
+ adf_pass_fail = adf_pvalue < threshold
80
+ adf_decision = "Stationary" if adf_pass_fail else "Non-stationary"
81
+
82
+ # Append the result of each test directly into the DataFrame
83
+ summary_stationarity = pd.concat(
84
+ [
85
+ summary_stationarity,
86
+ pd.DataFrame(
87
+ [
88
+ {
89
+ "Variable": col,
90
+ "Integration Order": order,
91
+ "Test": "ADF",
92
+ "p-value": adf_pvalue,
93
+ "Threshold": threshold,
94
+ "Pass/Fail": "Pass" if adf_pass_fail else "Fail",
95
+ "Decision": adf_decision,
96
+ }
97
+ ]
98
+ ),
99
+ ],
100
+ ignore_index=True,
101
+ )
102
+
103
+ if adf_pass_fail:
104
+ is_stationary = True
105
+ best_integration_order = pd.concat(
105
106
  [
106
- summary_stationarity,
107
+ best_integration_order,
107
108
  pd.DataFrame(
108
109
  [
109
110
  {
110
111
  "Variable": col,
111
- "Integration Order": order,
112
+ "Best Integration Order": order,
112
113
  "Test": "ADF",
113
114
  "p-value": adf_pvalue,
114
115
  "Threshold": threshold,
115
- "Pass/Fail": "Pass" if adf_pass_fail else "Fail",
116
116
  "Decision": adf_decision,
117
117
  }
118
118
  ]
@@ -121,65 +121,17 @@ class AutoStationarity(Metric):
121
121
  ignore_index=True,
122
122
  )
123
123
 
124
- if adf_pass_fail:
125
- is_stationary = True
126
- best_integration_order = pd.concat(
127
- [
128
- best_integration_order,
129
- pd.DataFrame(
130
- [
131
- {
132
- "Variable": col,
133
- "Best Integration Order": order,
134
- "Test": "ADF",
135
- "p-value": adf_pvalue,
136
- "Threshold": threshold,
137
- "Decision": adf_decision,
138
- }
139
- ]
140
- ),
141
- ],
142
- ignore_index=True,
143
- )
144
-
145
- order += 1
146
-
147
- # Convert the 'Integration Order' and 'Best Integration Order' column to integer
148
- summary_stationarity["Integration Order"] = summary_stationarity[
149
- "Integration Order"
150
- ].astype(int)
151
- best_integration_order["Best Integration Order"] = best_integration_order[
152
- "Best Integration Order"
153
- ].astype(int)
154
-
155
- return self.cache_results(
156
- {
157
- "stationarity_analysis": summary_stationarity.to_dict(orient="records"),
158
- "best_integration_order": best_integration_order.to_dict(
159
- orient="records"
160
- ),
161
- }
162
- )
163
-
164
- def summary(self, metric_value):
165
- """
166
- Build one table for summarizing the stationarity results
167
- and another for the best integration order results
168
- """
169
- summary_stationarity = metric_value["stationarity_analysis"]
170
- best_integration_order = metric_value["best_integration_order"]
171
-
172
- return ResultSummary(
173
- results=[
174
- ResultTable(
175
- data=summary_stationarity,
176
- metadata=ResultTableMetadata(title="Stationarity Analysis Results"),
177
- ),
178
- ResultTable(
179
- data=best_integration_order,
180
- metadata=ResultTableMetadata(
181
- title="Best Integration Order Results"
182
- ),
183
- ),
184
- ]
185
- )
124
+ order += 1
125
+
126
+ # Convert the 'Integration Order' and 'Best Integration Order' column to integer
127
+ summary_stationarity["Integration Order"] = summary_stationarity[
128
+ "Integration Order"
129
+ ].astype(int)
130
+ best_integration_order["Best Integration Order"] = best_integration_order[
131
+ "Best Integration Order"
132
+ ].astype(int)
133
+
134
+ return {
135
+ "Stationarity Analysis Results": summary_stationarity,
136
+ "Best Integration Order Results": best_integration_order,
137
+ }