validmind 2.3.3__py3-none-any.whl → 2.4.0__py3-none-any.whl

Files changed (162)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +8 -1
  3. validmind/ai/utils.py +2 -1
  4. validmind/client.py +1 -0
  5. validmind/datasets/regression/fred_timeseries.py +272 -0
  6. validmind/tests/__init__.py +14 -468
  7. validmind/tests/__types__.py +10 -0
  8. validmind/tests/_store.py +102 -0
  9. validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
  10. validmind/tests/data_validation/ADF.py +8 -10
  11. validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
  12. validmind/tests/data_validation/AutoAR.py +2 -4
  13. validmind/tests/data_validation/AutoMA.py +2 -4
  14. validmind/tests/data_validation/AutoSeasonality.py +8 -10
  15. validmind/tests/data_validation/AutoStationarity.py +8 -10
  16. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
  17. validmind/tests/data_validation/BivariateHistograms.py +8 -10
  18. validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
  19. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
  20. validmind/tests/data_validation/ClassImbalance.py +2 -4
  21. validmind/tests/data_validation/DFGLSArch.py +2 -4
  22. validmind/tests/data_validation/DatasetDescription.py +7 -9
  23. validmind/tests/data_validation/DatasetSplit.py +8 -9
  24. validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
  25. validmind/tests/data_validation/Duplicates.py +2 -4
  26. validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
  27. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
  28. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
  29. validmind/tests/data_validation/HighCardinality.py +2 -4
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
  32. validmind/tests/data_validation/IQROutliersTable.py +2 -4
  33. validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
  34. validmind/tests/data_validation/KPSS.py +8 -10
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
  36. validmind/tests/data_validation/MissingValues.py +2 -4
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
  38. validmind/tests/data_validation/MissingValuesRisk.py +2 -4
  39. validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
  40. validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
  41. validmind/tests/data_validation/RollingStatsPlot.py +2 -4
  42. validmind/tests/data_validation/ScatterPlot.py +2 -4
  43. validmind/tests/data_validation/SeasonalDecompose.py +70 -44
  44. validmind/tests/data_validation/Skewness.py +2 -4
  45. validmind/tests/data_validation/SpreadPlot.py +2 -4
  46. validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
  47. validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
  48. validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
  49. validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
  50. validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
  51. validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
  52. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
  53. validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
  54. validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
  55. validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
  56. validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
  57. validmind/tests/data_validation/TimeSeriesOutliers.py +32 -45
  58. validmind/tests/data_validation/TooManyZeroValues.py +2 -4
  59. validmind/tests/data_validation/UniqueRows.py +2 -4
  60. validmind/tests/data_validation/WOEBinPlots.py +2 -4
  61. validmind/tests/data_validation/WOEBinTable.py +2 -4
  62. validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
  63. validmind/tests/data_validation/nlp/CommonWords.py +2 -4
  64. validmind/tests/data_validation/nlp/Hashtags.py +2 -4
  65. validmind/tests/data_validation/nlp/Mentions.py +2 -4
  66. validmind/tests/data_validation/nlp/Punctuations.py +2 -4
  67. validmind/tests/data_validation/nlp/StopWords.py +2 -4
  68. validmind/tests/data_validation/nlp/TextDescription.py +2 -4
  69. validmind/tests/decorator.py +10 -8
  70. validmind/tests/load.py +264 -0
  71. validmind/tests/metadata.py +59 -0
  72. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
  73. validmind/tests/model_validation/FeaturesAUC.py +6 -8
  74. validmind/tests/model_validation/ModelMetadata.py +8 -9
  75. validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
  76. validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
  77. validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
  78. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
  79. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
  80. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
  81. validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
  82. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
  83. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
  84. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
  85. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
  86. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
  87. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
  88. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
  89. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
  90. validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
  91. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
  92. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
  93. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
  94. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
  95. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
  96. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
  97. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
  98. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
  99. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
  100. validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
  101. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
  102. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
  103. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
  104. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +9 -11
  105. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
  106. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
  107. validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
  108. validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
  109. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
  110. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
  111. validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
  112. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
  113. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
  114. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
  115. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
  116. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
  117. validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
  118. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
  119. validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
  120. validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
  121. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
  122. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
  123. validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
  124. validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
  125. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
  126. validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
  127. validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
  128. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
  129. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
  130. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
  132. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
  133. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
  134. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
  135. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
  136. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
  137. validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
  138. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
  139. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
  140. validmind/tests/prompt_validation/Bias.py +2 -4
  141. validmind/tests/prompt_validation/Clarity.py +2 -4
  142. validmind/tests/prompt_validation/Conciseness.py +2 -4
  143. validmind/tests/prompt_validation/Delimitation.py +2 -4
  144. validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
  145. validmind/tests/prompt_validation/Robustness.py +2 -4
  146. validmind/tests/prompt_validation/Specificity.py +2 -4
  147. validmind/tests/run.py +394 -0
  148. validmind/tests/test_providers.py +12 -0
  149. validmind/tests/utils.py +16 -0
  150. validmind/unit_metrics/__init__.py +12 -4
  151. validmind/unit_metrics/composite.py +3 -0
  152. validmind/vm_models/test/metric.py +8 -5
  153. validmind/vm_models/test/result_wrapper.py +2 -1
  154. validmind/vm_models/test/test.py +14 -11
  155. validmind/vm_models/test/threshold_test.py +1 -0
  156. validmind/vm_models/test_suite/runner.py +1 -0
  157. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/METADATA +70 -36
  158. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/RECORD +162 -146
  159. /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
  160. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/LICENSE +0 -0
  161. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/WHEEL +0 -0
  162. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/entry_points.txt +0 -0
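
Most of the per-file hunks that follow apply the same mechanical change: the nested metadata dictionary on each Metric/ThresholdTest subclass is replaced by flat tasks and tags class attributes, with the old "task_types" key renamed to tasks. A minimal sketch of the pattern is shown below; the ExampleMetric class is illustrative only and not part of the package, while the base-class import follows how the bundled tests in this diff are written.

from validmind.vm_models import Metric  # base class used by the bundled tests


class ExampleMetric(Metric):
    """Illustrative only; shows the 2.3.3 -> 2.4.0 attribute migration."""

    name = "example_metric"
    required_inputs = ["dataset"]

    # validmind 2.3.3 and earlier: a single nested dict
    # metadata = {
    #     "task_types": ["regression"],
    #     "tags": ["time_series_data", "statistical_test"],
    # }

    # validmind 2.4.0: flat class attributes
    tasks = ["regression"]
    tags = ["time_series_data", "statistical_test"]

    def run(self):
        # Test logic is unaffected by the migration; body elided here.
        ...

The new loader and registry modules added in this release (validmind/tests/load.py, run.py, metadata.py, _store.py) presumably consume these flat attributes when listing and filtering tests.
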
@@ -47,16 +47,14 @@ class ADF(Metric):
 
     name = "adf"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "time_series_data",
-            "statsmodels",
-            "forecasting",
-            "statistical_test",
-            "stationarity",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "time_series_data",
+        "statsmodels",
+        "forecasting",
+        "statistical_test",
+        "stationarity",
+    ]
 
     def summary(self, metric_value: dict):
         table = pd.DataFrame.from_dict(metric_value, orient="index")
@@ -57,16 +57,14 @@ class ANOVAOneWayTable(Metric):
     name = "anova_one_way_table"
     required_inputs = ["dataset"]
     default_params = {"features": None, "p_threshold": 0.05}
-    metadata = {
-        "task_types": ["classification"],
-        "tags": [
-            "tabular_data",
-            "statistical_test",
-            "multiclass_classification",
-            "binary_classification",
-            "numerical_data",
-        ],
-    }
+    tasks = ["classification"]
+    tags = [
+        "tabular_data",
+        "statistical_test",
+        "multiclass_classification",
+        "binary_classification",
+        "numerical_data",
+    ]
 
     def run(self):
         features = self.params["features"]
@@ -61,10 +61,8 @@ class AutoAR(Metric):
     name = "auto_ar"
     required_inputs = ["dataset"]
     default_params = {"max_ar_order": 3}
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["time_series_data", "statsmodels", "forecasting", "statistical_test"],
-    }
+    tasks = ["regression"]
+    tags = ["time_series_data", "statsmodels", "forecasting", "statistical_test"]
 
     def run(self):
         if "max_ar_order" not in self.params:
@@ -57,10 +57,8 @@ class AutoMA(Metric):
     name = "auto_ma"
     required_inputs = ["dataset"]
     default_params = {"max_ma_order": 3}
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["time_series_data", "statsmodels", "forecasting", "statistical_test"],
-    }
+    tasks = ["regression"]
+    tags = ["time_series_data", "statsmodels", "forecasting", "statistical_test"]
 
     def run(self):
         if "max_ma_order" not in self.params:
@@ -61,16 +61,14 @@ class AutoSeasonality(Metric):
     name = "auto_seasonality"
     required_inputs = ["dataset"]
     default_params = {"min_period": 1, "max_period": 4}
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "time_series_data",
-            "forecasting",
-            "statistical_test",
-            "statsmodels",
-            "seasonality",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "time_series_data",
+        "forecasting",
+        "statistical_test",
+        "statsmodels",
+        "seasonality",
+    ]
 
     def evaluate_seasonal_periods(self, series, min_period, max_period):
         seasonal_periods = []
@@ -54,16 +54,14 @@ class AutoStationarity(Metric):
     name = "auto_stationarity"
     required_inputs = ["dataset"]
     default_params = {"max_order": 5, "threshold": 0.05}
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "time_series_data",
-            "statsmodels",
-            "forecasting",
-            "statistical_test",
-            "stationarity",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "time_series_data",
+        "statsmodels",
+        "forecasting",
+        "statistical_test",
+        "stationarity",
+    ]
 
     def run(self):
         if "max_order" not in self.params:
@@ -56,16 +56,14 @@ class BivariateFeaturesBarPlots(Metric):
     name = "bivariate_features_bar_plots"
     required_inputs = ["dataset"]
     default_params = {"features_pairs": None}
-    metadata = {
-        "task_types": ["classification"],
-        "tags": [
-            "tabular_data",
-            "categorical_data",
-            "binary_classification",
-            "multiclass_classification",
-            "visualization",
-        ],
-    }
+    tasks = ["classification"]
+    tags = [
+        "tabular_data",
+        "categorical_data",
+        "binary_classification",
+        "multiclass_classification",
+        "visualization",
+    ]
 
     def run(self):
         features_pairs = self.params["features_pairs"]
@@ -55,16 +55,14 @@ class BivariateHistograms(Metric):
     name = "bivariate_histograms"
     required_inputs = ["dataset"]
     default_params = {"features_pairs": None, "target_filter": None}
-    metadata = {
-        "task_types": ["classification"],
-        "tags": [
-            "tabular_data",
-            "categorical_data",
-            "binary_classification",
-            "multiclass_classification",
-            "visualization",
-        ],
-    }
+    tasks = ["classification"]
+    tags = [
+        "tabular_data",
+        "categorical_data",
+        "binary_classification",
+        "multiclass_classification",
+        "visualization",
+    ]
 
     def plot_bivariate_histogram(self, features_pairs, target_filter):
         status_var = self.inputs.dataset.target_column
@@ -54,16 +54,14 @@ class BivariateScatterPlots(Metric):
     name = "bivariate_scatter_plots"
     required_inputs = ["dataset"]
     default_params = {"selected_columns": None}
-    metadata = {
-        "task_types": ["classification"],
-        "tags": [
-            "tabular_data",
-            "categorical_data",
-            "binary_classification",
-            "multiclass_classification",
-            "visualization",
-        ],
-    }
+    tasks = ["classification"]
+    tags = [
+        "tabular_data",
+        "categorical_data",
+        "binary_classification",
+        "multiclass_classification",
+        "visualization",
+    ]
 
     def plot_bivariate_scatter(self, columns):
         figures = []
@@ -54,16 +54,14 @@ class ChiSquaredFeaturesTable(Metric):
     name = "chi_squared_features_table"
     required_inputs = ["dataset"]
     default_params = {"cat_features": None, "p_threshold": 0.05}
-    metadata = {
-        "task_types": ["classification"],
-        "tags": [
-            "tabular_data",
-            "categorical_data",
-            "statistical_test",
-            "binary_classification",
-            "multiclass_classification",
-        ],
-    }
+    tasks = ["classification"]
+    tags = [
+        "tabular_data",
+        "categorical_data",
+        "statistical_test",
+        "binary_classification",
+        "multiclass_classification",
+    ]
 
     def run(self):
         target_column = self.inputs.dataset.target_column
@@ -73,10 +73,8 @@ class ClassImbalance(ThresholdTest):
     name = "class_imbalance"
     required_inputs = ["dataset"]
     default_params = {"min_percent_threshold": 10}
-    metadata = {
-        "task_types": ["classification"],
-        "tags": ["tabular_data", "binary_classification", "multiclass_classification"],
-    }
+    tasks = ["classification"]
+    tags = ["tabular_data", "binary_classification", "multiclass_classification"]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         return ResultSummary(
@@ -53,10 +53,8 @@ class DFGLSArch(Metric):
 
     name = "dickey_fuller_gls"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["time_series_data", "forecasting", "unit_root_test"],
-    }
+    tasks = ["regression"]
+    tags = ["time_series_data", "forecasting", "unit_root_test"]
 
     def run(self):
         """
@@ -69,15 +69,13 @@ class DatasetDescription(Metric):
 
     name = "dataset_description"
    required_inputs = ["dataset"]
-    metadata = {
-        "task_types": [
-            "classification",
-            "regression",
-            "text_classification",
-            "text_summarization",
-        ],
-        "tags": ["tabular_data", "time_series_data", "text_data"],
-    }
+    tasks = [
+        "classification",
+        "regression",
+        "text_classification",
+        "text_summarization",
+    ]
+    tags = ["tabular_data", "time_series_data", "text_data"]
 
     def summary(self, metric_value):
         """
@@ -48,15 +48,14 @@ class DatasetSplit(Metric):
 
     name = "dataset_split"
     required_inputs = ["datasets"]
-    metadata = {
-        "task_types": [
-            "classification",
-            "regression",
-            "text_classification",
-            "text_summarization",
-        ],
-        "tags": ["tabular_data", "time_series_data", "text_data"],
-    }
+    tasks = [
+        "classification",
+        "regression",
+        "text_classification",
+        "text_summarization",
+    ]
+
+    tags = ["tabular_data", "time_series_data", "text_data"]
 
     dataset_labels = {
         "train_ds": "Training",
@@ -53,10 +53,8 @@ class DescriptiveStatistics(Metric):
 
     name = "descriptive_statistics"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "time_series_data"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "time_series_data"]
 
     def get_summary_statistics_numerical(self, df, numerical_fields):
         percentiles = [0.25, 0.5, 0.75, 0.90, 0.95]
@@ -55,10 +55,8 @@ class Duplicates(ThresholdTest):
     name = "duplicates"
     required_inputs = ["dataset"]
     default_params = {"min_threshold": 1}
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "data_quality", "text_data"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "data_quality", "text_data"]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -51,10 +51,8 @@ class EngleGrangerCoint(Metric):
     name = "engle_granger_coint"
     required_inputs = ["dataset"]
     default_params = {"threshold": 0.05}
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["time_series_data", "statistical_test", "forecasting"],
-    }
+    tasks = ["regression"]
+    tags = ["time_series_data", "statistical_test", "forecasting"]
 
     def run(self):
         threshold = self.params["threshold"]
@@ -48,10 +48,8 @@ class FeatureTargetCorrelationPlot(Metric):
     name = "feature_target_correlation_plot"
     required_inputs = ["dataset"]
     default_params = {"features": None, "fig_height": 600}
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "visualization", "feature_importance", "correlation"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "visualization", "feature_importance", "correlation"]
 
     def run(self):
         fig_height = self.params["fig_height"]
@@ -56,10 +56,8 @@ class HeatmapFeatureCorrelations(Metric):
     name = "heatmap_feature_correlations"
     required_inputs = ["dataset"]
     default_params = {"declutter": None, "fontsize": None, "num_features": None}
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "visualization", "correlation"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "visualization", "correlation"]
 
     def run(self):
         features = self.params.get("features")
@@ -57,10 +57,8 @@ class HighCardinality(ThresholdTest):
         "percent_threshold": 0.1,
         "threshold_type": "percent",  # or "num"
     }
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "data_quality", "categorical_data"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "data_quality", "categorical_data"]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -59,10 +59,8 @@ class HighPearsonCorrelation(ThresholdTest):
     name = "pearson_correlation"
     required_inputs = ["dataset"]
     default_params = {"max_threshold": 0.3}
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "data_quality", "correlation"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "data_quality", "correlation"]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """The high pearson correlation test returns results like these:
@@ -63,10 +63,8 @@ class IQROutliersBarPlot(Metric):
     name = "iqr_outliers_bar_plot"
     required_inputs = ["dataset"]
     default_params = {"threshold": 1.5, "num_features": None, "fig_width": 800}
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "visualization", "numerical_data"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "visualization", "numerical_data"]
 
     def run(self):
         df = self.inputs.dataset.df
@@ -54,10 +54,8 @@ class IQROutliersTable(Metric):
     name = "iqr_outliers_table"
     required_inputs = ["dataset"]
     default_params = {"features": None, "threshold": 1.5}
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "numerical_data"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "numerical_data"]
 
     def run(self):
         features = self.params["features"]
@@ -55,10 +55,8 @@ class IsolationForestOutliers(Metric):
         "contamination": 0.1,
         "features_columns": None,
     }
-    metadata = {
-        "task_types": ["classification"],
-        "tags": ["tabular_data", "anomaly_detection"],
-    }
+    tasks = ["classification"]
+    tags = ["tabular_data", "anomaly_detection"]
 
     required_inputs = ["dataset"]
 
@@ -51,16 +51,14 @@ class KPSS(Metric):
 
     name = "kpss"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "time_series_data",
-            "forecasting",
-            "stationarity",
-            "unit_root_test",
-            "statsmodels",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "time_series_data",
+        "forecasting",
+        "stationarity",
+        "unit_root_test",
+        "statsmodels",
+    ]
 
     def run(self):
         """
@@ -51,10 +51,8 @@ class LaggedCorrelationHeatmap(Metric):
 
     name = "lagged_correlation_heatmap"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["time_series_data", "visualization"],
-    }
+    tasks = ["regression"]
+    tags = ["time_series_data", "visualization"]
 
     def _compute_correlations(self, df, target_col, independent_vars, num_lags):
         correlations = np.zeros((len(independent_vars), num_lags + 1))
@@ -52,10 +52,8 @@ class MissingValues(ThresholdTest):
     name = "missing"
     required_inputs = ["dataset"]
     default_params = {"min_threshold": 1}
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "data_quality"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "data_quality"]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -55,10 +55,8 @@ class MissingValuesBarPlot(Metric):
     name = "missing_values_bar_plot"
     required_inputs = ["dataset"]
     default_params = {"threshold": 80, "fig_height": 600}
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "data_quality", "visualization"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "data_quality", "visualization"]
 
     def run(self):
         threshold = self.params["threshold"]
@@ -52,10 +52,8 @@ class MissingValuesRisk(Metric):
 
     name = "missing_values_risk"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "data_quality", "risk_analysis"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "data_quality", "risk_analysis"]
 
     def run(self):
         total_cells = self.inputs.dataset.df.size
@@ -50,10 +50,8 @@ class PearsonCorrelationMatrix(Metric):
 
     name = "pearson_correlation_matrix"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "numerical_data", "correlation"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "numerical_data", "correlation"]
 
     def run(self):
         columns = self.params.get("columns", list(self.inputs.dataset.df.columns))
@@ -51,15 +51,13 @@ class PhillipsPerronArch(Metric):
 
     name = "phillips_perron"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "time_series_data",
-            "forecasting",
-            "statistical_test",
-            "unit_root_test",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "time_series_data",
+        "forecasting",
+        "statistical_test",
+        "unit_root_test",
+    ]
 
     def run(self):
         """
@@ -54,10 +54,8 @@ class RollingStatsPlot(Metric):
     name = "rolling_stats_plot"
     required_inputs = ["dataset"]
    default_params = {"window_size": 12}
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["time_series_data", "visualization", "stationarity"],
-    }
+    tasks = ["regression"]
+    tags = ["time_series_data", "visualization", "stationarity"]
 
     def plot_rolling_statistics(self, col, window_size=12):
         """
@@ -52,10 +52,8 @@ class ScatterPlot(Metric):
 
     name = "scatter_plot"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "visualization"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "visualization"]
 
     def run(self):
         columns = list(self.inputs.dataset.df.columns)