validmind-2.3.3-py3-none-any.whl → validmind-2.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +8 -1
  3. validmind/ai/utils.py +2 -1
  4. validmind/client.py +1 -0
  5. validmind/datasets/regression/fred_timeseries.py +272 -0
  6. validmind/tests/__init__.py +14 -468
  7. validmind/tests/__types__.py +10 -0
  8. validmind/tests/_store.py +102 -0
  9. validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
  10. validmind/tests/data_validation/ADF.py +8 -10
  11. validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
  12. validmind/tests/data_validation/AutoAR.py +2 -4
  13. validmind/tests/data_validation/AutoMA.py +2 -4
  14. validmind/tests/data_validation/AutoSeasonality.py +8 -10
  15. validmind/tests/data_validation/AutoStationarity.py +8 -10
  16. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
  17. validmind/tests/data_validation/BivariateHistograms.py +8 -10
  18. validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
  19. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
  20. validmind/tests/data_validation/ClassImbalance.py +2 -4
  21. validmind/tests/data_validation/DFGLSArch.py +2 -4
  22. validmind/tests/data_validation/DatasetDescription.py +7 -9
  23. validmind/tests/data_validation/DatasetSplit.py +8 -9
  24. validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
  25. validmind/tests/data_validation/Duplicates.py +2 -4
  26. validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
  27. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
  28. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
  29. validmind/tests/data_validation/HighCardinality.py +2 -4
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
  32. validmind/tests/data_validation/IQROutliersTable.py +2 -4
  33. validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
  34. validmind/tests/data_validation/KPSS.py +8 -10
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
  36. validmind/tests/data_validation/MissingValues.py +2 -4
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
  38. validmind/tests/data_validation/MissingValuesRisk.py +2 -4
  39. validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
  40. validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
  41. validmind/tests/data_validation/RollingStatsPlot.py +2 -4
  42. validmind/tests/data_validation/ScatterPlot.py +2 -4
  43. validmind/tests/data_validation/SeasonalDecompose.py +70 -44
  44. validmind/tests/data_validation/Skewness.py +2 -4
  45. validmind/tests/data_validation/SpreadPlot.py +2 -4
  46. validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
  47. validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
  48. validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
  49. validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
  50. validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
  51. validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
  52. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
  53. validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
  54. validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
  55. validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
  56. validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
  57. validmind/tests/data_validation/TimeSeriesOutliers.py +32 -45
  58. validmind/tests/data_validation/TooManyZeroValues.py +2 -4
  59. validmind/tests/data_validation/UniqueRows.py +2 -4
  60. validmind/tests/data_validation/WOEBinPlots.py +2 -4
  61. validmind/tests/data_validation/WOEBinTable.py +2 -4
  62. validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
  63. validmind/tests/data_validation/nlp/CommonWords.py +2 -4
  64. validmind/tests/data_validation/nlp/Hashtags.py +2 -4
  65. validmind/tests/data_validation/nlp/Mentions.py +2 -4
  66. validmind/tests/data_validation/nlp/Punctuations.py +2 -4
  67. validmind/tests/data_validation/nlp/StopWords.py +2 -4
  68. validmind/tests/data_validation/nlp/TextDescription.py +2 -4
  69. validmind/tests/decorator.py +10 -8
  70. validmind/tests/load.py +264 -0
  71. validmind/tests/metadata.py +59 -0
  72. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
  73. validmind/tests/model_validation/FeaturesAUC.py +6 -8
  74. validmind/tests/model_validation/ModelMetadata.py +8 -9
  75. validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
  76. validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
  77. validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
  78. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
  79. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
  80. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
  81. validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
  82. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
  83. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
  84. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
  85. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
  86. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
  87. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
  88. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
  89. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
  90. validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
  91. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
  92. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
  93. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
  94. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
  95. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
  96. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
  97. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
  98. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
  99. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
  100. validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
  101. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
  102. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
  103. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
  104. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +9 -11
  105. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
  106. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
  107. validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
  108. validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
  109. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
  110. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
  111. validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
  112. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
  113. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
  114. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
  115. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
  116. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
  117. validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
  118. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
  119. validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
  120. validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
  121. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
  122. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
  123. validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
  124. validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
  125. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
  126. validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
  127. validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
  128. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
  129. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
  130. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
  132. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
  133. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
  134. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
  135. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
  136. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
  137. validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
  138. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
  139. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
  140. validmind/tests/prompt_validation/Bias.py +2 -4
  141. validmind/tests/prompt_validation/Clarity.py +2 -4
  142. validmind/tests/prompt_validation/Conciseness.py +2 -4
  143. validmind/tests/prompt_validation/Delimitation.py +2 -4
  144. validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
  145. validmind/tests/prompt_validation/Robustness.py +2 -4
  146. validmind/tests/prompt_validation/Specificity.py +2 -4
  147. validmind/tests/run.py +394 -0
  148. validmind/tests/test_providers.py +12 -0
  149. validmind/tests/utils.py +16 -0
  150. validmind/unit_metrics/__init__.py +12 -4
  151. validmind/unit_metrics/composite.py +3 -0
  152. validmind/vm_models/test/metric.py +8 -5
  153. validmind/vm_models/test/result_wrapper.py +2 -1
  154. validmind/vm_models/test/test.py +14 -11
  155. validmind/vm_models/test/threshold_test.py +1 -0
  156. validmind/vm_models/test_suite/runner.py +1 -0
  157. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/METADATA +70 -36
  158. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/RECORD +162 -146
  159. /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
  160. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/LICENSE +0 -0
  161. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/WHEEL +0 -0
  162. {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/entry_points.txt +0 -0
@@ -61,13 +61,11 @@ class RegressionModelsPerformanceComparison(Metric):
     name = "models_performance_comparison"
     required_inputs = ["dataset", "models"]
 
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def regression_errors(self, y_true_test, y_pred_test):
         mae_test = mean_absolute_error(y_true_test, y_pred_test)
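
This hunk illustrates the migration applied across nearly every test in this release: the nested `metadata` dict (keyed by `task_types` and `tags`) is flattened into two plain class attributes, `tasks` and `tags`. A minimal sketch of the new declaration style on a hypothetical test class (the class name and values are illustrative, not taken from this diff):

from validmind.vm_models import Metric

class MyMetric(Metric):  # hypothetical example, not a test from this diff
    name = "my_metric"
    required_inputs = ["model", "dataset"]

    # 2.3.x declared:
    #   metadata = {"task_types": ["regression"], "tags": ["model_performance"]}
    # 2.4.0 flattens that dict into:
    tasks = ["regression"]
    tags = ["model_performance"]

The new `validmind/tests/load.py` and `validmind/tests/metadata.py` modules listed above presumably consume these attributes when listing and filtering tests.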
@@ -43,13 +43,11 @@ class RegressionR2Square(Metric):
 
     name = "regression_errors_r2_square"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def summary(self, raw_results):
         """
@@ -0,0 +1,63 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+from sklearn import metrics
+
+from validmind import tags, tasks
+from validmind.tests.model_validation.statsmodels.statsutils import adj_r2_score
+
+
+@tags("model_performance", "sklearn")
+@tasks("regression", "time_series_forecasting")
+def RegressionR2SquareComparison(datasets, models):
+    """
+    Compare R-Squared and Adjusted R-Squared values for each model and generate a summary table
+    with the results.
+
+    **Purpose**: The purpose of this function is to compare the R-Squared and Adjusted R-Squared values for different models applied to various datasets.
+
+    **Test Mechanism**: The function iterates through each dataset-model pair, calculates the R-Squared and Adjusted R-Squared values, and generates a summary table with these results.
+
+    **Signs of High Risk**:
+    - If the R-Squared values are significantly low, it could indicate that the model is not explaining much of the variability in the dataset.
+    - A significant difference between R-Squared and Adjusted R-Squared values might indicate that the model includes irrelevant features.
+
+    **Strengths**:
+    - Provides a quantitative measure of model performance in terms of variance explained.
+    - Adjusted R-Squared accounts for the number of predictors, making it a more reliable measure when comparing models with different numbers of features.
+
+    **Limitations**:
+    - Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `feature_columns` attributes.
+    - The function relies on `adj_r2_score` from the `statsmodels.statsutils` module, which should be correctly implemented and imported.
+    - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
+
+    """
+    results_list = []
+
+    for dataset, model in zip(datasets, models):
+        dataset_name = dataset.input_id
+        model_name = model.input_id
+
+        y_true = dataset.y
+        y_pred = dataset.y_pred(model)  # Assuming dataset has X for features
+        y_true = y_true.astype(y_pred.dtype)
+
+        r2s = metrics.r2_score(y_true, y_pred)
+        X_columns = dataset.feature_columns
+        adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(X_columns))
+
+        # Append results to the list
+        results_list.append(
+            {
+                "Model": model_name,
+                "Dataset": dataset_name,
+                "R-Squared": r2s,
+                "Adjusted R-Squared": adj_r2,
+            }
+        )
+
+    # Convert results list to a DataFrame
+    results_df = pd.DataFrame(results_list)
+    return results_df
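
`RegressionR2SquareComparison` above is one of the new decorator-registered functional tests in 2.4.0: `@tags` and `@tasks` (imported from `validmind`) replace the class attributes used by `Metric` subclasses. A usage sketch, assuming `run_test` from the new `validmind/tests/run.py` takes a test ID plus an `inputs` mapping; the dataset and model variables are illustrative, not part of this diff:

import validmind as vm

# vm_train_ds, vm_test_ds and vm_model are assumed to come from
# vm.init_dataset(...) / vm.init_model(...) with input_id set.
result = vm.tests.run_test(
    "validmind.model_validation.sklearn.RegressionR2SquareComparison",
    inputs={
        # zip(datasets, models) pairs element-wise, so both lists
        # must have the same length
        "datasets": [vm_train_ds, vm_test_ds],
        "models": [vm_model, vm_model],
    },
)

For reference, the adjustment that `adj_r2_score(y_true, y_pred, n, p)` presumably applies is the standard one: adjusted R² = 1 - (1 - R²)(n - 1)/(n - p - 1), where n is the number of observations and p the number of feature columns.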
@@ -75,20 +75,16 @@ class RobustnessDiagnosis(ThresholdTest):
         "scaling_factor_std_dev_list": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
         "accuracy_decay_threshold": 4,
     }
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_diagnosis",
-            "visualization",
-        ],
-    }
-
-    default_metrics = {
-        "accuracy": metrics.accuracy_score,
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_diagnosis",
+        "visualization",
+    ]
+
+    default_metrics = {"accuracy": metrics.accuracy_score}
 
     def run(self):
         # Validate X std deviation parameter
@@ -65,16 +65,14 @@ class SHAPGlobalImportance(Metric):
 
     name = "shap"
    required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "feature_importance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "feature_importance",
+        "visualization",
+    ]
     default_params = {
         "kernel_explainer_samples": 10,
         "tree_or_linear_explainer_samples": 200,
@@ -60,13 +60,11 @@ class SilhouettePlot(Metric):
 
     name = "silhouette_plot"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def run(self):
         y_pred_train = self.inputs.dataset.y_pred(self.inputs.model)
@@ -72,16 +72,14 @@ class TrainingTestDegradation(ThresholdTest):
         "max_threshold": 0.10,  # Maximum 10% degradation
     }
 
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "visualization",
+    ]
 
     default_metrics = {
         "accuracy": metrics.accuracy_score,
@@ -50,13 +50,11 @@ class VMeasure(ClusterPerformance):
 
     name = "v_measure_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"V Measure": metrics.v_measure_score}
@@ -85,16 +85,14 @@ class WeakspotsDiagnosis(ThresholdTest):
         },
     }
 
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_diagnosis",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_diagnosis",
+        "visualization",
+    ]
 
     # TODO: allow configuring
     default_metrics = {
@@ -58,10 +58,8 @@ class AutoARIMA(Metric):
 
     name = "auto_arima"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["time_series_data", "forecasting", "model_selection", "statsmodels"],
-    }
+    tasks = ["regression"]
+    tags = ["time_series_data", "forecasting", "model_selection", "statsmodels"]
 
     max_p = 3
     max_d = 2
@@ -49,10 +49,8 @@ class BoxPierce(Metric):
 
     name = "box_pierce"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["time_series_data", "forecasting", "statistical_test", "statsmodels"],
-    }
+    tasks = ["regression"]
+    tags = ["time_series_data", "forecasting", "statistical_test", "statsmodels"]
 
     def run(self):
         """
@@ -57,10 +57,9 @@ class CumulativePredictionProbabilities(Metric):
 
     name = "cumulative_prediction_probabilities"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["classification"],
-        "tags": ["logistic_regression", "visualization"],
-    }
+    tasks = ["classification"]
+    tags = ["logistic_regression", "visualization"]
+
     default_params = {"title": "Cumulative Probabilities"}
 
     @staticmethod
@@ -44,10 +44,8 @@ class DurbinWatsonTest(Metric):
 
     name = "durbin_watson"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["time_series_data", "forecasting", "statistical_test", "statsmodels"],
-    }
+    tasks = ["regression"]
+    tags = ["time_series_data", "forecasting", "statistical_test", "statsmodels"]
 
     def run(self):
         """
@@ -59,10 +59,8 @@ class GINITable(Metric):
 
     name = "gini_table"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["classification"],
-        "tags": ["visualization", "model_performance"],
-    }
+    tasks = ["classification"]
+    tags = ["visualization", "model_performance"]
 
     def run(self):
 
@@ -45,15 +45,13 @@ class JarqueBera(Metric):
 
     name = "jarque_bera"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": [
-            "tabular_data",
-            "data_distribution",
-            "statistical_test",
-            "statsmodels",
-        ],
-    }
+    tasks = ["classification", "regression"]
+    tags = [
+        "tabular_data",
+        "data_distribution",
+        "statistical_test",
+        "statsmodels",
+    ]
 
     def run(self):
         """
@@ -52,15 +52,13 @@ class KolmogorovSmirnov(Metric):
     name = "kolmogorov_smirnov"
     required_inputs = ["dataset"]
     default_params = {"dist": "norm"}
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": [
-            "tabular_data",
-            "data_distribution",
-            "statistical_test",
-            "statsmodels",
-        ],
-    }
+    tasks = ["classification", "regression"]
+    tags = [
+        "tabular_data",
+        "data_distribution",
+        "statistical_test",
+        "statsmodels",
+    ]
 
     def summary(self, metric_value):
         results_table = metric_value["metrics_summary"]
@@ -45,10 +45,8 @@ class LJungBox(Metric):
 
     name = "ljung_box"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["time_series_data", "forecasting", "statistical_test", "statsmodels"],
-    }
+    tasks = ["regression"]
+    tags = ["time_series_data", "forecasting", "statistical_test", "statsmodels"]
 
     def run(self):
         """
@@ -56,15 +56,13 @@ class Lilliefors(Metric):
 
     name = "lilliefors_test"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": [
-            "tabular_data",
-            "data_distribution",
-            "statistical_test",
-            "statsmodels",
-        ],
-    }
+    tasks = ["classification", "regression"]
+    tags = [
+        "tabular_data",
+        "data_distribution",
+        "statistical_test",
+        "statsmodels",
+    ]
 
     def run(self):
         """
@@ -58,10 +58,8 @@ class PredictionProbabilitiesHistogram(Metric):
 
     name = "prediction_probabilities_histogram"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["classification"],
-        "tags": ["tabular_data", "visualization", "credit_risk", "logistic_regression"],
-    }
+    tasks = ["classification"]
+    tags = ["tabular_data", "visualization", "credit_risk", "logistic_regression"]
 
     default_params = {"title": "Histogram of Predictive Probabilities"}
 
@@ -55,10 +55,8 @@ class RegressionCoeffsPlot(Metric):
 
     name = "regression_coeffs_plot"
     required_inputs = ["models"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["tabular_data", "visualization", "model_interpretation"],
-    }
+    tasks = ["regression"]
+    tags = ["tabular_data", "visualization", "model_interpretation"]
 
     @staticmethod
     def plot_coefficients_with_ci(model, model_name):
@@ -57,15 +57,13 @@ class RegressionFeatureSignificance(Metric):
     required_inputs = ["models"]
 
     default_params = {"fontsize": 10, "p_threshold": 0.05}
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "statistical_test",
-            "model_interpretation",
-            "visualization",
-            "feature_importance",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "statistical_test",
+        "model_interpretation",
+        "visualization",
+        "feature_importance",
+    ]
 
     def run(self):
         fontsize = self.params["fontsize"]
@@ -58,10 +58,8 @@ class RegressionModelForecastPlot(Metric):
     name = "regression_forecast_plot"
     required_inputs = ["models", "datasets"]
     default_params = {"start_date": None, "end_date": None}
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["forecasting", "visualization"],
-    }
+    tasks = ["regression"]
+    tags = ["forecasting", "visualization"]
 
     def run(self):
         start_date = self.params["start_date"]
@@ -64,10 +64,8 @@ class RegressionModelForecastPlotLevels(Metric):
     default_params = {
         "transformation": None,
     }
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["forecasting", "visualization"],
-    }
+    tasks = ["regression"]
+    tags = ["forecasting", "visualization"]
 
     def run(self):
         transformation = self.params["transformation"]
@@ -62,10 +62,8 @@ class RegressionModelSensitivityPlot(Metric):
         "transformation": None,
         "shocks": [0.1],
     }
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["senstivity_analysis", "visualization"],
-    }
+    tasks = ["regression"]
+    tags = ["senstivity_analysis", "visualization"]
 
     def run(self):
         logger.info(self.params)
@@ -51,10 +51,8 @@ class RegressionModelSummary(Metric):
 
     name = "regression_model_summary"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["model_metadata", "model_comparison"],
-    }
+    tasks = ["regression"]
+    tags = ["model_metadata", "model_comparison"]
 
     def run(self):
         X_columns = self.inputs.dataset.feature_columns
@@ -46,10 +46,8 @@ class RegressionModelsCoeffs(Metric):
 
     name = "regression_models_coefficients"
     required_inputs = ["models"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": ["model_comparison"],
-    }
+    tasks = ["regression"]
+    tags = ["model_comparison"]
 
     def _build_model_summaries(self, all_coefficients):
         all_models_df = pd.DataFrame()
@@ -55,14 +55,12 @@ class RegressionPermutationFeatureImportance(Metric):
         "fontsize": 12,
         "figure_height": 500,
     }
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "statsmodels",
-            "feature_importance",
-            "visualization",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "statsmodels",
+        "feature_importance",
+        "visualization",
+    ]
 
     def run(self):
         x = self.inputs.dataset.x_df()
@@ -50,10 +50,8 @@ class RunsTest(Metric):
 
     name = "runs_test"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "statistical_test", "statsmodels"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "statistical_test", "statsmodels"]
 
     def run(self):
         """
@@ -52,10 +52,9 @@ class ScorecardHistogram(Metric):
 
     name = "scorecard_histogram"
     required_inputs = ["datasets"]
-    metadata = {
-        "task_types": ["classification"],
-        "tags": ["tabular_data", "visualization", "credit_risk"],
-    }
+    tasks = ["classification"]
+    tags = ["tabular_data", "visualization", "credit_risk"]
+
     default_params = {
         "title": "Histogram of Scores",
         "score_column": "score",
@@ -44,10 +44,8 @@ class ShapiroWilk(Metric):
 
     name = "shapiro_wilk"
     required_inputs = ["dataset"]
-    metadata = {
-        "task_types": ["classification", "regression"],
-        "tags": ["tabular_data", "data_distribution", "statistical_test"],
-    }
+    tasks = ["classification", "regression"]
+    tags = ["tabular_data", "data_distribution", "statistical_test"]
 
     def run(self):
         """
@@ -75,10 +75,8 @@ class Bias(ThresholdTest):
     name = "bias"
     required_inputs = ["model.prompt"]
     default_params = {"min_threshold": 7}
-    metadata = {
-        "task_types": ["text_classification", "text_summarization"],
-        "tags": ["llm", "few_shot"],
-    }
+    tasks = ["text_classification", "text_summarization"]
+    tags = ["llm", "few_shot"]
 
     system_prompt = """
 You are a prompt evaluation AI. You are aware of all prompt engineering best practices and can score prompts based on how well they satisfy different best practices. You analyse the prompts step-by-step based on provided documentation and provide a score and an explanation for how you produced that score.
@@ -64,10 +64,8 @@ class Clarity(ThresholdTest):
     name = "clarity"
     required_inputs = ["model.prompt"]
     default_params = {"min_threshold": 7}
-    metadata = {
-        "task_types": ["text_classification", "text_summarization"],
-        "tags": ["llm", "zero_shot", "few_shot"],
-    }
+    tasks = ["text_classification", "text_summarization"]
+    tags = ["llm", "zero_shot", "few_shot"]
 
     system_prompt = """
 You are a prompt evaluation AI. You are aware of all prompt engineering best practices and can score prompts based on how well they satisfy different metrics. You analyse the prompts step-by-step based on provided documentation and provide a score and an explanation for how you produced that score.