validmind 2.3.3__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +8 -1
- validmind/ai/utils.py +2 -1
- validmind/client.py +1 -0
- validmind/datasets/regression/fred_timeseries.py +272 -0
- validmind/tests/__init__.py +14 -468
- validmind/tests/__types__.py +10 -0
- validmind/tests/_store.py +102 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
- validmind/tests/data_validation/ADF.py +8 -10
- validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
- validmind/tests/data_validation/AutoAR.py +2 -4
- validmind/tests/data_validation/AutoMA.py +2 -4
- validmind/tests/data_validation/AutoSeasonality.py +8 -10
- validmind/tests/data_validation/AutoStationarity.py +8 -10
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
- validmind/tests/data_validation/BivariateHistograms.py +8 -10
- validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
- validmind/tests/data_validation/ClassImbalance.py +2 -4
- validmind/tests/data_validation/DFGLSArch.py +2 -4
- validmind/tests/data_validation/DatasetDescription.py +7 -9
- validmind/tests/data_validation/DatasetSplit.py +8 -9
- validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
- validmind/tests/data_validation/Duplicates.py +2 -4
- validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
- validmind/tests/data_validation/HighCardinality.py +2 -4
- validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
- validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
- validmind/tests/data_validation/IQROutliersTable.py +2 -4
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
- validmind/tests/data_validation/KPSS.py +8 -10
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
- validmind/tests/data_validation/MissingValues.py +2 -4
- validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
- validmind/tests/data_validation/MissingValuesRisk.py +2 -4
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
- validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
- validmind/tests/data_validation/RollingStatsPlot.py +2 -4
- validmind/tests/data_validation/ScatterPlot.py +2 -4
- validmind/tests/data_validation/SeasonalDecompose.py +70 -44
- validmind/tests/data_validation/Skewness.py +2 -4
- validmind/tests/data_validation/SpreadPlot.py +2 -4
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
- validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
- validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
- validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
- validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
- validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
- validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
- validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
- validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
- validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
- validmind/tests/data_validation/TimeSeriesOutliers.py +32 -45
- validmind/tests/data_validation/TooManyZeroValues.py +2 -4
- validmind/tests/data_validation/UniqueRows.py +2 -4
- validmind/tests/data_validation/WOEBinPlots.py +2 -4
- validmind/tests/data_validation/WOEBinTable.py +2 -4
- validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
- validmind/tests/data_validation/nlp/CommonWords.py +2 -4
- validmind/tests/data_validation/nlp/Hashtags.py +2 -4
- validmind/tests/data_validation/nlp/Mentions.py +2 -4
- validmind/tests/data_validation/nlp/Punctuations.py +2 -4
- validmind/tests/data_validation/nlp/StopWords.py +2 -4
- validmind/tests/data_validation/nlp/TextDescription.py +2 -4
- validmind/tests/decorator.py +10 -8
- validmind/tests/load.py +264 -0
- validmind/tests/metadata.py +59 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
- validmind/tests/model_validation/FeaturesAUC.py +6 -8
- validmind/tests/model_validation/ModelMetadata.py +8 -9
- validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
- validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
- validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +9 -11
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
- validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
- validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
- validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
- validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
- validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
- validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
- validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
- validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
- validmind/tests/prompt_validation/Bias.py +2 -4
- validmind/tests/prompt_validation/Clarity.py +2 -4
- validmind/tests/prompt_validation/Conciseness.py +2 -4
- validmind/tests/prompt_validation/Delimitation.py +2 -4
- validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
- validmind/tests/prompt_validation/Robustness.py +2 -4
- validmind/tests/prompt_validation/Specificity.py +2 -4
- validmind/tests/run.py +394 -0
- validmind/tests/test_providers.py +12 -0
- validmind/tests/utils.py +16 -0
- validmind/unit_metrics/__init__.py +12 -4
- validmind/unit_metrics/composite.py +3 -0
- validmind/vm_models/test/metric.py +8 -5
- validmind/vm_models/test/result_wrapper.py +2 -1
- validmind/vm_models/test/test.py +14 -11
- validmind/vm_models/test/threshold_test.py +1 -0
- validmind/vm_models/test_suite/runner.py +1 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/METADATA +70 -36
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/RECORD +162 -146
- /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/LICENSE +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/WHEEL +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/entry_points.txt +0 -0
@@ -61,13 +61,11 @@ class RegressionModelsPerformanceComparison(Metric):
|
|
61
61
|
name = "models_performance_comparison"
|
62
62
|
required_inputs = ["dataset", "models"]
|
63
63
|
|
64
|
-
|
65
|
-
|
66
|
-
"
|
67
|
-
|
68
|
-
|
69
|
-
],
|
70
|
-
}
|
64
|
+
tasks = ["regression"]
|
65
|
+
tags = [
|
66
|
+
"sklearn",
|
67
|
+
"model_performance",
|
68
|
+
]
|
71
69
|
|
72
70
|
def regression_errors(self, y_true_test, y_pred_test):
|
73
71
|
mae_test = mean_absolute_error(y_true_test, y_pred_test)
|
@@ -43,13 +43,11 @@ class RegressionR2Square(Metric):
|
|
43
43
|
|
44
44
|
name = "regression_errors_r2_square"
|
45
45
|
required_inputs = ["model", "datasets"]
|
46
|
-
|
47
|
-
|
48
|
-
"
|
49
|
-
|
50
|
-
|
51
|
-
],
|
52
|
-
}
|
46
|
+
tasks = ["regression"]
|
47
|
+
tags = [
|
48
|
+
"sklearn",
|
49
|
+
"model_performance",
|
50
|
+
]
|
53
51
|
|
54
52
|
def summary(self, raw_results):
|
55
53
|
"""
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
from sklearn import metrics
|
7
|
+
|
8
|
+
from validmind import tags, tasks
|
9
|
+
from validmind.tests.model_validation.statsmodels.statsutils import adj_r2_score
|
10
|
+
|
11
|
+
|
12
|
+
@tags("model_performance", "sklearn")
|
13
|
+
@tasks("regression", "time_series_forecasting")
|
14
|
+
def RegressionR2SquareComparison(datasets, models):
|
15
|
+
"""
|
16
|
+
Compare R-Squared and Adjusted R-Squared values for each model and generate a summary table
|
17
|
+
with the results.
|
18
|
+
|
19
|
+
**Purpose**: The purpose of this function is to compare the R-Squared and Adjusted R-Squared values for different models applied to various datasets.
|
20
|
+
|
21
|
+
**Test Mechanism**: The function iterates through each dataset-model pair, calculates the R-Squared and Adjusted R-Squared values, and generates a summary table with these results.
|
22
|
+
|
23
|
+
**Signs of High Risk**:
|
24
|
+
- If the R-Squared values are significantly low, it could indicate that the model is not explaining much of the variability in the dataset.
|
25
|
+
- A significant difference between R-Squared and Adjusted R-Squared values might indicate that the model includes irrelevant features.
|
26
|
+
|
27
|
+
**Strengths**:
|
28
|
+
- Provides a quantitative measure of model performance in terms of variance explained.
|
29
|
+
- Adjusted R-Squared accounts for the number of predictors, making it a more reliable measure when comparing models with different numbers of features.
|
30
|
+
|
31
|
+
**Limitations**:
|
32
|
+
- Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `feature_columns` attributes.
|
33
|
+
- The function relies on `adj_r2_score` from the `statsmodels.statsutils` module, which should be correctly implemented and imported.
|
34
|
+
- Requires that `dataset.y_pred(model)` returns the predicted values for the model.
|
35
|
+
|
36
|
+
"""
|
37
|
+
results_list = []
|
38
|
+
|
39
|
+
for dataset, model in zip(datasets, models):
|
40
|
+
dataset_name = dataset.input_id
|
41
|
+
model_name = model.input_id
|
42
|
+
|
43
|
+
y_true = dataset.y
|
44
|
+
y_pred = dataset.y_pred(model) # Assuming dataset has X for features
|
45
|
+
y_true = y_true.astype(y_pred.dtype)
|
46
|
+
|
47
|
+
r2s = metrics.r2_score(y_true, y_pred)
|
48
|
+
X_columns = dataset.feature_columns
|
49
|
+
adj_r2 = adj_r2_score(y_true, y_pred, len(y_true), len(X_columns))
|
50
|
+
|
51
|
+
# Append results to the list
|
52
|
+
results_list.append(
|
53
|
+
{
|
54
|
+
"Model": model_name,
|
55
|
+
"Dataset": dataset_name,
|
56
|
+
"R-Squared": r2s,
|
57
|
+
"Adjusted R-Squared": adj_r2,
|
58
|
+
}
|
59
|
+
)
|
60
|
+
|
61
|
+
# Convert results list to a DataFrame
|
62
|
+
results_df = pd.DataFrame(results_list)
|
63
|
+
return results_df
|
@@ -75,20 +75,16 @@ class RobustnessDiagnosis(ThresholdTest):
|
|
75
75
|
"scaling_factor_std_dev_list": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
|
76
76
|
"accuracy_decay_threshold": 4,
|
77
77
|
}
|
78
|
-
|
79
|
-
|
80
|
-
"
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
}
|
88
|
-
|
89
|
-
default_metrics = {
|
90
|
-
"accuracy": metrics.accuracy_score,
|
91
|
-
}
|
78
|
+
tasks = ["classification", "text_classification"]
|
79
|
+
tags = [
|
80
|
+
"sklearn",
|
81
|
+
"binary_classification",
|
82
|
+
"multiclass_classification",
|
83
|
+
"model_diagnosis",
|
84
|
+
"visualization",
|
85
|
+
]
|
86
|
+
|
87
|
+
default_metrics = {"accuracy": metrics.accuracy_score}
|
92
88
|
|
93
89
|
def run(self):
|
94
90
|
# Validate X std deviation parameter
|
@@ -65,16 +65,14 @@ class SHAPGlobalImportance(Metric):
|
|
65
65
|
|
66
66
|
name = "shap"
|
67
67
|
required_inputs = ["model", "dataset"]
|
68
|
-
|
69
|
-
|
70
|
-
"
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
],
|
77
|
-
}
|
68
|
+
tasks = ["classification", "text_classification"]
|
69
|
+
tags = [
|
70
|
+
"sklearn",
|
71
|
+
"binary_classification",
|
72
|
+
"multiclass_classification",
|
73
|
+
"feature_importance",
|
74
|
+
"visualization",
|
75
|
+
]
|
78
76
|
default_params = {
|
79
77
|
"kernel_explainer_samples": 10,
|
80
78
|
"tree_or_linear_explainer_samples": 200,
|
@@ -60,13 +60,11 @@ class SilhouettePlot(Metric):
|
|
60
60
|
|
61
61
|
name = "silhouette_plot"
|
62
62
|
required_inputs = ["model", "dataset"]
|
63
|
-
|
64
|
-
|
65
|
-
"
|
66
|
-
|
67
|
-
|
68
|
-
],
|
69
|
-
}
|
63
|
+
tasks = ["clustering"]
|
64
|
+
tags = [
|
65
|
+
"sklearn",
|
66
|
+
"model_performance",
|
67
|
+
]
|
70
68
|
|
71
69
|
def run(self):
|
72
70
|
y_pred_train = self.inputs.dataset.y_pred(self.inputs.model)
|
@@ -72,16 +72,14 @@ class TrainingTestDegradation(ThresholdTest):
|
|
72
72
|
"max_threshold": 0.10, # Maximum 10% degradation
|
73
73
|
}
|
74
74
|
|
75
|
-
|
76
|
-
|
77
|
-
"
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
],
|
84
|
-
}
|
75
|
+
tasks = ["classification", "text_classification"]
|
76
|
+
tags = [
|
77
|
+
"sklearn",
|
78
|
+
"binary_classification",
|
79
|
+
"multiclass_classification",
|
80
|
+
"model_performance",
|
81
|
+
"visualization",
|
82
|
+
]
|
85
83
|
|
86
84
|
default_metrics = {
|
87
85
|
"accuracy": metrics.accuracy_score,
|
@@ -50,13 +50,11 @@ class VMeasure(ClusterPerformance):
|
|
50
50
|
|
51
51
|
name = "v_measure_score"
|
52
52
|
required_inputs = ["model", "datasets"]
|
53
|
-
|
54
|
-
|
55
|
-
"
|
56
|
-
|
57
|
-
|
58
|
-
],
|
59
|
-
}
|
53
|
+
tasks = ["clustering"]
|
54
|
+
tags = [
|
55
|
+
"sklearn",
|
56
|
+
"model_performance",
|
57
|
+
]
|
60
58
|
|
61
59
|
def metric_info(self):
|
62
60
|
return {"V Measure": metrics.v_measure_score}
|
@@ -85,16 +85,14 @@ class WeakspotsDiagnosis(ThresholdTest):
|
|
85
85
|
},
|
86
86
|
}
|
87
87
|
|
88
|
-
|
89
|
-
|
90
|
-
"
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
],
|
97
|
-
}
|
88
|
+
tasks = ["classification", "text_classification"]
|
89
|
+
tags = [
|
90
|
+
"sklearn",
|
91
|
+
"binary_classification",
|
92
|
+
"multiclass_classification",
|
93
|
+
"model_diagnosis",
|
94
|
+
"visualization",
|
95
|
+
]
|
98
96
|
|
99
97
|
# TODO: allow configuring
|
100
98
|
default_metrics = {
|
@@ -58,10 +58,8 @@ class AutoARIMA(Metric):
|
|
58
58
|
|
59
59
|
name = "auto_arima"
|
60
60
|
required_inputs = ["dataset"]
|
61
|
-
|
62
|
-
|
63
|
-
"tags": ["time_series_data", "forecasting", "model_selection", "statsmodels"],
|
64
|
-
}
|
61
|
+
tasks = ["regression"]
|
62
|
+
tags = ["time_series_data", "forecasting", "model_selection", "statsmodels"]
|
65
63
|
|
66
64
|
max_p = 3
|
67
65
|
max_d = 2
|
@@ -49,10 +49,8 @@ class BoxPierce(Metric):
|
|
49
49
|
|
50
50
|
name = "box_pierce"
|
51
51
|
required_inputs = ["dataset"]
|
52
|
-
|
53
|
-
|
54
|
-
"tags": ["time_series_data", "forecasting", "statistical_test", "statsmodels"],
|
55
|
-
}
|
52
|
+
tasks = ["regression"]
|
53
|
+
tags = ["time_series_data", "forecasting", "statistical_test", "statsmodels"]
|
56
54
|
|
57
55
|
def run(self):
|
58
56
|
"""
|
@@ -57,10 +57,9 @@ class CumulativePredictionProbabilities(Metric):
|
|
57
57
|
|
58
58
|
name = "cumulative_prediction_probabilities"
|
59
59
|
required_inputs = ["model", "datasets"]
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
}
|
60
|
+
tasks = ["classification"]
|
61
|
+
tags = ["logistic_regression", "visualization"]
|
62
|
+
|
64
63
|
default_params = {"title": "Cumulative Probabilities"}
|
65
64
|
|
66
65
|
@staticmethod
|
@@ -44,10 +44,8 @@ class DurbinWatsonTest(Metric):
|
|
44
44
|
|
45
45
|
name = "durbin_watson"
|
46
46
|
required_inputs = ["dataset"]
|
47
|
-
|
48
|
-
|
49
|
-
"tags": ["time_series_data", "forecasting", "statistical_test", "statsmodels"],
|
50
|
-
}
|
47
|
+
tasks = ["regression"]
|
48
|
+
tags = ["time_series_data", "forecasting", "statistical_test", "statsmodels"]
|
51
49
|
|
52
50
|
def run(self):
|
53
51
|
"""
|
@@ -59,10 +59,8 @@ class GINITable(Metric):
|
|
59
59
|
|
60
60
|
name = "gini_table"
|
61
61
|
required_inputs = ["model", "datasets"]
|
62
|
-
|
63
|
-
|
64
|
-
"tags": ["visualization", "model_performance"],
|
65
|
-
}
|
62
|
+
tasks = ["classification"]
|
63
|
+
tags = ["visualization", "model_performance"]
|
66
64
|
|
67
65
|
def run(self):
|
68
66
|
|
@@ -45,15 +45,13 @@ class JarqueBera(Metric):
|
|
45
45
|
|
46
46
|
name = "jarque_bera"
|
47
47
|
required_inputs = ["dataset"]
|
48
|
-
|
49
|
-
|
50
|
-
"
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
],
|
56
|
-
}
|
48
|
+
tasks = ["classification", "regression"]
|
49
|
+
tags = [
|
50
|
+
"tabular_data",
|
51
|
+
"data_distribution",
|
52
|
+
"statistical_test",
|
53
|
+
"statsmodels",
|
54
|
+
]
|
57
55
|
|
58
56
|
def run(self):
|
59
57
|
"""
|
@@ -52,15 +52,13 @@ class KolmogorovSmirnov(Metric):
|
|
52
52
|
name = "kolmogorov_smirnov"
|
53
53
|
required_inputs = ["dataset"]
|
54
54
|
default_params = {"dist": "norm"}
|
55
|
-
|
56
|
-
|
57
|
-
"
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
],
|
63
|
-
}
|
55
|
+
tasks = ["classification", "regression"]
|
56
|
+
tags = [
|
57
|
+
"tabular_data",
|
58
|
+
"data_distribution",
|
59
|
+
"statistical_test",
|
60
|
+
"statsmodels",
|
61
|
+
]
|
64
62
|
|
65
63
|
def summary(self, metric_value):
|
66
64
|
results_table = metric_value["metrics_summary"]
|
@@ -45,10 +45,8 @@ class LJungBox(Metric):
|
|
45
45
|
|
46
46
|
name = "ljung_box"
|
47
47
|
required_inputs = ["dataset"]
|
48
|
-
|
49
|
-
|
50
|
-
"tags": ["time_series_data", "forecasting", "statistical_test", "statsmodels"],
|
51
|
-
}
|
48
|
+
tasks = ["regression"]
|
49
|
+
tags = ["time_series_data", "forecasting", "statistical_test", "statsmodels"]
|
52
50
|
|
53
51
|
def run(self):
|
54
52
|
"""
|
@@ -56,15 +56,13 @@ class Lilliefors(Metric):
|
|
56
56
|
|
57
57
|
name = "lilliefors_test"
|
58
58
|
required_inputs = ["dataset"]
|
59
|
-
|
60
|
-
|
61
|
-
"
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
],
|
67
|
-
}
|
59
|
+
tasks = ["classification", "regression"]
|
60
|
+
tags = [
|
61
|
+
"tabular_data",
|
62
|
+
"data_distribution",
|
63
|
+
"statistical_test",
|
64
|
+
"statsmodels",
|
65
|
+
]
|
68
66
|
|
69
67
|
def run(self):
|
70
68
|
"""
|
@@ -58,10 +58,8 @@ class PredictionProbabilitiesHistogram(Metric):
|
|
58
58
|
|
59
59
|
name = "prediction_probabilities_histogram"
|
60
60
|
required_inputs = ["model", "datasets"]
|
61
|
-
|
62
|
-
|
63
|
-
"tags": ["tabular_data", "visualization", "credit_risk", "logistic_regression"],
|
64
|
-
}
|
61
|
+
tasks = ["classification"]
|
62
|
+
tags = ["tabular_data", "visualization", "credit_risk", "logistic_regression"]
|
65
63
|
|
66
64
|
default_params = {"title": "Histogram of Predictive Probabilities"}
|
67
65
|
|
@@ -55,10 +55,8 @@ class RegressionCoeffsPlot(Metric):
|
|
55
55
|
|
56
56
|
name = "regression_coeffs_plot"
|
57
57
|
required_inputs = ["models"]
|
58
|
-
|
59
|
-
|
60
|
-
"tags": ["tabular_data", "visualization", "model_interpretation"],
|
61
|
-
}
|
58
|
+
tasks = ["regression"]
|
59
|
+
tags = ["tabular_data", "visualization", "model_interpretation"]
|
62
60
|
|
63
61
|
@staticmethod
|
64
62
|
def plot_coefficients_with_ci(model, model_name):
|
@@ -57,15 +57,13 @@ class RegressionFeatureSignificance(Metric):
|
|
57
57
|
required_inputs = ["models"]
|
58
58
|
|
59
59
|
default_params = {"fontsize": 10, "p_threshold": 0.05}
|
60
|
-
|
61
|
-
|
62
|
-
"
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
],
|
68
|
-
}
|
60
|
+
tasks = ["regression"]
|
61
|
+
tags = [
|
62
|
+
"statistical_test",
|
63
|
+
"model_interpretation",
|
64
|
+
"visualization",
|
65
|
+
"feature_importance",
|
66
|
+
]
|
69
67
|
|
70
68
|
def run(self):
|
71
69
|
fontsize = self.params["fontsize"]
|
@@ -58,10 +58,8 @@ class RegressionModelForecastPlot(Metric):
|
|
58
58
|
name = "regression_forecast_plot"
|
59
59
|
required_inputs = ["models", "datasets"]
|
60
60
|
default_params = {"start_date": None, "end_date": None}
|
61
|
-
|
62
|
-
|
63
|
-
"tags": ["forecasting", "visualization"],
|
64
|
-
}
|
61
|
+
tasks = ["regression"]
|
62
|
+
tags = ["forecasting", "visualization"]
|
65
63
|
|
66
64
|
def run(self):
|
67
65
|
start_date = self.params["start_date"]
|
@@ -64,10 +64,8 @@ class RegressionModelForecastPlotLevels(Metric):
|
|
64
64
|
default_params = {
|
65
65
|
"transformation": None,
|
66
66
|
}
|
67
|
-
|
68
|
-
|
69
|
-
"tags": ["forecasting", "visualization"],
|
70
|
-
}
|
67
|
+
tasks = ["regression"]
|
68
|
+
tags = ["forecasting", "visualization"]
|
71
69
|
|
72
70
|
def run(self):
|
73
71
|
transformation = self.params["transformation"]
|
@@ -62,10 +62,8 @@ class RegressionModelSensitivityPlot(Metric):
|
|
62
62
|
"transformation": None,
|
63
63
|
"shocks": [0.1],
|
64
64
|
}
|
65
|
-
|
66
|
-
|
67
|
-
"tags": ["senstivity_analysis", "visualization"],
|
68
|
-
}
|
65
|
+
tasks = ["regression"]
|
66
|
+
tags = ["senstivity_analysis", "visualization"]
|
69
67
|
|
70
68
|
def run(self):
|
71
69
|
logger.info(self.params)
|
@@ -51,10 +51,8 @@ class RegressionModelSummary(Metric):
|
|
51
51
|
|
52
52
|
name = "regression_model_summary"
|
53
53
|
required_inputs = ["model", "dataset"]
|
54
|
-
|
55
|
-
|
56
|
-
"tags": ["model_metadata", "model_comparison"],
|
57
|
-
}
|
54
|
+
tasks = ["regression"]
|
55
|
+
tags = ["model_metadata", "model_comparison"]
|
58
56
|
|
59
57
|
def run(self):
|
60
58
|
X_columns = self.inputs.dataset.feature_columns
|
@@ -46,10 +46,8 @@ class RegressionModelsCoeffs(Metric):
|
|
46
46
|
|
47
47
|
name = "regression_models_coefficients"
|
48
48
|
required_inputs = ["models"]
|
49
|
-
|
50
|
-
|
51
|
-
"tags": ["model_comparison"],
|
52
|
-
}
|
49
|
+
tasks = ["regression"]
|
50
|
+
tags = ["model_comparison"]
|
53
51
|
|
54
52
|
def _build_model_summaries(self, all_coefficients):
|
55
53
|
all_models_df = pd.DataFrame()
|
@@ -55,14 +55,12 @@ class RegressionPermutationFeatureImportance(Metric):
|
|
55
55
|
"fontsize": 12,
|
56
56
|
"figure_height": 500,
|
57
57
|
}
|
58
|
-
|
59
|
-
|
60
|
-
"
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
],
|
65
|
-
}
|
58
|
+
tasks = ["regression"]
|
59
|
+
tags = [
|
60
|
+
"statsmodels",
|
61
|
+
"feature_importance",
|
62
|
+
"visualization",
|
63
|
+
]
|
66
64
|
|
67
65
|
def run(self):
|
68
66
|
x = self.inputs.dataset.x_df()
|
@@ -50,10 +50,8 @@ class RunsTest(Metric):
|
|
50
50
|
|
51
51
|
name = "runs_test"
|
52
52
|
required_inputs = ["dataset"]
|
53
|
-
|
54
|
-
|
55
|
-
"tags": ["tabular_data", "statistical_test", "statsmodels"],
|
56
|
-
}
|
53
|
+
tasks = ["classification", "regression"]
|
54
|
+
tags = ["tabular_data", "statistical_test", "statsmodels"]
|
57
55
|
|
58
56
|
def run(self):
|
59
57
|
"""
|
@@ -52,10 +52,9 @@ class ScorecardHistogram(Metric):
|
|
52
52
|
|
53
53
|
name = "scorecard_histogram"
|
54
54
|
required_inputs = ["datasets"]
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
}
|
55
|
+
tasks = ["classification"]
|
56
|
+
tags = ["tabular_data", "visualization", "credit_risk"]
|
57
|
+
|
59
58
|
default_params = {
|
60
59
|
"title": "Histogram of Scores",
|
61
60
|
"score_column": "score",
|
@@ -44,10 +44,8 @@ class ShapiroWilk(Metric):
|
|
44
44
|
|
45
45
|
name = "shapiro_wilk"
|
46
46
|
required_inputs = ["dataset"]
|
47
|
-
|
48
|
-
|
49
|
-
"tags": ["tabular_data", "data_distribution", "statistical_test"],
|
50
|
-
}
|
47
|
+
tasks = ["classification", "regression"]
|
48
|
+
tags = ["tabular_data", "data_distribution", "statistical_test"]
|
51
49
|
|
52
50
|
def run(self):
|
53
51
|
"""
|
@@ -75,10 +75,8 @@ class Bias(ThresholdTest):
|
|
75
75
|
name = "bias"
|
76
76
|
required_inputs = ["model.prompt"]
|
77
77
|
default_params = {"min_threshold": 7}
|
78
|
-
|
79
|
-
|
80
|
-
"tags": ["llm", "few_shot"],
|
81
|
-
}
|
78
|
+
tasks = ["text_classification", "text_summarization"]
|
79
|
+
tags = ["llm", "few_shot"]
|
82
80
|
|
83
81
|
system_prompt = """
|
84
82
|
You are a prompt evaluation AI. You are aware of all prompt engineering best practices and can score prompts based on how well they satisfy different best practices. You analyse the prompts step-by-step based on provided documentation and provide a score and an explanation for how you produced that score.
|
@@ -64,10 +64,8 @@ class Clarity(ThresholdTest):
|
|
64
64
|
name = "clarity"
|
65
65
|
required_inputs = ["model.prompt"]
|
66
66
|
default_params = {"min_threshold": 7}
|
67
|
-
|
68
|
-
|
69
|
-
"tags": ["llm", "zero_shot", "few_shot"],
|
70
|
-
}
|
67
|
+
tasks = ["text_classification", "text_summarization"]
|
68
|
+
tags = ["llm", "zero_shot", "few_shot"]
|
71
69
|
|
72
70
|
system_prompt = """
|
73
71
|
You are a prompt evaluation AI. You are aware of all prompt engineering best practices and can score prompts based on how well they satisfy different metrics. You analyse the prompts step-by-step based on provided documentation and provide a score and an explanation for how you produced that score.
|