validmind 2.3.3__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +8 -1
- validmind/ai/utils.py +2 -1
- validmind/client.py +1 -0
- validmind/datasets/regression/fred_timeseries.py +272 -0
- validmind/tests/__init__.py +14 -468
- validmind/tests/__types__.py +10 -0
- validmind/tests/_store.py +102 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
- validmind/tests/data_validation/ADF.py +8 -10
- validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
- validmind/tests/data_validation/AutoAR.py +2 -4
- validmind/tests/data_validation/AutoMA.py +2 -4
- validmind/tests/data_validation/AutoSeasonality.py +8 -10
- validmind/tests/data_validation/AutoStationarity.py +8 -10
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
- validmind/tests/data_validation/BivariateHistograms.py +8 -10
- validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
- validmind/tests/data_validation/ClassImbalance.py +2 -4
- validmind/tests/data_validation/DFGLSArch.py +2 -4
- validmind/tests/data_validation/DatasetDescription.py +7 -9
- validmind/tests/data_validation/DatasetSplit.py +8 -9
- validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
- validmind/tests/data_validation/Duplicates.py +2 -4
- validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
- validmind/tests/data_validation/HighCardinality.py +2 -4
- validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
- validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
- validmind/tests/data_validation/IQROutliersTable.py +2 -4
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
- validmind/tests/data_validation/KPSS.py +8 -10
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
- validmind/tests/data_validation/MissingValues.py +2 -4
- validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
- validmind/tests/data_validation/MissingValuesRisk.py +2 -4
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
- validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
- validmind/tests/data_validation/RollingStatsPlot.py +2 -4
- validmind/tests/data_validation/ScatterPlot.py +2 -4
- validmind/tests/data_validation/SeasonalDecompose.py +70 -44
- validmind/tests/data_validation/Skewness.py +2 -4
- validmind/tests/data_validation/SpreadPlot.py +2 -4
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
- validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
- validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
- validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
- validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
- validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
- validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
- validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
- validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
- validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
- validmind/tests/data_validation/TimeSeriesOutliers.py +32 -45
- validmind/tests/data_validation/TooManyZeroValues.py +2 -4
- validmind/tests/data_validation/UniqueRows.py +2 -4
- validmind/tests/data_validation/WOEBinPlots.py +2 -4
- validmind/tests/data_validation/WOEBinTable.py +2 -4
- validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
- validmind/tests/data_validation/nlp/CommonWords.py +2 -4
- validmind/tests/data_validation/nlp/Hashtags.py +2 -4
- validmind/tests/data_validation/nlp/Mentions.py +2 -4
- validmind/tests/data_validation/nlp/Punctuations.py +2 -4
- validmind/tests/data_validation/nlp/StopWords.py +2 -4
- validmind/tests/data_validation/nlp/TextDescription.py +2 -4
- validmind/tests/decorator.py +10 -8
- validmind/tests/load.py +264 -0
- validmind/tests/metadata.py +59 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
- validmind/tests/model_validation/FeaturesAUC.py +6 -8
- validmind/tests/model_validation/ModelMetadata.py +8 -9
- validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
- validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
- validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +9 -11
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
- validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
- validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
- validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
- validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
- validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
- validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
- validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
- validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
- validmind/tests/prompt_validation/Bias.py +2 -4
- validmind/tests/prompt_validation/Clarity.py +2 -4
- validmind/tests/prompt_validation/Conciseness.py +2 -4
- validmind/tests/prompt_validation/Delimitation.py +2 -4
- validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
- validmind/tests/prompt_validation/Robustness.py +2 -4
- validmind/tests/prompt_validation/Specificity.py +2 -4
- validmind/tests/run.py +394 -0
- validmind/tests/test_providers.py +12 -0
- validmind/tests/utils.py +16 -0
- validmind/unit_metrics/__init__.py +12 -4
- validmind/unit_metrics/composite.py +3 -0
- validmind/vm_models/test/metric.py +8 -5
- validmind/vm_models/test/result_wrapper.py +2 -1
- validmind/vm_models/test/test.py +14 -11
- validmind/vm_models/test/threshold_test.py +1 -0
- validmind/vm_models/test_suite/runner.py +1 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/METADATA +70 -36
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/RECORD +162 -146
- /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/LICENSE +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/WHEEL +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/sklearn/ClassifierPerformance.py

@@ -58,15 +58,13 @@ class ClassifierPerformance(Metric):

     name = "classifier_performance"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]

     def summary(self, metric_value: dict):
         """
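The hunk above is representative: across 2.4.0, the nested `metadata` dict on each built-in test is flattened into plain `tasks` and `tags` class attributes. An illustration-only sketch (not validmind's actual loader code, which lives in the new `validmind/tests/load.py`) of why flat attributes make registry filtering trivial:

```python
# Hypothetical registry filter; the class names and attribute values below
# are copied from the diff, but the filtering logic is only an illustration.
class ClassifierPerformance:
    tasks = ["classification", "text_classification"]
    tags = ["sklearn", "binary_classification", "multiclass_classification", "model_performance"]

class ClusterCosineSimilarity:
    tasks = ["clustering"]
    tags = ["sklearn", "model_performance"]

def filter_tests(registry, task=None, tag=None):
    """Return tests whose class attributes match the given task and/or tag."""
    return [
        t for t in registry
        if (task is None or task in getattr(t, "tasks", []))
        and (tag is None or tag in getattr(t, "tags", []))
    ]

print([t.__name__ for t in filter_tests([ClassifierPerformance, ClusterCosineSimilarity], task="clustering")])
# ['ClusterCosineSimilarity']
```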
validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py

@@ -57,13 +57,11 @@ class ClusterCosineSimilarity(Metric):

     name = "cluster_cosine_similarity"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def run(self):
         y_true_train = self.inputs.dataset.y
validmind/tests/model_validation/sklearn/ClusterPerformance.py

@@ -51,13 +51,11 @@ class ClusterPerformance(Metric):

     name = "cluster_performance_metrics"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def cluser_performance_metrics(
         self, y_true_train, y_pred_train, y_true_test, y_pred_test, samples, metric_info
validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py

@@ -61,13 +61,8 @@ class ClusterPerformanceMetrics(ClusterPerformance):

     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = ["sklearn", "model_performance"]
     default_metrics = {
         "Homogeneity Score": metrics.homogeneity_score,
         "Completeness Score": metrics.completeness_score,
validmind/tests/model_validation/sklearn/CompletenessScore.py

@@ -44,13 +44,11 @@ class CompletenessScore(ClusterPerformance):

     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def metric_info(self):
         return {"Completeness Score": metrics.completeness_score}
validmind/tests/model_validation/sklearn/ConfusionMatrix.py

@@ -55,16 +55,14 @@ class ConfusionMatrix(Metric):

     name = "confusion_matrix"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "visualization",
+    ]

     def run(self):
         y_true = self.inputs.dataset.y

@@ -113,6 +111,17 @@ class ConfusionMatrix(Metric):
             height=600,
         )

+        # Add an annotation at the bottom of the heatmap
+        fig.add_annotation(
+            x=0.5,
+            y=-0.1,
+            xref="paper",
+            yref="paper",
+            text=f"Confusion Matrix for {self.inputs.model.input_id} on {self.inputs.dataset.input_id}",
+            showarrow=False,
+            font=dict(size=14),
+        )
+
         return self.cache_results(
             metric_value={
                 "confusion_matrix": cm,
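The new annotation labels each confusion-matrix figure with its model and dataset IDs. A self-contained sketch of the same plotly pattern on a toy matrix; the matrix values and ID strings here are made up:

```python
import plotly.graph_objects as go

cm = [[50, 3], [5, 42]]  # toy 2x2 confusion matrix
fig = go.Figure(data=go.Heatmap(z=cm, x=["Pred 0", "Pred 1"], y=["True 0", "True 1"]))

# xref/yref="paper" position the text relative to the plotting area (0..1),
# so y=-0.1 lands just below the heatmap regardless of the axis ranges.
fig.add_annotation(
    x=0.5,
    y=-0.1,
    xref="paper",
    yref="paper",
    text="Confusion Matrix for my_model on test_dataset",
    showarrow=False,
    font=dict(size=14),
)
fig.show()
```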
validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py (new file)

@@ -0,0 +1,83 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+from sklearn.inspection import permutation_importance
+
+from validmind import tags, tasks
+
+
+@tags("model_explainability", "sklearn")
+@tasks("regression", "time_series_forecasting")
+def FeatureImportanceComparison(datasets, models, num_features=3):
+    """
+    Compare feature importance scores for each model and generate a summary table
+    with the top important features.
+
+    **Purpose**: The purpose of this function is to compare the feature importance scores for different models applied to various datasets.
+
+    **Test Mechanism**: The function iterates through each dataset-model pair, calculates permutation feature importance (PFI) scores, and generates a summary table with the top `num_features` important features for each model.
+
+    **Signs of High Risk**:
+    - If key features expected to be important are ranked low, it could indicate potential issues with model training or data quality.
+    - High variance in feature importance scores across different models may suggest instability in feature selection.
+
+    **Strengths**:
+    - Provides a clear comparison of the most important features for each model.
+    - Uses permutation importance, which is a model-agnostic method and can be applied to any estimator.
+
+    **Limitations**:
+    - Assumes that the dataset is provided as a DataFrameDataset object with `x_df` and `y_df` methods to access feature and target data.
+    - Requires that `model.model` is compatible with `sklearn.inspection.permutation_importance`.
+    - The function's output is dependent on the number of features specified by `num_features`, which defaults to 3 but can be adjusted.
+    """
+    results_list = []
+
+    for dataset, model in zip(datasets, models):
+        x = dataset.x_df()
+        y = dataset.y_df()
+
+        pfi_values = permutation_importance(
+            model.model,
+            x,
+            y,
+            random_state=0,
+            n_jobs=-2,
+        )
+
+        # Create a dictionary to store PFI scores
+        pfi = {
+            column: pfi_values["importances_mean"][i]
+            for i, column in enumerate(x.columns)
+        }
+
+        # Sort features by their importance
+        sorted_features = sorted(pfi.items(), key=lambda item: item[1], reverse=True)
+
+        # Extract the top `num_features` features
+        top_features = sorted_features[:num_features]
+
+        # Prepare the result for the current model and dataset
+        result = {
+            "Model": model.input_id,
+            "Dataset": dataset.input_id,
+        }
+
+        # Dynamically add feature columns to the result
+        for i in range(num_features):
+            if i < len(top_features):
+                result[
+                    f"Feature {i + 1}"
+                ] = f"[{top_features[i][0]}; {top_features[i][1]:.4f}]"
+            else:
+                result[f"Feature {i + 1}"] = None
+
+        # Append the result to the list
+        results_list.append(result)
+
+    # Convert the results list to a DataFrame
+    results_df = pd.DataFrame(results_list)
+    return results_df
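FeatureImportanceComparison is one of the new decorator-based functional tests (`@tags`/`@tasks` on a plain function instead of a `Metric` subclass). A self-contained sketch of the computation it wraps, using sklearn's public `permutation_importance` on a toy model; the estimator and feature names are illustrative only:

```python
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LinearRegression

# Toy regression problem with named features (stand-ins for a real dataset)
X, y = make_regression(n_samples=200, n_features=5, random_state=0)
X = pd.DataFrame(X, columns=[f"f{i}" for i in range(5)])

model = LinearRegression().fit(X, y)

# importances_mean holds the mean score drop when each feature is permuted
result = permutation_importance(model, X, y, random_state=0, n_jobs=-2)
pfi = dict(zip(X.columns, result["importances_mean"]))

# Top-3 features, mirroring the test's default num_features=3
top_3 = sorted(pfi.items(), key=lambda item: item[1], reverse=True)[:3]
print(top_3)
```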
validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py

@@ -55,13 +55,11 @@ class FowlkesMallowsScore(ClusterPerformance):

     name = "fowlkes_mallows_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def metric_info(self):
         return {"Fowlkes-Mallows score": metrics.fowlkes_mallows_score}
validmind/tests/model_validation/sklearn/HomogeneityScore.py

@@ -46,13 +46,11 @@ class HomogeneityScore(ClusterPerformance):

     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def metric_info(self):
         return {"Homogeneity Score": metrics.homogeneity_score}
validmind/tests/model_validation/sklearn/HyperParametersTuning.py

@@ -52,13 +52,8 @@ class HyperParametersTuning(Metric):

     name = "hyper_parameters_tuning"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "clustering"]
+    tags = ["sklearn", "model_performance"]
     default_params = {"param_grid": None, "scoring": None}

     def run(self):
validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py

@@ -60,13 +60,10 @@ class KMeansClustersOptimization(Metric):

     name = "clusters_optimize_elbow_method"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": ["sklearn", "model_performance", "kmeans"],
-    }
-    default_params = {
-        "n_clusters": None,
-    }
+    tasks = ["clustering"]
+    tags = ["sklearn", "model_performance", "kmeans"]
+
+    default_params = {"n_clusters": None}

     def run(self):
         n_clusters = self.params["n_clusters"]
validmind/tests/model_validation/sklearn/MinimumAccuracy.py

@@ -59,15 +59,13 @@ class MinimumAccuracy(ThresholdTest):
     name = "accuracy_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.7}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]

     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
validmind/tests/model_validation/sklearn/MinimumF1Score.py

@@ -62,15 +62,13 @@ class MinimumF1Score(ThresholdTest):
     name = "f1_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.5}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]

     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py

@@ -59,15 +59,13 @@ class MinimumROCAUCScore(ThresholdTest):
     name = "roc_auc_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.5}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]

     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py

@@ -53,16 +53,14 @@ class ModelsPerformanceComparison(ClassifierPerformance):

     name = "models_performance_comparison"
     required_inputs = ["dataset", "models"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "model_comparison",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "model_comparison",
+    ]

     def summary(self, metric_value: dict):
         """
validmind/tests/model_validation/sklearn/OverfitDiagnosis.py

@@ -67,15 +67,13 @@ class OverfitDiagnosis(ThresholdTest):
     name = "overfit_regions"
     required_inputs = ["model", "datasets"]
    default_params = {"features_columns": None, "cut_off_percentage": 4}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_diagnosis",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_diagnosis",
+    ]

     default_metrics = {
         "accuracy": metrics.accuracy_score,
validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py

@@ -56,16 +56,14 @@ class PermutationFeatureImportance(Metric):
         "fontsize": None,
         "figure_height": 1000,
     }
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "feature_importance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "feature_importance",
+        "visualization",
+    ]

     def run(self):
         x = self.inputs.dataset.x_df()

@@ -121,7 +119,7 @@ class PermutationFeatureImportance(Metric):
             figures=[
                 Figure(
                     for_object=self,
-                    key="
+                    key=f"pfi_{self.inputs.dataset.input_id}_{self.inputs.model.input_id}",
                     figure=fig,
                 ),
             ],
validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py

@@ -73,15 +73,13 @@ class PopulationStabilityIndex(Metric):

     name = "psi"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
     default_params = {
         "num_bins": 10,
         "mode": "fixed",
validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py

@@ -51,16 +51,14 @@ class PrecisionRecallCurve(Metric):

     name = "pr_curve"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "visualization",
+    ]

     def run(self):
         if isinstance(self.inputs.model, FoundationModel):
|
@@ -59,16 +59,14 @@ class ROCCurve(Metric):
|
|
59
59
|
|
60
60
|
name = "roc_curve"
|
61
61
|
required_inputs = ["model", "dataset"]
|
62
|
-
|
63
|
-
|
64
|
-
"
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
],
|
71
|
-
}
|
62
|
+
tasks = ["classification", "text_classification"]
|
63
|
+
tags = [
|
64
|
+
"sklearn",
|
65
|
+
"binary_classification",
|
66
|
+
"multiclass_classification",
|
67
|
+
"model_performance",
|
68
|
+
"visualization",
|
69
|
+
]
|
72
70
|
|
73
71
|
def run(self):
|
74
72
|
if isinstance(self.inputs.model, FoundationModel):
|
@@ -109,7 +107,7 @@ class ROCCurve(Metric):
|
|
109
107
|
)
|
110
108
|
|
111
109
|
layout = go.Layout(
|
112
|
-
title="ROC Curve",
|
110
|
+
title=f"ROC Curve for {self.inputs.model.input_id} on {self.inputs.dataset.input_id}",
|
113
111
|
xaxis=dict(title="False Positive Rate"),
|
114
112
|
yaxis=dict(title="True Positive Rate"),
|
115
113
|
width=700,
|
@@ -117,6 +115,7 @@ class ROCCurve(Metric):
|
|
117
115
|
)
|
118
116
|
|
119
117
|
fig = go.Figure(data=[trace0, trace1], layout=layout)
|
118
|
+
|
120
119
|
return self.cache_results(
|
121
120
|
metric_value={
|
122
121
|
"auc": auc,
|
validmind/tests/model_validation/sklearn/RegressionErrors.py

@@ -43,13 +43,11 @@ class RegressionErrors(Metric):

     name = "regression_errors"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["regression"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]

     def summary(self, raw_results):
         """
validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py (new file)

@@ -0,0 +1,76 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+import pandas as pd
+from sklearn import metrics
+
+from validmind import tags, tasks
+from validmind.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+@tags("model_performance", "sklearn")
+@tasks("regression", "time_series_forecasting")
+def RegressionErrorsComparison(datasets, models):
+    """
+    Compare regression error metrics for each model and generate a summary table
+    with the results.
+
+    **Purpose**: The purpose of this function is to compare the regression errors for different models applied to various datasets.
+
+    **Test Mechanism**: The function iterates through each dataset-model pair, calculates various error metrics (MAE, MSE, MAPE, MBD), and generates a summary table with these results.
+
+    **Signs of High Risk**:
+    - High Mean Absolute Error (MAE) or Mean Squared Error (MSE) indicates poor model performance.
+    - High Mean Absolute Percentage Error (MAPE) suggests large percentage errors, especially problematic if the true values are small.
+    - Mean Bias Deviation (MBD) significantly different from zero indicates systematic overestimation or underestimation by the model.
+
+    **Strengths**:
+    - Provides multiple error metrics to assess model performance from different perspectives.
+    - Includes a check to avoid division by zero when calculating MAPE.
+
+    **Limitations**:
+    - Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `feature_columns` attributes.
+    - The function relies on the `logger` from `validmind.logging` to warn about zero values in `y_true`, which should be correctly implemented and imported.
+    - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
+    """
+    results_list = []
+
+    for dataset, model in zip(datasets, models):
+        dataset_name = dataset.input_id
+        model_name = model.input_id
+
+        y_true = dataset.y
+        y_pred = dataset.y_pred(model)  # Assuming dataset has X for features
+        y_true = y_true.astype(y_pred.dtype)
+
+        mae = metrics.mean_absolute_error(y_true, y_pred)
+        mse = metrics.mean_squared_error(y_true, y_pred)
+
+        if np.any(y_true == 0):
+            logger.warning(
+                "y_true contains zero values. Skipping MAPE calculation to avoid division by zero."
+            )
+            mape = None
+        else:
+            mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
+        mbd = np.mean(y_pred - y_true)
+
+        # Append results to the list
+        results_list.append(
+            {
+                "Model": model_name,
+                "Dataset": dataset_name,
+                "Mean Absolute Error (MAE)": mae,
+                "Mean Squared Error (MSE)": mse,
+                "Mean Absolute Percentage Error (MAPE)": mape,
+                "Mean Bias Deviation (MBD)": mbd,
+            }
+        )
+
+    # Convert results list to a DataFrame
+    results_df = pd.DataFrame(results_list)
+    return results_df
|