validmind 2.3.5__py3-none-any.whl → 2.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +8 -1
- validmind/ai/utils.py +2 -1
- validmind/client.py +1 -0
- validmind/template.py +2 -0
- validmind/tests/__init__.py +14 -468
- validmind/tests/_store.py +102 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
- validmind/tests/data_validation/ADF.py +8 -10
- validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
- validmind/tests/data_validation/AutoAR.py +2 -4
- validmind/tests/data_validation/AutoMA.py +2 -4
- validmind/tests/data_validation/AutoSeasonality.py +8 -10
- validmind/tests/data_validation/AutoStationarity.py +8 -10
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
- validmind/tests/data_validation/BivariateHistograms.py +8 -10
- validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
- validmind/tests/data_validation/ClassImbalance.py +2 -4
- validmind/tests/data_validation/DFGLSArch.py +2 -4
- validmind/tests/data_validation/DatasetDescription.py +7 -9
- validmind/tests/data_validation/DatasetSplit.py +8 -9
- validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
- validmind/tests/data_validation/Duplicates.py +2 -4
- validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
- validmind/tests/data_validation/HighCardinality.py +2 -4
- validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
- validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
- validmind/tests/data_validation/IQROutliersTable.py +2 -4
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
- validmind/tests/data_validation/KPSS.py +8 -10
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
- validmind/tests/data_validation/MissingValues.py +2 -4
- validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
- validmind/tests/data_validation/MissingValuesRisk.py +2 -4
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
- validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
- validmind/tests/data_validation/RollingStatsPlot.py +2 -4
- validmind/tests/data_validation/ScatterPlot.py +2 -4
- validmind/tests/data_validation/SeasonalDecompose.py +2 -4
- validmind/tests/data_validation/Skewness.py +2 -4
- validmind/tests/data_validation/SpreadPlot.py +2 -4
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
- validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
- validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
- validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
- validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
- validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
- validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
- validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
- validmind/tests/data_validation/TimeSeriesOutliers.py +2 -4
- validmind/tests/data_validation/TooManyZeroValues.py +2 -4
- validmind/tests/data_validation/UniqueRows.py +2 -4
- validmind/tests/data_validation/WOEBinPlots.py +2 -4
- validmind/tests/data_validation/WOEBinTable.py +2 -4
- validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
- validmind/tests/data_validation/nlp/CommonWords.py +2 -4
- validmind/tests/data_validation/nlp/Hashtags.py +2 -4
- validmind/tests/data_validation/nlp/Mentions.py +2 -4
- validmind/tests/data_validation/nlp/Punctuations.py +2 -4
- validmind/tests/data_validation/nlp/StopWords.py +2 -4
- validmind/tests/data_validation/nlp/TextDescription.py +2 -4
- validmind/tests/decorator.py +10 -8
- validmind/tests/load.py +264 -0
- validmind/tests/metadata.py +59 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
- validmind/tests/model_validation/FeaturesAUC.py +6 -8
- validmind/tests/model_validation/ModelMetadata.py +8 -9
- validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +8 -10
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
- validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
- validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
- validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
- validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
- validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
- validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
- validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
- validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
- validmind/tests/prompt_validation/Bias.py +2 -4
- validmind/tests/prompt_validation/Clarity.py +2 -4
- validmind/tests/prompt_validation/Conciseness.py +2 -4
- validmind/tests/prompt_validation/Delimitation.py +2 -4
- validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
- validmind/tests/prompt_validation/Robustness.py +2 -4
- validmind/tests/prompt_validation/Specificity.py +2 -4
- validmind/tests/run.py +394 -0
- validmind/tests/test_providers.py +12 -0
- validmind/tests/utils.py +16 -0
- validmind/unit_metrics/__init__.py +12 -4
- validmind/unit_metrics/composite.py +3 -0
- validmind/vm_models/test/metric.py +8 -5
- validmind/vm_models/test/result_wrapper.py +2 -1
- validmind/vm_models/test/test.py +14 -11
- validmind/vm_models/test/threshold_test.py +1 -0
- validmind/vm_models/test_suite/runner.py +1 -0
- {validmind-2.3.5.dist-info → validmind-2.4.1.dist-info}/METADATA +1 -1
- {validmind-2.3.5.dist-info → validmind-2.4.1.dist-info}/RECORD +149 -144
- {validmind-2.3.5.dist-info → validmind-2.4.1.dist-info}/LICENSE +0 -0
- {validmind-2.3.5.dist-info → validmind-2.4.1.dist-info}/WHEEL +0 -0
- {validmind-2.3.5.dist-info → validmind-2.4.1.dist-info}/entry_points.txt +0 -0
--- /dev/null
+++ b/validmind/tests/metadata.py
@@ -0,0 +1,59 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import pandas as pd
+
+from validmind.utils import format_dataframe
+
+from .load import list_tests
+
+
+def list_tags():
+    """
+    List unique tags from all test classes.
+    """
+
+    unique_tags = set()
+
+    for test in list_tests(__as_class=True):
+        unique_tags.update(test.tags)
+
+    return list(unique_tags)
+
+
+def list_tasks_and_tags():
+    """
+    List all task types and their associated tags, with one row per task type and
+    all tags for a task type in one row.
+
+    Returns:
+        pandas.DataFrame: A DataFrame with 'Task Type' and concatenated 'Tags'.
+    """
+    task_tags_dict = {}
+
+    for test in list_tests(__as_class=True):
+        for task in test.tasks:
+            task_tags_dict.setdefault(task, set()).update(test.tags)
+
+    return format_dataframe(
+        pd.DataFrame(
+            [
+                {"Task": task, "Tags": ", ".join(tags)}
+                for task, tags in task_tags_dict.items()
+            ]
+        )
+    )
+
+
+def list_tasks():
+    """
+    List unique tasks from all test classes.
+    """
+
+    unique_tasks = set()
+
+    for test in list_tests(__as_class=True):
+        unique_tasks.update(test.tasks)
+
+    return list(unique_tasks)
@@ -51,13 +51,11 @@ class ClusterSizeDistribution(Metric):
 
     name = "cluster_size_distribution"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def run(self):
         y_true_train = self.inputs.dataset.y
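This hunk sets the pattern repeated in most of the hunks below: the 2.3.x metadata = {"task_types": [...], "tags": [...]} dictionary on each test class is flattened into plain tasks and tags class attributes, which is what the new list_tags()/list_tasks() helpers read via test.tags and test.tasks. A rough sketch of a custom metric written against the new convention (the class, its logic, and the y_pred accessor are illustrative assumptions, not taken from this diff):

from validmind.vm_models import Metric


class ToyAccuracy(Metric):
    """Illustrative custom metric using the 2.4.x-style declarations."""

    name = "toy_accuracy"
    required_inputs = ["model", "dataset"]

    # 2.3.x put these inside a `metadata = {...}` dict; 2.4.x reads plain class attributes.
    tasks = ["classification"]
    tags = ["sklearn", "model_performance"]

    def run(self):
        y_true = self.inputs.dataset.y
        y_pred = self.inputs.dataset.y_pred(self.inputs.model)  # assumed accessor
        accuracy = float((y_true == y_pred).mean())

        return self.cache_results(metric_value={"accuracy": accuracy})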
@@ -45,14 +45,12 @@ class FeaturesAUC(Metric):
         "fontsize": 12,
         "figure_height": 500,
     }
-    metadata = {
-        "task_types": ["classification"],
-        "tags": [
-            "feature_importance",
-            "AUC",
-            "visualization",
-        ],
-    }
+    tasks = ["classification"]
+    tags = [
+        "feature_importance",
+        "AUC",
+        "visualization",
+    ]
 
     def run(self):
         dataset = self.inputs.dataset
@@ -53,15 +53,14 @@ class ModelMetadata(Metric):
 
     name = "model_metadata"
     required_inputs = ["model"]
-    metadata = {
-        "task_types": [
-            "classification",
-            "regression",
-            "text_classification",
-            "text_summarization",
-        ],
-        "tags": ["model_metadata"],
-    }
+    tasks = [
+        "classification",
+        "regression",
+        "text_classification",
+        "text_summarization",
+    ]
+
+    tags = ["model_metadata"]
 
     column_labels = {
         "architecture": "Modeling Technique",
@@ -52,12 +52,8 @@ class RegressionResidualsPlot(Metric):
 
     name = "regression_residuals_plot"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "model_performance",
-        ],
-    }
+    tasks = ["regression"]
+    tags = ["model_performance"]
     default_params = {"bin_size": 0.1}
 
     def run(self):
@@ -51,10 +51,8 @@ class ClusterDistribution(Metric):
     default_params = {
         "num_clusters": 5,
     }
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         # run kmeans clustering on embeddings
@@ -50,10 +50,8 @@ class CosineSimilarityDistribution(Metric):
 
     name = "Text Embeddings Cosine Similarity Distribution"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         # Compute cosine similarity
@@ -53,10 +53,8 @@ class DescriptiveAnalytics(Metric):
 
     name = "Descriptive Analytics for Text Embeddings Models"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         # Assuming y_pred returns a 2D array of embeddings [samples, features]
@@ -53,10 +53,8 @@ class EmbeddingsVisualization2D(Metric):
         "cluster_column": None,
         "perplexity": 30,
     }
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         cluster_column = self.params.get("cluster_column")
@@ -29,10 +29,8 @@ class StabilityAnalysis(ThresholdTest):
     default_params = {
         "mean_similarity_threshold": 0.7,
    }
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     @abstractmethod
     def perturb_data(self, data: str) -> str:
@@ -48,13 +48,11 @@ class AdjustedMutualInformation(ClusterPerformance):
 
     name = "adjusted_mutual_information"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Adjusted Mutual Information": metrics.adjusted_mutual_info_score}
@@ -47,13 +47,11 @@ class AdjustedRandIndex(ClusterPerformance):
 
     name = "adjusted_rand_index"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Adjusted Rand Index": metrics.adjusted_rand_score}
@@ -58,15 +58,13 @@ class ClassifierPerformance(Metric):
 
     name = "classifier_performance"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, metric_value: dict):
         """
@@ -57,13 +57,11 @@ class ClusterCosineSimilarity(Metric):
 
     name = "cluster_cosine_similarity"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def run(self):
         y_true_train = self.inputs.dataset.y
@@ -51,13 +51,11 @@ class ClusterPerformance(Metric):
 
     name = "cluster_performance_metrics"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def cluser_performance_metrics(
         self, y_true_train, y_pred_train, y_true_test, y_pred_test, samples, metric_info
@@ -61,13 +61,8 @@ class ClusterPerformanceMetrics(ClusterPerformance):
 
     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = ["sklearn", "model_performance"]
     default_metrics = {
         "Homogeneity Score": metrics.homogeneity_score,
         "Completeness Score": metrics.completeness_score,
@@ -44,13 +44,11 @@ class CompletenessScore(ClusterPerformance):
 
     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Completeness Score": metrics.completeness_score}
@@ -55,16 +55,14 @@ class ConfusionMatrix(Metric):
 
     name = "confusion_matrix"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "visualization",
+    ]
 
     def run(self):
         y_true = self.inputs.dataset.y
@@ -113,6 +111,17 @@ class ConfusionMatrix(Metric):
             height=600,
         )
 
+        # Add an annotation at the bottom of the heatmap
+        fig.add_annotation(
+            x=0.5,
+            y=-0.1,
+            xref="paper",
+            yref="paper",
+            text=f"Confusion Matrix for {self.inputs.model.input_id} on {self.inputs.dataset.input_id}",
+            showarrow=False,
+            font=dict(size=14),
+        )
+
         return self.cache_results(
             metric_value={
                 "confusion_matrix": cm,
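The new annotation uses Plotly paper coordinates, so x=0.5, y=-0.1 pins the caption horizontally centred just below the plotting area regardless of the number of classes. A standalone sketch of the same mechanics outside ValidMind (dummy counts and labels, purely illustrative):

import plotly.graph_objects as go

cm = [[50, 4], [7, 39]]  # dummy 2x2 confusion-matrix counts

fig = go.Figure(
    data=go.Heatmap(z=cm, x=["Pred 0", "Pred 1"], y=["True 0", "True 1"], colorscale="Blues")
)
fig.add_annotation(
    x=0.5,          # midpoint of the figure width (paper coordinates)
    y=-0.1,         # just below the plotting area
    xref="paper",
    yref="paper",
    text="Confusion Matrix for my_model on my_dataset",  # stand-in for the input_id values
    showarrow=False,
    font=dict(size=14),
)
fig.show()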
@@ -55,13 +55,11 @@ class FowlkesMallowsScore(ClusterPerformance):
 
     name = "fowlkes_mallows_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Fowlkes-Mallows score": metrics.fowlkes_mallows_score}
@@ -46,13 +46,11 @@ class HomogeneityScore(ClusterPerformance):
 
     name = "homogeneity_score"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Homogeneity Score": metrics.homogeneity_score}
@@ -52,13 +52,8 @@ class HyperParametersTuning(Metric):
 
     name = "hyper_parameters_tuning"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["classification", "clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "clustering"]
+    tags = ["sklearn", "model_performance"]
     default_params = {"param_grid": None, "scoring": None}
 
     def run(self):
@@ -60,13 +60,10 @@ class KMeansClustersOptimization(Metric):
 
     name = "clusters_optimize_elbow_method"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": ["sklearn", "model_performance", "kmeans"],
-    }
-    default_params = {
-        "n_clusters": None,
-    }
+    tasks = ["clustering"]
+    tags = ["sklearn", "model_performance", "kmeans"]
+
+    default_params = {"n_clusters": None}
 
     def run(self):
         n_clusters = self.params["n_clusters"]
@@ -59,15 +59,13 @@ class MinimumAccuracy(ThresholdTest):
     name = "accuracy_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.7}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -62,15 +62,13 @@ class MinimumF1Score(ThresholdTest):
     name = "f1_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.5}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -59,15 +59,13 @@ class MinimumROCAUCScore(ThresholdTest):
     name = "roc_auc_score"
     required_inputs = ["model", "dataset"]
     default_params = {"min_threshold": 0.5}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
 
     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
         """
@@ -53,16 +53,14 @@ class ModelsPerformanceComparison(ClassifierPerformance):
 
     name = "models_performance_comparison"
     required_inputs = ["dataset", "models"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-            "model_comparison",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+        "model_comparison",
+    ]
 
     def summary(self, metric_value: dict):
         """
@@ -67,15 +67,13 @@ class OverfitDiagnosis(ThresholdTest):
     name = "overfit_regions"
     required_inputs = ["model", "datasets"]
     default_params = {"features_columns": None, "cut_off_percentage": 4}
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_diagnosis",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_diagnosis",
+    ]
 
     default_metrics = {
         "accuracy": metrics.accuracy_score,
@@ -56,16 +56,14 @@ class PermutationFeatureImportance(Metric):
         "fontsize": None,
         "figure_height": 1000,
     }
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "feature_importance",
-            "visualization",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "feature_importance",
+        "visualization",
+    ]
 
     def run(self):
         x = self.inputs.dataset.x_df()
@@ -73,15 +73,13 @@ class PopulationStabilityIndex(Metric):
 
     name = "psi"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["classification", "text_classification"],
-        "tags": [
-            "sklearn",
-            "binary_classification",
-            "multiclass_classification",
-            "model_performance",
-        ],
-    }
+    tasks = ["classification", "text_classification"]
+    tags = [
+        "sklearn",
+        "binary_classification",
+        "multiclass_classification",
+        "model_performance",
+    ]
     default_params = {
         "num_bins": 10,
         "mode": "fixed",