validmind 2.5.24__py3-none-any.whl → 2.6.7__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to one of the supported public registries. It is provided for informational purposes only.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.7.dist-info/METADATA +137 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.24.dist-info/METADATA +0 -118
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
- {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0

validmind/tests/model_validation/sklearn/AdjustedRandIndex.py

@@ -2,15 +2,15 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from …
+from sklearn.metrics import adjusted_rand_score
 
-from …
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
-from .ClusterPerformance import ClusterPerformance
 
-
-@…
-…
+@tags("sklearn", "model_performance", "clustering")
+@tasks("clustering")
+def AdjustedRandIndex(model: VMModel, dataset: VMDataset):
     """
     Measures the similarity between two data clusters using the Adjusted Rand Index (ARI) metric in clustering machine
     learning models.
@@ -49,14 +49,11 @@ class AdjustedRandIndex(ClusterPerformance):
     - It may be difficult to interpret the implications of an ARI score without context or a benchmark, as it is
     heavily dependent on the characteristics of the dataset used.
     """
-    …
+    return [
+        {
+            "Adjusted Rand Index": adjusted_rand_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            )
+        }
     ]
-
-    def metric_info(self):
-        return {"Adjusted Rand Index": metrics.adjusted_rand_score}
validmind/tests/model_validation/sklearn/ClassifierPerformance.py

@@ -2,24 +2,25 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
-
 import numpy as np
 from sklearn.metrics import classification_report, roc_auc_score
 from sklearn.preprocessing import LabelBinarizer
 
-from validmind …
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
 
 def multiclass_roc_auc_score(y_test, y_pred, average="macro"):
     lb = LabelBinarizer()
     lb.fit(y_test)
-
     return roc_auc_score(lb.transform(y_test), lb.transform(y_pred), average=average)
 
 
-@…
-…
+@tags(
+    "sklearn", "binary_classification", "multiclass_classification", "model_performance"
+)
+@tasks("classification", "text_classification")
+def ClassifierPerformance(dataset: VMDataset, model: VMModel, average: str = "macro"):
     """
     Evaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy,
     and ROC AUC scores.
@@ -57,92 +58,53 @@ class ClassifierPerformance(Metric):
     - Specifically designed for classification models and not suitable for regression models.
     - May provide limited insights if the test dataset does not represent real-world scenarios adequately.
     """
-    …
+    y_pred = dataset.y_pred(model)
+    y_true = dataset.y
+
+    labels = np.unique(y_true)
+    labels = sorted(labels.tolist())
+
+    report = classification_report(
+        y_true=y_true,
+        y_pred=y_pred,
+        output_dict=True,
+        zero_division=0,
+    )
+
+    if len(labels) > 2:
+        y_true = y_true.astype(y_pred.dtype)
+        roc_auc = multiclass_roc_auc_score(y_true, y_pred, average=average)
+    else:
+        y_prob = dataset.y_prob(model)
+        y_true = y_true.astype(y_prob.dtype).flatten()
+        roc_auc = roc_auc_score(y_true, y_prob, average=average)
+
+    report["roc_auc"] = roc_auc
+
+    pr_f1_table = [
+        {
+            "Class": f"{class_name}",
+            "Precision": report[f"{class_name}"]["precision"],
+            "Recall": report[f"{class_name}"]["recall"],
+            "F1": report[f"{class_name}"]["f1-score"],
+        }
+        for class_name in labels
     ]
-    default_params = {"average": "macro"}
-
-    def summary(self, metric_value: dict):
-        """
-        When building a multi-class summary we need to calculate weighted average,
-        macro average and per class metrics.
-        """
-        classes = {str(i) for i in np.unique(self.inputs.dataset.y)}
-        pr_f1_table = [
-            {
-                "Class": class_name,
-                "Precision": metric_value[class_name]["precision"],
-                "Recall": metric_value[class_name]["recall"],
-                "F1": metric_value[class_name]["f1-score"],
-            }
-            for class_name in classes
-        ]
-        pr_f1_table.extend(
-            [
-                {
-                    "Class": "Weighted Average",
-                    "Precision": metric_value["weighted avg"]["precision"],
-                    "Recall": metric_value["weighted avg"]["recall"],
-                    "F1": metric_value["weighted avg"]["f1-score"],
-                },
-                {
-                    "Class": "Macro Average",
-                    "Precision": metric_value["macro avg"]["precision"],
-                    "Recall": metric_value["macro avg"]["recall"],
-                    "F1": metric_value["macro avg"]["f1-score"],
-                },
-            ]
-        )
 
-
+    for avg in ["weighted avg", "macro avg"]:
+        pr_f1_table.append(
             {
-                "…
-                "…
+                "Class": avg.replace("avg", "Average").title(),
+                "Precision": report[avg]["precision"],
+                "Recall": report[avg]["recall"],
+                "F1": report[avg]["f1-score"],
             }
-            for metric_name in ["accuracy", "roc_auc"]
-        ]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=pr_f1_table,
-                    metadata=ResultTableMetadata(title="Precision, Recall, and F1"),
-                ),
-                ResultTable(
-                    data=acc_roc_auc_table,
-                    metadata=ResultTableMetadata(title="Accuracy and ROC AUC"),
-                ),
-            ]
         )
 
-    …
-        y_true = self.inputs.dataset.y
-
-        if len(np.unique(y_true)) > 2:
-            y_pred = self.inputs.dataset.y_pred(self.inputs.model)
-            y_true = y_true.astype(y_pred.dtype)
-            roc_auc = multiclass_roc_auc_score(
-                y_true, y_pred, average=self.params["average"]
-            )
-        else:
-            y_prob = self.inputs.dataset.y_prob(self.inputs.model)
-            y_true = y_true.astype(y_prob.dtype).flatten()
-            roc_auc = roc_auc_score(y_true, y_prob, average=self.params["average"])
-
-        report["roc_auc"] = roc_auc
-
-        return self.cache_results(report)
+    return {
+        "Precision, Recall, and F1": pr_f1_table,
+        "Accuracy and ROC AUC": [
+            {"Metric": m, "Value": report[k]}
+            for m, k in [("Accuracy", "accuracy"), ("ROC AUC", "roc_auc")]
+        ],
+    }
validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py

@@ -2,17 +2,17 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from dataclasses import dataclass
-
 import numpy as np
-import pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
 
-from validmind …
+from validmind import tags, tasks
+from validmind.errors import SkipTestError
+from validmind.vm_models import VMDataset, VMModel
 
 
-@…
-…
+@tags("sklearn", "model_performance", "clustering")
+@tasks("clustering")
+def ClusterCosineSimilarity(model: VMModel, dataset: VMDataset):
     """
     Measures the intra-cluster similarity of a clustering model using cosine similarity.
 
@@ -56,59 +56,29 @@ class ClusterCosineSimilarity(Metric):
     - Lastly, although rare, perfect perpendicular vectors (cosine similarity = 0) could be within the same cluster,
     which may give an inaccurate representation of a 'bad' cluster due to low cosine similarity score.
     """
-    …
-            cluster_data = self.inputs.dataset.x[cluster_mask]
-            if cluster_data.size != 0:
-                # Compute the centroid of the cluster
-                cluster_centroid = np.mean(cluster_data, axis=0)
-                # Compute cosine similarities between the centroid and data points in the cluster
-                cosine_similarities = cosine_similarity(
-                    cluster_data, [cluster_centroid]
-                )
-                # Extract cosine similarity values for each data point in the cluster
-                cosine_similarities = cosine_similarities.flatten()
-                results.append(
-                    {
-                        "Cluster": cluster_id,
-                        "Mean Cosine Similarity": np.mean(cosine_similarities),
-                    }
-                )
-        return self.cache_results(
-            {
-                "cosine_similarity": pd.DataFrame(results).to_dict(orient="records"),
-            }
-        )
-
-    def summary(self, metric_value):
-        """
-        Build one table for summarizing the cluster cosine similarity results
-        """
-        summary_regression = metric_value["cosine_similarity"]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=summary_regression,
-                    metadata=ResultTableMetadata(
-                        title="Cluster Cosine Similarity Results"
+    y_pred = dataset.y_pred(model)
+    num_clusters = len(np.unique(y_pred))
+
+    table = []
+
+    for cluster_idx in range(num_clusters):
+        cluster_data = dataset.x[y_pred == cluster_idx]
+
+        if cluster_data.size != 0:
+            cluster_centroid = np.mean(cluster_data, axis=0)
+            table.append(
+                {
+                    "Cluster": cluster_idx,
+                    "Mean Cosine Similarity": np.mean(
+                        cosine_similarity(
+                            X=cluster_data,
+                            Y=[cluster_centroid],
+                        ).flatten()
                     ),
-…
-…
-…
+                }
+            )
+
+    if not table:
+        raise SkipTestError("No clusters found")
+
+    return table
validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py

@@ -2,17 +2,74 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from …
-…
+from sklearn.metrics import (
+    adjusted_mutual_info_score,
+    adjusted_rand_score,
+    completeness_score,
+    fowlkes_mallows_score,
+    homogeneity_score,
+    v_measure_score,
+)
+
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
+
+HOMOGENEITY = """
+The homogeneity score is a clustering evaluation metric that quantifies the degree to which each cluster within a
+clustering solution contains only data points that belong to a single true class or category. It provides a score
+within the range of 0 to 1, where a higher homogeneity score indicates that the clusters are more pure and internally
+consistent with respect to the ground truth labels, meaning that the data points within each cluster are closely related
+in terms of their actual class membership.
+"""
+
+COMPLETENESS = """
+The completeness score is a clustering evaluation metric used to assess how well a clustering solution captures all data points
+that belong to a single true class or category. It quantifies the extent to which the data points of a given class are
+grouped into a single cluster. The completeness score ranges from 0 to 1, with a higher score indicating that the clustering
+solution effectively accounts for all data points within their actual class, emphasizing the comprehensiveness of the
+clustering results with respect to the ground truth labels.
+"""
+
+V_MEASURE = """
+The V-Measure score is a clustering evaluation metric that combines both homogeneity and completeness to provide a
+single measure of the overall quality of a clustering solution. It takes into account how well clusters are internally
+coherent (homogeneity) and how well they capture all data points from the true classes (completeness). The V-Measure
+score ranges from 0 to 1, where a higher score indicates a better clustering result. It balances the trade-off between
+cluster purity and the extent to which all data points from true classes are captured, offering a comprehensive evaluation
+of the clustering performance.
+"""
+ADJUSTED_RAND_INDEX = """
+The Adjusted Rand Index (ARI) is a clustering evaluation metric used to measure the
+similarity between the cluster assignments in a clustering solution and the true class labels. It calculates a
+score that ranges from -1 to 1, with a higher score indicating a better clustering result. A score of 1 signifies
+perfect agreement between the clustering and the ground truth, while a score near 0 implies that the clustering
+is random with respect to the true labels, and negative values indicate disagreement. ARI accounts for chance
+clustering, making it a robust measure for assessing the quality of clustering solutions by considering both the
+extent of agreement and potential randomness in the assignments.
+"""
+
+ADJUSTED_MUTUAL_INFORMATION = """
+The Adjusted Mutual Information (AMI) is a clustering evaluation metric used to quantify the degree of
+agreement between a clustering solution and the true class labels. It provides a score that ranges from 0 to 1,
+with a higher score indicating a better clustering result. A score of 1 signifies perfect agreement,
+while a score of 0 suggests that the clustering is random with respect to the true labels. AMI takes into account the
+potential randomness in the assignments and adjusts for chance, making it a robust measure that considers both the
+extent of agreement and the potential for random clustering.
+"""
+
+FOULKES_MALLOWS_SCORE = """
+The Fowlkes-Mallows score is a clustering evaluation metric used to assess the quality of
+a clustering solution by measuring the geometric mean of two fundamental clustering metrics: precision and recall. It
+provides a score that ranges from 0 to 1, where a higher score indicates a better clustering result. A score of 1 signifies
+perfect agreement with the true class labels, while lower scores suggest less precise and recall clustering performance.
+The Fowlkes-Mallows score offers a balanced evaluation of clustering quality by considering both the ability to correctly
+identify members of the same class (precision) and the ability to capture all members of the same class (recall).
+"""
+
+
+@tags("sklearn", "model_performance", "clustering")
+@tasks("clustering")
+def ClusterPerformanceMetrics(model: VMModel, dataset: VMDataset):
     """
     Evaluates the performance of clustering machine learning models using multiple established metrics.
 
@@ -58,75 +115,53 @@ class ClusterPerformanceMetrics(ClusterPerformance):
     - Does not consider aspects like computational efficiency of the model or its capability to handle high dimensional
     data.
     """
-    …
-        The Fowlkes-Mallows score offers a balanced evaluation of clustering quality by considering both the ability to correctly
-        identify members of the same class (precision) and the ability to capture all members of the same class (recall).""",
-    }
-
-    def summary(self, raw_results):
-        """
-        Returns a summarized representation of the dataset split information
-        """
-        table_records = []
-        for result in raw_results:
-            for key, _ in result.items():
-                table_records.append(
-                    {
-                        "Description": self.default_metrics_desc[key],
-                        key: result[key],
-                    }
-                )
-
-        return ResultSummary(results=[ResultTable(data=table_records)])
-
-    def metric_info(self):
-        return self.default_metrics
+    return [
+        {
+            "Metric": "Homogeneity Score",
+            "Description": HOMOGENEITY,
+            "Value": homogeneity_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+        {
+            "Metric": "Completeness Score",
+            "Description": COMPLETENESS,
+            "Value": completeness_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+        {
+            "Metric": "V Measure",
+            "Description": V_MEASURE,
+            "Value": v_measure_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+        {
+            "Metric": "Adjusted Rand Index",
+            "Description": ADJUSTED_RAND_INDEX,
+            "Value": adjusted_rand_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+        {
+            "Metric": "Adjusted Mutual Information",
+            "Description": ADJUSTED_MUTUAL_INFORMATION,
+            "Value": adjusted_mutual_info_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+        {
+            "Metric": "Fowlkes-Mallows score",
+            "Description": FOULKES_MALLOWS_SCORE,
+            "Value": fowlkes_mallows_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            ),
+        },
+    ]
validmind/tests/model_validation/sklearn/CompletenessScore.py

@@ -2,15 +2,15 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from …
+from sklearn.metrics import completeness_score
 
-from …
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel
 
-from .ClusterPerformance import ClusterPerformance
 
-
-@…
-…
+@tags("sklearn", "model_performance", "clustering")
+@tasks("clustering")
+def CompletenessScore(model: VMModel, dataset: VMDataset):
     """
     Evaluates a clustering model's capacity to categorize instances from a single class into the same cluster.
 
@@ -47,14 +47,11 @@ class CompletenessScore(ClusterPerformance):
     - The Completeness Score only applies to clustering models; it cannot be used for other types of machine learning
     models.
     """
-    …
+    return [
+        {
+            "Completeness Score": completeness_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            )
+        }
     ]
-
-    def metric_info(self):
-        return {"Completeness Score": metrics.completeness_score}