validmind 2.5.25__py3-none-any.whl → 2.6.8__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.8.dist-info/METADATA +137 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.25.dist-info/METADATA +0 -118
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/LICENSE +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/WHEEL +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/sklearn/ConfusionMatrix.py

@@ -2,17 +2,24 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-from dataclasses import dataclass

 import numpy as np
 import plotly.figure_factory as ff
-from sklearn import
+from sklearn.metrics import confusion_matrix

-from validmind
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel


-@
-
+@tags(
+    "sklearn",
+    "binary_classification",
+    "multiclass_classification",
+    "model_performance",
+    "visualization",
+)
+@tasks("classification", "text_classification")
+def ConfusionMatrix(dataset: VMDataset, model: VMModel):
     """
     Evaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix
     heatmap.
@@ -59,95 +66,56 @@ class ConfusionMatrix(Metric):
     - Risks of misinterpretation exist because the matrix doesn't directly provide precision, recall, or F1-score data.
     These metrics have to be computed separately.
     """
+    y_pred = dataset.y_pred(model)
+    y_true = dataset.y.astype(y_pred.dtype)

-
-
-    tasks = ["classification", "text_classification"]
-    tags = [
-        "sklearn",
-        "binary_classification",
-        "multiclass_classification",
-        "model_performance",
-        "visualization",
-    ]
-
-    def run(self):
-        y_true = self.inputs.dataset.y
-        labels = np.unique(y_true)
-        labels.sort()
-        labels = np.array(labels).T.tolist()
-
-        y_pred = self.inputs.dataset.y_pred(self.inputs.model)
-        y_true = y_true.astype(y_pred.dtype)
-
-        cm = metrics.confusion_matrix(y_true, y_pred, labels=labels)
-
-        text = None
-        if len(labels) == 2:
-            tn, fp, fn, tp = cm.ravel()
-
-            # Custom text to display on the heatmap cells
-            text = [
-                [
-                    f"<b>True Negatives (TN)</b><br />{tn}",
-                    f"<b>False Positives (FP)</b><br />{fp}",
-                ],
-                [
-                    f"<b>False Negatives (FN)</b><br />{fn}",
-                    f"<b>True Positives (TP)</b><br />{tp}",
-                ],
-            ]
-
-        fig = ff.create_annotated_heatmap(
-            z=cm,
-            colorscale="Blues",
-            x=labels,
-            y=labels,
-            annotation_text=text,
-        )
-
-        fig["data"][0][
-            "hovertemplate"
-        ] = "True Label:%{y}<br>Predicted Label:%{x}<br>Count:%{z}<extra></extra>"
-
-        fig.update_layout(
-            xaxis=dict(title="Predicted label"),
-            yaxis=dict(title="True label"),
-            autosize=False,
-            width=600,
-            height=600,
-        )
-
-        # Add an annotation at the bottom of the heatmap
-        fig.add_annotation(
-            x=0.5,
-            y=-0.1,
-            xref="paper",
-            yref="paper",
-            text=f"Confusion Matrix for {self.inputs.model.input_id} on {self.inputs.dataset.input_id}",
-            showarrow=False,
-            font=dict(size=14),
-        )
-
-        return self.cache_results(
-            metric_value={
-                "confusion_matrix": cm,
-            },
-            figures=[
-                Figure(
-                    for_object=self,
-                    key="confusion_matrix",
-                    figure=fig,
-                )
-            ],
-        )
-
-    def test(self):
-        """Unit Test for Confusion Matrix Metric"""
-        assert self.result is not None
+    labels = np.unique(y_true)
+    labels = sorted(labels.tolist())

-
-        assert isinstance(self.result.metric.value, dict)
-        assert "confusion_matrix" in self.result.metric.value
+    cm = confusion_matrix(y_true, y_pred, labels=labels)

-
+    text = None
+    if len(labels) == 2:
+        tn, fp, fn, tp = cm.ravel()
+        text = [
+            [
+                f"<b>True Negatives (TN)</b><br />{tn}",
+                f"<b>False Positives (FP)</b><br />{fp}",
+            ],
+            [
+                f"<b>False Negatives (FN)</b><br />{fn}",
+                f"<b>True Positives (TP)</b><br />{tp}",
+            ],
+        ]
+
+    fig = ff.create_annotated_heatmap(
+        z=cm,
+        colorscale="Blues",
+        x=labels,
+        y=labels,
+        annotation_text=text,
+    )
+
+    fig["data"][0][
+        "hovertemplate"
+    ] = "True Label:%{y}<br>Predicted Label:%{x}<br>Count:%{z}<extra></extra>"
+
+    fig.update_layout(
+        xaxis=dict(title="Predicted label"),
+        yaxis=dict(title="True label"),
+        autosize=False,
+        width=600,
+        height=600,
+    )
+
+    fig.add_annotation(
+        x=0.5,
+        y=-0.1,
+        xref="paper",
+        yref="paper",
+        text=f"Confusion Matrix for {model.input_id} on {dataset.input_id}",
+        showarrow=False,
+        font=dict(size=14),
+    )
+
+    return fig
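After this rewrite, ConfusionMatrix is a decorated function rather than a `Metric` subclass, so it is resolved and executed through the test loader instead of being instantiated. A hedged usage sketch, assuming the `init_dataset`/`init_model`/`assign_predictions` wrappers and the `run_test` entry point keep the shapes used in ValidMind's 2.6.x notebooks; the data, `input_id` values, and display call are illustrative, not taken from this diff:

```python
# Illustrative only -- the wrapper calls below are assumptions about the 2.6.x API.
import validmind as vm
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

df = load_breast_cancer(as_frame=True).frame
model = LogisticRegression(max_iter=5000).fit(df.drop(columns="target"), df["target"])

vm_model = vm.init_model(model, input_id="log_reg")                        # assumed wrapper
vm_ds = vm.init_dataset(df, target_column="target", input_id="cancer_ds")  # assumed wrapper
vm_ds.assign_predictions(model=vm_model)  # populates dataset.y_pred(model) used above

# Running (and logging) a test may additionally require connecting via vm.init().
result = vm.tests.run_test(
    "validmind.model_validation.sklearn.ConfusionMatrix",
    inputs={"dataset": vm_ds, "model": vm_model},
)
result.show()  # renders the Plotly heatmap returned by the function above
```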
validmind/tests/model_validation/sklearn/FeatureImportance.py

@@ -6,11 +6,12 @@ import pandas as pd
 from sklearn.inspection import permutation_importance

 from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel


 @tags("model_explainability", "sklearn")
 @tasks("regression", "time_series_forecasting")
-def FeatureImportance(dataset, model, num_features=3):
+def FeatureImportance(dataset: VMDataset, model: VMModel, num_features: int = 3):
     """
     Compute feature importance scores for a given model and generate a summary table
     with the top important features.
@@ -53,20 +54,18 @@ def FeatureImportance(dataset, model, num_features=3):
     """
     results_list = []

-    x = dataset.x_df()
-    y = dataset.y_df()
-
     pfi_values = permutation_importance(
-        model.model,
-
-        y,
+        estimator=model.model,
+        X=dataset.x_df(),
+        y=dataset.y_df(),
         random_state=0,
         n_jobs=-2,
     )

     # Create a dictionary to store PFI scores
     pfi = {
-        column: pfi_values["importances_mean"][i]
+        column: pfi_values["importances_mean"][i]
+        for i, column in enumerate(dataset.feature_columns)
     }

     # Sort features by their importance
validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py

@@ -2,15 +2,15 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-from dataclasses import dataclass
-
 from sklearn import metrics

-from
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel


-@
-
+@tags("sklearn", "model_performance")
+@tasks("clustering")
+def FowlkesMallowsScore(dataset: VMDataset, model: VMModel):
     """
     Evaluates the similarity between predicted and actual cluster assignments in a model using the Fowlkes-Mallows
     score.
@@ -52,14 +52,11 @@ class FowlkesMallowsScore(ClusterPerformance):
     - It does not handle mismatching numbers of clusters between the true and predicted labels. As such, it may return
     misleading results if the predicted labels suggest a different number of clusters than what is in the true labels.
     """
-
-
-
-
-
-
-
+    return [
+        {
+            "Fowlkes-Mallows score": metrics.fowlkes_mallows_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            )
+        }
     ]
-
-    def metric_info(self):
-        return {"Fowlkes-Mallows score": metrics.fowlkes_mallows_score}
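The clustering metrics above now call the scikit-learn scorer directly and return a single-row table. A standalone check of `fowlkes_mallows_score`, showing that it compares the grouping of points rather than the raw label values (synthetic labels):

```python
from sklearn.metrics import fowlkes_mallows_score

labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [2, 2, 0, 0, 1, 1]  # same partition, different label names

# Prints 1.0: the partitions agree pairwise even though the label values differ
print(fowlkes_mallows_score(labels_true=labels_true, labels_pred=labels_pred))
```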
validmind/tests/model_validation/sklearn/HomogeneityScore.py

@@ -2,15 +2,15 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-from dataclasses import dataclass
-
 from sklearn import metrics

-from
+from validmind import tags, tasks
+from validmind.vm_models import VMDataset, VMModel


-@
-
+@tags("sklearn", "model_performance")
+@tasks("clustering")
+def HomogeneityScore(dataset: VMDataset, model: VMModel):
     """
     Assesses clustering homogeneity by comparing true and predicted labels, scoring from 0 (heterogeneous) to 1
     (homogeneous).
@@ -50,14 +50,11 @@ class HomogeneityScore(ClusterPerformance):
     - The score does not address the actual number of clusters formed, or the evenness of cluster sizes. It only checks
     the homogeneity within the given clusters created by the model.
     """
-
-
-
-
-
-
-
+    return [
+        {
+            "Homogeneity Score": metrics.homogeneity_score(
+                labels_true=dataset.y,
+                labels_pred=dataset.y_pred(model),
+            )
+        }
     ]
-
-    def metric_info(self):
-        return {"Homogeneity Score": metrics.homogeneity_score}
validmind/tests/model_validation/sklearn/HyperParametersTuning.py

@@ -2,17 +2,23 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-from
+from typing import Union

-import pandas as pd
 from sklearn.model_selection import GridSearchCV

+from validmind import tags, tasks
 from validmind.errors import SkipTestError
-from validmind.vm_models import
+from validmind.vm_models import VMDataset, VMModel


-@
-
+@tags("sklearn", "model_performance")
+@tasks("classification", "clustering")
+def HyperParametersTuning(
+    model: VMModel,
+    dataset: VMDataset,
+    param_grid: Union[dict, None] = None,
+    scoring: Union[str, None] = None,
+):
     """
     Exerts exhaustive grid search to identify optimal hyperparameters for the model, improving performance.

@@ -54,51 +60,15 @@ class HyperParametersTuning(Metric):
     - There's a potential risk of overfitting the model if the training set is not representative of the data that the
     model will be applied to.
     """
-
-
-
-
-
-
-
-
-
-
-
-
-        )
-
-        model = self.inputs.model.model
-        estimators = GridSearchCV(
-            model, param_grid=param_grid, scoring=self.params["scoring"]
-        )
-        estimators.fit(self.inputs.dataset.x, self.inputs.dataset.y)
-
-        results = [
-            {
-                "Best Model": f"{estimators.best_estimator_}",
-                "Best Parameters": estimators.best_params_,
-            }
-        ]
-        return self.cache_results(
-            {
-                "parameters_tuning": pd.DataFrame(results).to_dict(orient="records"),
-            }
-        )
-
-    def summary(self, metric_value):
-        """
-        Build one table for summarizing the hyper parameters tunning
-        """
-        summary_regression = metric_value["parameters_tuning"]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=summary_regression,
-                    metadata=ResultTableMetadata(
-                        title="Hyper Parameters Tuning Results"
-                    ),
-                ),
-            ]
-        )
+    if not param_grid:
+        raise SkipTestError("'param_grid' dictionary must be provided to run this test")
+
+    estimators = GridSearchCV(model.model, param_grid=param_grid, scoring=scoring)
+    estimators.fit(dataset.x, dataset.y)
+
+    return [
+        {
+            "Best Model": estimators.best_estimator_,
+            "Best Parameters": estimators.best_params_,
+        }
+    ]
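The rewritten test takes `param_grid` and `scoring` as plain function parameters and skips when no grid is supplied. A standalone sketch of the underlying `GridSearchCV` call with an illustrative grid:

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)

param_grid = {"C": [0.1, 1, 10], "kernel": ["linear", "rbf"]}  # illustrative grid
estimators = GridSearchCV(SVC(), param_grid=param_grid, scoring="accuracy")
estimators.fit(X, y)

# Same fields the test reports in its single-row result table
print({"Best Model": estimators.best_estimator_, "Best Parameters": estimators.best_params_})
```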
validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py

@@ -2,20 +2,25 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-from
+from typing import List, Union

 import numpy as np
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 from scipy.spatial.distance import cdist
+from sklearn import clone
 from sklearn.metrics import silhouette_score

+from validmind import tags, tasks
 from validmind.errors import SkipTestError
-from validmind.vm_models import
+from validmind.vm_models import VMDataset, VMModel


-@
-
+@tags("sklearn", "model_performance", "kmeans")
+@tasks("clustering")
+def KMeansClustersOptimization(
+    model: VMModel, dataset: VMDataset, n_clusters: Union[List[int], None] = None
+):
     """
     Optimizes the number of clusters in K-means models using Elbow and Silhouette methods.

@@ -62,80 +67,61 @@ class KMeansClustersOptimization(Metric):
     - Assumes spherical clusters (due to using the Euclidean distance in the Elbow method), which might not align with
     the actual structure of the data.
     """
-
-
-
-    tasks = ["clustering"]
-    tags = ["sklearn", "model_performance", "kmeans"]
-
-    default_params = {"n_clusters": None}
-
-    def run(self):
-        n_clusters = self.params["n_clusters"]
-        if n_clusters is None:
-            raise SkipTestError("n_clusters parameter must be provide in list format")
-        model = self.inputs.model.model
-
-        distortions = {}
-        silhouette_avg = {}
-
-        for k in n_clusters:
-            # Building and fitting the model
-            kmeanModel = model.set_params(n_clusters=k)
-            kmeanModel = kmeanModel.fit(self.inputs.dataset.x)
-            # Calculate silhouette coefficients for each data point
-            silhouette_avg[k] = silhouette_score(
-                self.inputs.dataset.x,
-                kmeanModel.predict(self.inputs.dataset.x),
-            )
-
-            distortions[k] = (
-                sum(
-                    np.min(
-                        cdist(
-                            self.inputs.dataset.x,
-                            kmeanModel.cluster_centers_,
-                            "euclidean",
-                        ),
-                        axis=1,
-                    )
-                )
-                / self.inputs.dataset.x.shape[0]
-            )
-        fig = make_subplots(
-            rows=1,
-            cols=2,
-            subplot_titles=(
-                "The Silhouette value of each cluster",
-                "The Elbow Method using Distortion",
-            ),
+    if not n_clusters:
+        raise SkipTestError(
+            "Cluster range must be provided via the 'n_clusters' parameter"
         )

-
-
-            row=1,
-            col=1,
-        )
-        fig.update_xaxes(title_text="Number of clusters", row=1, col=1)
-        fig.update_yaxes(title_text="Avg Silhouette Score", row=1, col=1)
+    distortions = {}
+    silhouette_avg = {}

-
-
-            row=1,
-            col=2,
-        )
-        # Update xaxis properties
-        fig.update_xaxes(title_text="Number of clusters", showgrid=False, row=1, col=2)
-        fig.update_yaxes(title_text="Distortion", showgrid=False, row=1, col=2)
+    for k in n_clusters:
+        kmeanModel = clone(model.model).set_params(n_clusters=k).fit(dataset.x)

-
+        silhouette_avg[k] = silhouette_score(
+            dataset.x,
+            kmeanModel.predict(dataset.x),
+        )

-
-
-
-
-
+        distortions[k] = (
+            sum(
+                np.min(
+                    cdist(
+                        dataset.x,
+                        kmeanModel.cluster_centers_,
+                        "euclidean",
+                    ),
+                    axis=1,
+                )
             )
-
+            / dataset.x.shape[0]
+        )

-
+    fig = make_subplots(
+        rows=1,
+        cols=2,
+        subplot_titles=(
+            "The Silhouette value of each cluster",
+            "The Elbow Method using Distortion",
+        ),
+    )
+
+    fig.add_trace(
+        go.Scatter(x=list(silhouette_avg.keys()), y=list(silhouette_avg.values())),
+        row=1,
+        col=1,
+    )
+    fig.update_xaxes(title_text="Number of clusters", row=1, col=1)
+    fig.update_yaxes(title_text="Avg Silhouette Score", row=1, col=1)
+
+    fig.add_trace(
+        go.Scatter(x=list(distortions.keys()), y=list(distortions.values())),
+        row=1,
+        col=2,
+    )
+    fig.update_xaxes(title_text="Number of clusters", showgrid=False, row=1, col=2)
+    fig.update_yaxes(title_text="Distortion", showgrid=False, row=1, col=2)
+
+    fig.update_layout(showlegend=False)
+
+    return fig
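One behavioral change worth noting: the loop now fits `clone(model.model)` for each candidate `k`, so the user's fitted estimator is never mutated in place. A small standalone illustration of what `sklearn.clone` does (a KMeans estimator is assumed purely for demonstration):

```python
from sklearn import clone
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=100, centers=3, random_state=0)
original = KMeans(n_clusters=3, n_init=10, random_state=0).fit(X)

# clone copies hyperparameters but drops fitted state, so set_params + fit on the
# copy leaves the original estimator (and its cluster_centers_) untouched.
candidate = clone(original).set_params(n_clusters=5).fit(X)

print(original.n_clusters, candidate.n_clusters)                          # 3 5
print(original.cluster_centers_.shape, candidate.cluster_centers_.shape)  # (3, 2) (5, 2)
```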
validmind/tests/model_validation/sklearn/MinimumAccuracy.py

@@ -1,24 +1,17 @@
 # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from sklearn.metrics import accuracy_score

-from
-from
+from validmind.tests import tags, tasks
+from validmind.vm_models import VMDataset, VMModel

-import pandas as pd
-from sklearn import metrics

-
-
-    ResultTable,
-    ResultTableMetadata,
-    ThresholdTest,
-    ThresholdTestResult,
+@tags(
+    "sklearn", "binary_classification", "multiclass_classification", "model_performance"
 )
-
-
-@dataclass
-class MinimumAccuracy(ThresholdTest):
+@tasks("classification", "text_classification")
+def MinimumAccuracy(dataset: VMDataset, model: VMModel, min_threshold: float = 0.7):
     """
     Checks if the model's prediction accuracy meets or surpasses a specified threshold.

@@ -55,73 +48,12 @@ class MinimumAccuracy(ThresholdTest):
     - Inability to measure the model's precision, recall, or capacity to manage false positives or false negatives.
     - Focused on overall correctness and may not be sufficient for all types of model analytics.
     """
-
-
-
-
-
-
-
-
-
-        "model_performance",
-    ]
-
-    def summary(self, results: List[ThresholdTestResult], all_passed: bool):
-        """
-        The accuracy score test returns results like these:
-        [{"values": {"score": 0.734375, "threshold": 0.7}, "passed": true}]
-        """
-        result = results[0]
-        results_table = [
-            {
-                "Score": result.values["score"],
-                "Threshold": result.values["threshold"],
-                "Pass/Fail": "Pass" if result.passed else "Fail",
-            }
-        ]
-
-        return ResultSummary(
-            results=[
-                ResultTable(
-                    data=pd.DataFrame(results_table),
-                    metadata=ResultTableMetadata(
-                        title="Minimum Accuracy Test on Test Data"
-                    ),
-                )
-            ]
-        )
-
-    def run(self):
-        y_true = self.inputs.dataset.y
-        class_pred = self.inputs.dataset.y_pred(self.inputs.model)
-        y_true = y_true.astype(class_pred.dtype)
-
-        accuracy_score = metrics.accuracy_score(y_true, class_pred)
-
-        passed = accuracy_score > self.params["min_threshold"]
-        results = [
-            ThresholdTestResult(
-                passed=passed,
-                values={
-                    "score": accuracy_score,
-                    "threshold": self.params["min_threshold"],
-                },
-            )
-        ]
-
-        return self.cache_results(results, passed=all([r.passed for r in results]))
-
-    def test(self):
-        # Test that there is a result and it's not None
-        assert self.result is not None
-        # Test that results are contained in a list
-        assert isinstance(self.result.test_results.results, list)
-        # Verify that there is exactly one result
-        assert len(self.result.test_results.results) == 1
-        # Extract the single result for clarity
-        test_result = self.result.test_results.results[0]
-        # Check the 'passed' condition logic against the test outcome
-        assert test_result.passed == (
-            test_result.values["score"] >= test_result.values["threshold"]
-        )
+    accuracy = accuracy_score(dataset.y, dataset.y_pred(model))
+
+    return [
+        {
+            "Score": accuracy,
+            "Threshold": min_threshold,
+            "Pass/Fail": "Pass" if accuracy > min_threshold else "Fail",
+        }
+    ], accuracy > min_threshold
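The MinimumAccuracy rewrite illustrates the threshold-test convention in this release: the function returns its summary rows together with a boolean pass/fail flag instead of caching `ThresholdTestResult` objects. A hedged sketch of a user-defined check following the same return shape; the `@vm.test` registration decorator and the test ID are assumptions based on the `validmind/tests/decorator.py` module listed above, not confirmed by this diff:

```python
# Hypothetical custom test -- mirrors the (table_rows, passed) return shape shown above.
import validmind as vm
from sklearn.metrics import recall_score


@vm.test("my_custom_tests.MinimumRecall")  # assumed registration decorator
def MinimumRecall(dataset, model, min_threshold: float = 0.5):
    """Checks that recall on the given dataset meets a minimum threshold."""
    recall = recall_score(dataset.y, dataset.y_pred(model))

    # Same convention as MinimumAccuracy: a list of table rows plus a pass/fail flag
    return [
        {
            "Score": recall,
            "Threshold": min_threshold,
            "Pass/Fail": "Pass" if recall > min_threshold else "Fail",
        }
    ], recall > min_threshold
```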