validmind 2.5.25__py3-none-any.whl → 2.6.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.7.dist-info/METADATA +137 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.25.dist-info/METADATA +0 -118
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
@@ -2,33 +2,26 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
from dataclasses import dataclass
|
6
|
-
from functools import partial
|
7
5
|
from typing import List
|
8
6
|
|
9
|
-
import pandas as pd
|
10
7
|
from numpy import unique
|
11
|
-
from sklearn import
|
12
|
-
|
13
|
-
from validmind.vm_models import (
|
14
|
-
ResultSummary,
|
15
|
-
ResultTable,
|
16
|
-
ResultTableMetadata,
|
17
|
-
ThresholdTest,
|
18
|
-
ThresholdTestResult,
|
19
|
-
)
|
20
|
-
|
8
|
+
from sklearn.metrics import classification_report
|
21
9
|
|
22
|
-
|
23
|
-
|
24
|
-
lb.fit(y_test)
|
25
|
-
y_test = lb.transform(y_test)
|
26
|
-
y_pred = lb.transform(y_pred)
|
27
|
-
return metrics.roc_auc_score(y_test, y_pred, average=average)
|
10
|
+
from validmind.tests import tags, tasks
|
11
|
+
from validmind.vm_models import VMDataset, VMModel
|
28
12
|
|
29
13
|
|
30
|
-
@
|
31
|
-
|
14
|
+
@tags(
|
15
|
+
"sklearn",
|
16
|
+
"binary_classification",
|
17
|
+
"multiclass_classification",
|
18
|
+
"model_performance",
|
19
|
+
"visualization",
|
20
|
+
)
|
21
|
+
@tasks("classification", "text_classification")
|
22
|
+
def TrainingTestDegradation(
|
23
|
+
datasets: List[VMDataset], model: VMModel, max_threshold: float = 0.10
|
24
|
+
):
|
32
25
|
"""
|
33
26
|
Tests if model performance degradation between training and test datasets exceeds a predefined threshold.
|
34
27
|
|
@@ -70,106 +63,39 @@ class TrainingTestDegradation(ThresholdTest):
|
|
70
63
|
not always be available.
|
71
64
|
- The test is currently only designed for classification tasks.
|
72
65
|
"""
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
"Test Score": result.values["test_score"],
|
110
|
-
"Degradation (%)": result.values["degradation"] * 100,
|
111
|
-
"Pass/Fail": "Pass" if result.passed else "Fail",
|
112
|
-
}
|
113
|
-
for result in results
|
114
|
-
]
|
115
|
-
|
116
|
-
return ResultSummary(
|
117
|
-
results=[
|
118
|
-
ResultTable(
|
119
|
-
data=pd.DataFrame(results_table),
|
120
|
-
metadata=ResultTableMetadata(
|
121
|
-
title="Training-Test Degradation Test"
|
122
|
-
),
|
123
|
-
)
|
124
|
-
]
|
125
|
-
)
|
126
|
-
|
127
|
-
def run(self):
|
128
|
-
y_train_true = self.inputs.datasets[0].y
|
129
|
-
y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
|
130
|
-
y_train_true = y_train_true.astype(y_train_pred.dtype)
|
131
|
-
|
132
|
-
y_test_true = self.inputs.datasets[1].y
|
133
|
-
y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
|
134
|
-
y_test_true = y_test_true.astype(y_test_pred.dtype)
|
135
|
-
|
136
|
-
report_train = metrics.classification_report(
|
137
|
-
y_train_true, y_train_pred, output_dict=True, zero_division=0
|
138
|
-
)
|
139
|
-
report_train["roc_auc"] = multiclass_roc_auc_score(y_train_true, y_train_pred)
|
140
|
-
|
141
|
-
report_test = metrics.classification_report(
|
142
|
-
y_test_true, y_test_pred, output_dict=True, zero_division=0
|
143
|
-
)
|
144
|
-
report_test["roc_auc"] = multiclass_roc_auc_score(y_test_true, y_test_pred)
|
145
|
-
|
146
|
-
classes = {str(i) for i in unique(y_train_true)}
|
147
|
-
|
148
|
-
test_results = []
|
149
|
-
for class_name in classes:
|
150
|
-
for metric_name in ["precision", "recall", "f1-score"]:
|
151
|
-
train_score = report_train[class_name][metric_name]
|
152
|
-
test_score = report_test[class_name][metric_name]
|
153
|
-
|
154
|
-
# If training score is 0, degradation is assumed to be 100%
|
155
|
-
if train_score == 0:
|
156
|
-
degradation = 1.0
|
157
|
-
else:
|
158
|
-
degradation = (train_score - test_score) / train_score
|
159
|
-
|
160
|
-
passed = degradation < self.params["max_threshold"]
|
161
|
-
test_results.append(
|
162
|
-
ThresholdTestResult(
|
163
|
-
test_name=metric_name,
|
164
|
-
passed=passed,
|
165
|
-
values={
|
166
|
-
"class": class_name,
|
167
|
-
"test_score": test_score,
|
168
|
-
"train_score": train_score,
|
169
|
-
"degradation": degradation,
|
170
|
-
},
|
171
|
-
)
|
172
|
-
)
|
173
|
-
return self.cache_results(
|
174
|
-
test_results, passed=all(r.passed for r in test_results)
|
175
|
-
)
|
66
|
+
ds1_report = classification_report(
|
67
|
+
y_true=datasets[0].y,
|
68
|
+
y_pred=datasets[0].y_pred(model),
|
69
|
+
output_dict=True,
|
70
|
+
zero_division=0,
|
71
|
+
)
|
72
|
+
ds2_report = classification_report(
|
73
|
+
y_true=datasets[1].y,
|
74
|
+
y_pred=datasets[1].y_pred(model),
|
75
|
+
output_dict=True,
|
76
|
+
zero_division=0,
|
77
|
+
)
|
78
|
+
|
79
|
+
table = []
|
80
|
+
|
81
|
+
for class_name in {str(i) for i in unique(datasets[0].y)}:
|
82
|
+
for metric_name in ["precision", "recall", "f1-score"]:
|
83
|
+
ds1_score = ds1_report[class_name][metric_name]
|
84
|
+
ds2_score = ds2_report[class_name][metric_name]
|
85
|
+
|
86
|
+
# If training score is 0, degradation is assumed to be 100%
|
87
|
+
degradation = 1.0 if ds1_score == 0 else (ds1_score - ds2_score) / ds1_score
|
88
|
+
passed = degradation < max_threshold
|
89
|
+
|
90
|
+
table.append(
|
91
|
+
{
|
92
|
+
"Class": class_name,
|
93
|
+
"Metric": metric_name.title(),
|
94
|
+
f"{datasets[0].input_id} Score": ds1_score,
|
95
|
+
f"{datasets[1].input_id} Score": ds2_score,
|
96
|
+
"Degradation (%)": degradation * 100,
|
97
|
+
"Pass/Fail": "Pass" if passed else "Fail",
|
98
|
+
}
|
99
|
+
)
|
100
|
+
|
101
|
+
return table, all(row["Pass/Fail"] == "Pass" for row in table)
|
@@ -2,15 +2,15 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
from dataclasses import dataclass
|
6
|
-
|
7
5
|
from sklearn import metrics
|
8
6
|
|
9
|
-
from
|
7
|
+
from validmind import tags, tasks
|
8
|
+
from validmind.vm_models import VMDataset, VMModel
|
10
9
|
|
11
10
|
|
12
|
-
@
|
13
|
-
|
11
|
+
@tags("sklearn", "model_performance")
|
12
|
+
@tasks("clustering")
|
13
|
+
def VMeasure(dataset: VMDataset, model: VMModel):
|
14
14
|
"""
|
15
15
|
Evaluates homogeneity and completeness of a clustering model using the V Measure Score.
|
16
16
|
|
@@ -48,14 +48,11 @@ class VMeasure(ClusterPerformance):
|
|
48
48
|
the other. The V Measure Score does not provide flexibility in assigning different weights to homogeneity and
|
49
49
|
completeness.
|
50
50
|
"""
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
51
|
+
return [
|
52
|
+
{
|
53
|
+
"V Measure": metrics.v_measure_score(
|
54
|
+
labels_true=dataset.y,
|
55
|
+
labels_pred=dataset.y_pred(model),
|
56
|
+
)
|
57
|
+
}
|
58
58
|
]
|
59
|
-
|
60
|
-
def metric_info(self):
|
61
|
-
return {"V Measure": metrics.v_measure_score}
|