validmind 2.5.25__py3-none-any.whl → 2.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. validmind/__init__.py +8 -17
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +66 -85
  4. validmind/ai/test_result_description/context.py +2 -2
  5. validmind/ai/utils.py +26 -1
  6. validmind/api_client.py +43 -79
  7. validmind/client.py +5 -7
  8. validmind/client_config.py +1 -1
  9. validmind/datasets/__init__.py +1 -1
  10. validmind/datasets/classification/customer_churn.py +7 -5
  11. validmind/datasets/nlp/__init__.py +2 -2
  12. validmind/errors.py +6 -10
  13. validmind/html_templates/content_blocks.py +18 -16
  14. validmind/logging.py +21 -16
  15. validmind/tests/__init__.py +28 -5
  16. validmind/tests/__types__.py +186 -170
  17. validmind/tests/_store.py +7 -21
  18. validmind/tests/comparison.py +362 -0
  19. validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
  20. validmind/tests/data_validation/ADF.py +49 -83
  21. validmind/tests/data_validation/AutoAR.py +59 -96
  22. validmind/tests/data_validation/AutoMA.py +59 -96
  23. validmind/tests/data_validation/AutoStationarity.py +66 -114
  24. validmind/tests/data_validation/ClassImbalance.py +48 -117
  25. validmind/tests/data_validation/DatasetDescription.py +180 -209
  26. validmind/tests/data_validation/DatasetSplit.py +50 -75
  27. validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
  28. validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
  29. validmind/tests/data_validation/Duplicates.py +21 -90
  30. validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
  31. validmind/tests/data_validation/HighCardinality.py +32 -80
  32. validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
  33. validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
  34. validmind/tests/data_validation/IQROutliersTable.py +40 -80
  35. validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
  36. validmind/tests/data_validation/KPSS.py +33 -81
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
  38. validmind/tests/data_validation/MissingValues.py +17 -58
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
  40. validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
  41. validmind/tests/data_validation/RollingStatsPlot.py +50 -81
  42. validmind/tests/data_validation/SeasonalDecompose.py +102 -184
  43. validmind/tests/data_validation/Skewness.py +27 -64
  44. validmind/tests/data_validation/SpreadPlot.py +34 -57
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
  48. validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
  49. validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
  50. validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
  54. validmind/tests/data_validation/TooManyZeroValues.py +21 -70
  55. validmind/tests/data_validation/UniqueRows.py +23 -62
  56. validmind/tests/data_validation/WOEBinPlots.py +83 -109
  57. validmind/tests/data_validation/WOEBinTable.py +28 -69
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
  59. validmind/tests/data_validation/nlp/CommonWords.py +49 -57
  60. validmind/tests/data_validation/nlp/Hashtags.py +27 -49
  61. validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
  62. validmind/tests/data_validation/nlp/Mentions.py +32 -63
  63. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
  64. validmind/tests/data_validation/nlp/Punctuations.py +63 -47
  65. validmind/tests/data_validation/nlp/Sentiment.py +4 -0
  66. validmind/tests/data_validation/nlp/StopWords.py +62 -91
  67. validmind/tests/data_validation/nlp/TextDescription.py +116 -159
  68. validmind/tests/data_validation/nlp/Toxicity.py +12 -4
  69. validmind/tests/decorator.py +33 -242
  70. validmind/tests/load.py +212 -153
  71. validmind/tests/model_validation/BertScore.py +13 -7
  72. validmind/tests/model_validation/BleuScore.py +4 -0
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
  74. validmind/tests/model_validation/ContextualRecall.py +3 -0
  75. validmind/tests/model_validation/FeaturesAUC.py +43 -74
  76. validmind/tests/model_validation/MeteorScore.py +3 -0
  77. validmind/tests/model_validation/RegardScore.py +5 -1
  78. validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
  79. validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
  80. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
  81. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
  82. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
  83. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
  84. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
  85. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
  86. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
  87. validmind/tests/model_validation/embeddings/utils.py +53 -0
  88. validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
  89. validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
  90. validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
  91. validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
  92. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
  93. validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
  94. validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
  95. validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
  96. validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
  97. validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
  98. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
  99. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
  100. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
  101. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
  102. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
  103. validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
  104. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
  105. validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
  106. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
  107. validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
  108. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
  109. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
  110. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
  111. validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
  112. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
  113. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
  114. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
  115. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
  116. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
  117. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
  118. validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
  119. validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
  120. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
  121. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
  122. validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
  123. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
  124. validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
  125. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
  126. validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
  127. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
  128. validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
  129. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
  130. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
  132. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
  133. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
  134. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
  136. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
  137. validmind/tests/output.py +120 -0
  138. validmind/tests/prompt_validation/Bias.py +55 -98
  139. validmind/tests/prompt_validation/Clarity.py +56 -99
  140. validmind/tests/prompt_validation/Conciseness.py +63 -101
  141. validmind/tests/prompt_validation/Delimitation.py +48 -89
  142. validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
  143. validmind/tests/prompt_validation/Robustness.py +80 -121
  144. validmind/tests/prompt_validation/Specificity.py +61 -95
  145. validmind/tests/prompt_validation/ai_powered_test.py +2 -2
  146. validmind/tests/run.py +314 -496
  147. validmind/tests/test_providers.py +109 -79
  148. validmind/tests/utils.py +91 -0
  149. validmind/unit_metrics/__init__.py +16 -155
  150. validmind/unit_metrics/classification/F1.py +1 -0
  151. validmind/unit_metrics/classification/Precision.py +1 -0
  152. validmind/unit_metrics/classification/ROC_AUC.py +1 -0
  153. validmind/unit_metrics/classification/Recall.py +1 -0
  154. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
  155. validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
  156. validmind/unit_metrics/regression/HuberLoss.py +1 -0
  157. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
  158. validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
  159. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
  160. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
  161. validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
  162. validmind/unit_metrics/regression/QuantileLoss.py +1 -0
  163. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  164. validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
  165. validmind/utils.py +66 -17
  166. validmind/vm_models/__init__.py +2 -17
  167. validmind/vm_models/dataset/dataset.py +31 -4
  168. validmind/vm_models/figure.py +7 -37
  169. validmind/vm_models/model.py +3 -0
  170. validmind/vm_models/result/__init__.py +7 -0
  171. validmind/vm_models/result/result.jinja +21 -0
  172. validmind/vm_models/result/result.py +337 -0
  173. validmind/vm_models/result/utils.py +160 -0
  174. validmind/vm_models/test_suite/runner.py +16 -54
  175. validmind/vm_models/test_suite/summary.py +3 -3
  176. validmind/vm_models/test_suite/test.py +43 -77
  177. validmind/vm_models/test_suite/test_suite.py +8 -40
  178. validmind-2.6.8.dist-info/METADATA +137 -0
  179. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/RECORD +182 -189
  180. validmind/tests/data_validation/AutoSeasonality.py +0 -190
  181. validmind/tests/metadata.py +0 -59
  182. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
  183. validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
  184. validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
  185. validmind/unit_metrics/composite.py +0 -238
  186. validmind/vm_models/test/metric.py +0 -98
  187. validmind/vm_models/test/metric_result.py +0 -61
  188. validmind/vm_models/test/output_template.py +0 -55
  189. validmind/vm_models/test/result_summary.py +0 -76
  190. validmind/vm_models/test/result_wrapper.py +0 -488
  191. validmind/vm_models/test/test.py +0 -103
  192. validmind/vm_models/test/threshold_test.py +0 -106
  193. validmind/vm_models/test/threshold_test_result.py +0 -75
  194. validmind/vm_models/test_context.py +0 -259
  195. validmind-2.5.25.dist-info/METADATA +0 -118
  196. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/LICENSE +0 -0
  197. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/WHEEL +0 -0
  198. {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/entry_points.txt +0 -0
@@ -2,33 +2,26 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from dataclasses import dataclass
6
- from functools import partial
7
5
  from typing import List
8
6
 
9
- import pandas as pd
10
7
  from numpy import unique
11
- from sklearn import metrics, preprocessing
12
-
13
- from validmind.vm_models import (
14
- ResultSummary,
15
- ResultTable,
16
- ResultTableMetadata,
17
- ThresholdTest,
18
- ThresholdTestResult,
19
- )
20
-
8
+ from sklearn.metrics import classification_report
21
9
 
22
- def multiclass_roc_auc_score(y_test, y_pred, average="macro"):
23
- lb = preprocessing.LabelBinarizer()
24
- lb.fit(y_test)
25
- y_test = lb.transform(y_test)
26
- y_pred = lb.transform(y_pred)
27
- return metrics.roc_auc_score(y_test, y_pred, average=average)
10
+ from validmind.tests import tags, tasks
11
+ from validmind.vm_models import VMDataset, VMModel
28
12
 
29
13
 
30
- @dataclass
31
- class TrainingTestDegradation(ThresholdTest):
14
+ @tags(
15
+ "sklearn",
16
+ "binary_classification",
17
+ "multiclass_classification",
18
+ "model_performance",
19
+ "visualization",
20
+ )
21
+ @tasks("classification", "text_classification")
22
+ def TrainingTestDegradation(
23
+ datasets: List[VMDataset], model: VMModel, max_threshold: float = 0.10
24
+ ):
32
25
  """
33
26
  Tests if model performance degradation between training and test datasets exceeds a predefined threshold.
34
27
 
@@ -70,106 +63,39 @@ class TrainingTestDegradation(ThresholdTest):
70
63
  not always be available.
71
64
  - The test is currently only designed for classification tasks.
72
65
  """
73
-
74
- name = "training_test_degradation"
75
- required_inputs = ["model", "datasets"]
76
-
77
- default_params = {
78
- "metrics": ["accuracy", "precision", "recall", "f1"],
79
- "max_threshold": 0.10, # Maximum 10% degradation
80
- }
81
-
82
- tasks = ["classification", "text_classification"]
83
- tags = [
84
- "sklearn",
85
- "binary_classification",
86
- "multiclass_classification",
87
- "model_performance",
88
- "visualization",
89
- ]
90
-
91
- default_metrics = {
92
- "accuracy": metrics.accuracy_score,
93
- "precision": partial(metrics.precision_score, zero_division=0, average="micro"),
94
- "recall": partial(metrics.recall_score, zero_division=0, average="micro"),
95
- "f1": partial(metrics.f1_score, zero_division=0, average="micro"),
96
- }
97
-
98
- def summary(self, results: List[ThresholdTestResult], all_passed: bool):
99
- """
100
- The training test degradation test returns results like these:
101
- [{"values":
102
- {"test_score": 0.7225, "train_score": 0.7316666666666667, "degradation": 0.012528473804100214}, "test_name": "accuracy", "passed": true}, ...]
103
- """
104
- results_table = [
105
- {
106
- "Class": result.values["class"],
107
- "Metric": result.test_name.title(),
108
- "Train Score": result.values["train_score"],
109
- "Test Score": result.values["test_score"],
110
- "Degradation (%)": result.values["degradation"] * 100,
111
- "Pass/Fail": "Pass" if result.passed else "Fail",
112
- }
113
- for result in results
114
- ]
115
-
116
- return ResultSummary(
117
- results=[
118
- ResultTable(
119
- data=pd.DataFrame(results_table),
120
- metadata=ResultTableMetadata(
121
- title="Training-Test Degradation Test"
122
- ),
123
- )
124
- ]
125
- )
126
-
127
- def run(self):
128
- y_train_true = self.inputs.datasets[0].y
129
- y_train_pred = self.inputs.datasets[0].y_pred(self.inputs.model)
130
- y_train_true = y_train_true.astype(y_train_pred.dtype)
131
-
132
- y_test_true = self.inputs.datasets[1].y
133
- y_test_pred = self.inputs.datasets[1].y_pred(self.inputs.model)
134
- y_test_true = y_test_true.astype(y_test_pred.dtype)
135
-
136
- report_train = metrics.classification_report(
137
- y_train_true, y_train_pred, output_dict=True, zero_division=0
138
- )
139
- report_train["roc_auc"] = multiclass_roc_auc_score(y_train_true, y_train_pred)
140
-
141
- report_test = metrics.classification_report(
142
- y_test_true, y_test_pred, output_dict=True, zero_division=0
143
- )
144
- report_test["roc_auc"] = multiclass_roc_auc_score(y_test_true, y_test_pred)
145
-
146
- classes = {str(i) for i in unique(y_train_true)}
147
-
148
- test_results = []
149
- for class_name in classes:
150
- for metric_name in ["precision", "recall", "f1-score"]:
151
- train_score = report_train[class_name][metric_name]
152
- test_score = report_test[class_name][metric_name]
153
-
154
- # If training score is 0, degradation is assumed to be 100%
155
- if train_score == 0:
156
- degradation = 1.0
157
- else:
158
- degradation = (train_score - test_score) / train_score
159
-
160
- passed = degradation < self.params["max_threshold"]
161
- test_results.append(
162
- ThresholdTestResult(
163
- test_name=metric_name,
164
- passed=passed,
165
- values={
166
- "class": class_name,
167
- "test_score": test_score,
168
- "train_score": train_score,
169
- "degradation": degradation,
170
- },
171
- )
172
- )
173
- return self.cache_results(
174
- test_results, passed=all(r.passed for r in test_results)
175
- )
66
+ ds1_report = classification_report(
67
+ y_true=datasets[0].y,
68
+ y_pred=datasets[0].y_pred(model),
69
+ output_dict=True,
70
+ zero_division=0,
71
+ )
72
+ ds2_report = classification_report(
73
+ y_true=datasets[1].y,
74
+ y_pred=datasets[1].y_pred(model),
75
+ output_dict=True,
76
+ zero_division=0,
77
+ )
78
+
79
+ table = []
80
+
81
+ for class_name in {str(i) for i in unique(datasets[0].y)}:
82
+ for metric_name in ["precision", "recall", "f1-score"]:
83
+ ds1_score = ds1_report[class_name][metric_name]
84
+ ds2_score = ds2_report[class_name][metric_name]
85
+
86
+ # If training score is 0, degradation is assumed to be 100%
87
+ degradation = 1.0 if ds1_score == 0 else (ds1_score - ds2_score) / ds1_score
88
+ passed = degradation < max_threshold
89
+
90
+ table.append(
91
+ {
92
+ "Class": class_name,
93
+ "Metric": metric_name.title(),
94
+ f"{datasets[0].input_id} Score": ds1_score,
95
+ f"{datasets[1].input_id} Score": ds2_score,
96
+ "Degradation (%)": degradation * 100,
97
+ "Pass/Fail": "Pass" if passed else "Fail",
98
+ }
99
+ )
100
+
101
+ return table, all(row["Pass/Fail"] == "Pass" for row in table)
@@ -2,15 +2,15 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from dataclasses import dataclass
6
-
7
5
  from sklearn import metrics
8
6
 
9
- from .ClusterPerformance import ClusterPerformance
7
+ from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
10
9
 
11
10
 
12
- @dataclass
13
- class VMeasure(ClusterPerformance):
11
+ @tags("sklearn", "model_performance")
12
+ @tasks("clustering")
13
+ def VMeasure(dataset: VMDataset, model: VMModel):
14
14
  """
15
15
  Evaluates homogeneity and completeness of a clustering model using the V Measure Score.
16
16
 
@@ -48,14 +48,11 @@ class VMeasure(ClusterPerformance):
48
48
  the other. The V Measure Score does not provide flexibility in assigning different weights to homogeneity and
49
49
  completeness.
50
50
  """
51
-
52
- name = "v_measure_score"
53
- required_inputs = ["model", "dataset"]
54
- tasks = ["clustering"]
55
- tags = [
56
- "sklearn",
57
- "model_performance",
51
+ return [
52
+ {
53
+ "V Measure": metrics.v_measure_score(
54
+ labels_true=dataset.y,
55
+ labels_pred=dataset.y_pred(model),
56
+ )
57
+ }
58
58
  ]
59
-
60
- def metric_info(self):
61
- return {"V Measure": metrics.v_measure_score}