validmind 2.5.24__py3-none-any.whl → 2.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. validmind/__init__.py +8 -17
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +66 -85
  4. validmind/ai/test_result_description/context.py +2 -2
  5. validmind/ai/utils.py +26 -1
  6. validmind/api_client.py +43 -79
  7. validmind/client.py +5 -7
  8. validmind/client_config.py +1 -1
  9. validmind/datasets/__init__.py +1 -1
  10. validmind/datasets/classification/customer_churn.py +7 -5
  11. validmind/datasets/nlp/__init__.py +2 -2
  12. validmind/errors.py +6 -10
  13. validmind/html_templates/content_blocks.py +18 -16
  14. validmind/logging.py +21 -16
  15. validmind/tests/__init__.py +28 -5
  16. validmind/tests/__types__.py +186 -170
  17. validmind/tests/_store.py +7 -21
  18. validmind/tests/comparison.py +362 -0
  19. validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
  20. validmind/tests/data_validation/ADF.py +49 -83
  21. validmind/tests/data_validation/AutoAR.py +59 -96
  22. validmind/tests/data_validation/AutoMA.py +59 -96
  23. validmind/tests/data_validation/AutoStationarity.py +66 -114
  24. validmind/tests/data_validation/ClassImbalance.py +48 -117
  25. validmind/tests/data_validation/DatasetDescription.py +180 -209
  26. validmind/tests/data_validation/DatasetSplit.py +50 -75
  27. validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
  28. validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
  29. validmind/tests/data_validation/Duplicates.py +21 -90
  30. validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
  31. validmind/tests/data_validation/HighCardinality.py +32 -80
  32. validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
  33. validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
  34. validmind/tests/data_validation/IQROutliersTable.py +40 -80
  35. validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
  36. validmind/tests/data_validation/KPSS.py +33 -81
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
  38. validmind/tests/data_validation/MissingValues.py +17 -58
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
  40. validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
  41. validmind/tests/data_validation/RollingStatsPlot.py +50 -81
  42. validmind/tests/data_validation/SeasonalDecompose.py +102 -184
  43. validmind/tests/data_validation/Skewness.py +27 -64
  44. validmind/tests/data_validation/SpreadPlot.py +34 -57
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
  48. validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
  49. validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
  50. validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
  54. validmind/tests/data_validation/TooManyZeroValues.py +21 -70
  55. validmind/tests/data_validation/UniqueRows.py +23 -62
  56. validmind/tests/data_validation/WOEBinPlots.py +83 -109
  57. validmind/tests/data_validation/WOEBinTable.py +28 -69
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
  59. validmind/tests/data_validation/nlp/CommonWords.py +49 -57
  60. validmind/tests/data_validation/nlp/Hashtags.py +27 -49
  61. validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
  62. validmind/tests/data_validation/nlp/Mentions.py +32 -63
  63. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
  64. validmind/tests/data_validation/nlp/Punctuations.py +63 -47
  65. validmind/tests/data_validation/nlp/Sentiment.py +4 -0
  66. validmind/tests/data_validation/nlp/StopWords.py +62 -91
  67. validmind/tests/data_validation/nlp/TextDescription.py +116 -159
  68. validmind/tests/data_validation/nlp/Toxicity.py +12 -4
  69. validmind/tests/decorator.py +33 -242
  70. validmind/tests/load.py +212 -153
  71. validmind/tests/model_validation/BertScore.py +13 -7
  72. validmind/tests/model_validation/BleuScore.py +4 -0
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
  74. validmind/tests/model_validation/ContextualRecall.py +3 -0
  75. validmind/tests/model_validation/FeaturesAUC.py +43 -74
  76. validmind/tests/model_validation/MeteorScore.py +3 -0
  77. validmind/tests/model_validation/RegardScore.py +5 -1
  78. validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
  79. validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
  80. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
  81. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
  82. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
  83. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
  84. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
  85. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
  86. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
  87. validmind/tests/model_validation/embeddings/utils.py +53 -0
  88. validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
  89. validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
  90. validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
  91. validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
  92. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
  93. validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
  94. validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
  95. validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
  96. validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
  97. validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
  98. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
  99. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
  100. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
  101. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
  102. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
  103. validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
  104. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
  105. validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
  106. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
  107. validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
  108. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
  109. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
  110. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
  111. validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
  112. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
  113. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
  114. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
  115. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
  116. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
  117. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
  118. validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
  119. validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
  120. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
  121. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
  122. validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
  123. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
  124. validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
  125. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
  126. validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
  127. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
  128. validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
  129. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
  130. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
  132. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
  133. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
  134. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
  136. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
  137. validmind/tests/output.py +120 -0
  138. validmind/tests/prompt_validation/Bias.py +55 -98
  139. validmind/tests/prompt_validation/Clarity.py +56 -99
  140. validmind/tests/prompt_validation/Conciseness.py +63 -101
  141. validmind/tests/prompt_validation/Delimitation.py +48 -89
  142. validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
  143. validmind/tests/prompt_validation/Robustness.py +80 -121
  144. validmind/tests/prompt_validation/Specificity.py +61 -95
  145. validmind/tests/prompt_validation/ai_powered_test.py +2 -2
  146. validmind/tests/run.py +314 -496
  147. validmind/tests/test_providers.py +109 -79
  148. validmind/tests/utils.py +91 -0
  149. validmind/unit_metrics/__init__.py +16 -155
  150. validmind/unit_metrics/classification/F1.py +1 -0
  151. validmind/unit_metrics/classification/Precision.py +1 -0
  152. validmind/unit_metrics/classification/ROC_AUC.py +1 -0
  153. validmind/unit_metrics/classification/Recall.py +1 -0
  154. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
  155. validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
  156. validmind/unit_metrics/regression/HuberLoss.py +1 -0
  157. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
  158. validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
  159. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
  160. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
  161. validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
  162. validmind/unit_metrics/regression/QuantileLoss.py +1 -0
  163. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  164. validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
  165. validmind/utils.py +66 -17
  166. validmind/vm_models/__init__.py +2 -17
  167. validmind/vm_models/dataset/dataset.py +31 -4
  168. validmind/vm_models/figure.py +7 -37
  169. validmind/vm_models/model.py +3 -0
  170. validmind/vm_models/result/__init__.py +7 -0
  171. validmind/vm_models/result/result.jinja +21 -0
  172. validmind/vm_models/result/result.py +337 -0
  173. validmind/vm_models/result/utils.py +160 -0
  174. validmind/vm_models/test_suite/runner.py +16 -54
  175. validmind/vm_models/test_suite/summary.py +3 -3
  176. validmind/vm_models/test_suite/test.py +43 -77
  177. validmind/vm_models/test_suite/test_suite.py +8 -40
  178. validmind-2.6.7.dist-info/METADATA +137 -0
  179. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
  180. validmind/tests/data_validation/AutoSeasonality.py +0 -190
  181. validmind/tests/metadata.py +0 -59
  182. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
  183. validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
  184. validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
  185. validmind/unit_metrics/composite.py +0 -238
  186. validmind/vm_models/test/metric.py +0 -98
  187. validmind/vm_models/test/metric_result.py +0 -61
  188. validmind/vm_models/test/output_template.py +0 -55
  189. validmind/vm_models/test/result_summary.py +0 -76
  190. validmind/vm_models/test/result_wrapper.py +0 -488
  191. validmind/vm_models/test/test.py +0 -103
  192. validmind/vm_models/test/threshold_test.py +0 -106
  193. validmind/vm_models/test/threshold_test_result.py +0 -75
  194. validmind/vm_models/test_context.py +0 -259
  195. validmind-2.5.24.dist-info/METADATA +0 -118
  196. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
  197. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
  198. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0

validmind/tests/model_validation/sklearn/ConfusionMatrix.py

@@ -2,17 +2,24 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from dataclasses import dataclass

  import numpy as np
  import plotly.figure_factory as ff
- from sklearn import metrics
+ from sklearn.metrics import confusion_matrix

- from validmind.vm_models import Figure, Metric
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel


- @dataclass
- class ConfusionMatrix(Metric):
+ @tags(
+     "sklearn",
+     "binary_classification",
+     "multiclass_classification",
+     "model_performance",
+     "visualization",
+ )
+ @tasks("classification", "text_classification")
+ def ConfusionMatrix(dataset: VMDataset, model: VMModel):
      """
      Evaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix
      heatmap.

@@ -59,95 +66,56 @@ class ConfusionMatrix(Metric):
      - Risks of misinterpretation exist because the matrix doesn't directly provide precision, recall, or F1-score data.
      These metrics have to be computed separately.
      """
+     y_pred = dataset.y_pred(model)
+     y_true = dataset.y.astype(y_pred.dtype)

-     name = "confusion_matrix"
-     required_inputs = ["model", "dataset"]
-     tasks = ["classification", "text_classification"]
-     tags = [
-         "sklearn",
-         "binary_classification",
-         "multiclass_classification",
-         "model_performance",
-         "visualization",
-     ]
-
-     def run(self):
-         y_true = self.inputs.dataset.y
-         labels = np.unique(y_true)
-         labels.sort()
-         labels = np.array(labels).T.tolist()
-
-         y_pred = self.inputs.dataset.y_pred(self.inputs.model)
-         y_true = y_true.astype(y_pred.dtype)
-
-         cm = metrics.confusion_matrix(y_true, y_pred, labels=labels)
-
-         text = None
-         if len(labels) == 2:
-             tn, fp, fn, tp = cm.ravel()
-
-             # Custom text to display on the heatmap cells
-             text = [
-                 [
-                     f"<b>True Negatives (TN)</b><br />{tn}",
-                     f"<b>False Positives (FP)</b><br />{fp}",
-                 ],
-                 [
-                     f"<b>False Negatives (FN)</b><br />{fn}",
-                     f"<b>True Positives (TP)</b><br />{tp}",
-                 ],
-             ]
-
-         fig = ff.create_annotated_heatmap(
-             z=cm,
-             colorscale="Blues",
-             x=labels,
-             y=labels,
-             annotation_text=text,
-         )
-
-         fig["data"][0][
-             "hovertemplate"
-         ] = "True Label:%{y}<br>Predicted Label:%{x}<br>Count:%{z}<extra></extra>"
-
-         fig.update_layout(
-             xaxis=dict(title="Predicted label"),
-             yaxis=dict(title="True label"),
-             autosize=False,
-             width=600,
-             height=600,
-         )
-
-         # Add an annotation at the bottom of the heatmap
-         fig.add_annotation(
-             x=0.5,
-             y=-0.1,
-             xref="paper",
-             yref="paper",
-             text=f"Confusion Matrix for {self.inputs.model.input_id} on {self.inputs.dataset.input_id}",
-             showarrow=False,
-             font=dict(size=14),
-         )
-
-         return self.cache_results(
-             metric_value={
-                 "confusion_matrix": cm,
-             },
-             figures=[
-                 Figure(
-                     for_object=self,
-                     key="confusion_matrix",
-                     figure=fig,
-                 )
-             ],
-         )
-
-     def test(self):
-         """Unit Test for Confusion Matrix Metric"""
-         assert self.result is not None
+     labels = np.unique(y_true)
+     labels = sorted(labels.tolist())

-         assert self.result.metric is not None
-         assert isinstance(self.result.metric.value, dict)
-         assert "confusion_matrix" in self.result.metric.value
+     cm = confusion_matrix(y_true, y_pred, labels=labels)

-         assert len(self.result.figures) == 1
+     text = None
+     if len(labels) == 2:
+         tn, fp, fn, tp = cm.ravel()
+         text = [
+             [
+                 f"<b>True Negatives (TN)</b><br />{tn}",
+                 f"<b>False Positives (FP)</b><br />{fp}",
+             ],
+             [
+                 f"<b>False Negatives (FN)</b><br />{fn}",
+                 f"<b>True Positives (TP)</b><br />{tp}",
+             ],
+         ]
+
+     fig = ff.create_annotated_heatmap(
+         z=cm,
+         colorscale="Blues",
+         x=labels,
+         y=labels,
+         annotation_text=text,
+     )
+
+     fig["data"][0][
+         "hovertemplate"
+     ] = "True Label:%{y}<br>Predicted Label:%{x}<br>Count:%{z}<extra></extra>"
+
+     fig.update_layout(
+         xaxis=dict(title="Predicted label"),
+         yaxis=dict(title="True label"),
+         autosize=False,
+         width=600,
+         height=600,
+     )
+
+     fig.add_annotation(
+         x=0.5,
+         y=-0.1,
+         xref="paper",
+         yref="paper",
+         text=f"Confusion Matrix for {model.input_id} on {dataset.input_id}",
+         showarrow=False,
+         font=dict(size=14),
+     )
+
+     return fig
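
Note on the pattern above: in 2.6.x the class-based Metric with run() and cache_results() becomes a plain function decorated with @tags/@tasks that takes VMDataset/VMModel inputs and returns its output (here a Plotly figure) directly. A minimal usage sketch, assuming the usual init_model/init_dataset/assign_predictions setup and the run_test entry point; the names clf, test_df, and "target" are illustrative placeholders, not part of the diff:

    import validmind as vm

    # Illustrative setup: `clf` is a fitted classifier, `test_df` its test split,
    # and vm.init() is assumed to have been called already.
    vm_model = vm.init_model(clf, input_id="classifier")
    vm_test_ds = vm.init_dataset(
        dataset=test_df, input_id="test_dataset", target_column="target"
    )
    vm_test_ds.assign_predictions(model=vm_model)

    # The functional test is resolved by its test ID; the decorated `dataset` and
    # `model` arguments are supplied through `inputs`.
    result = vm.tests.run_test(
        "validmind.model_validation.sklearn.ConfusionMatrix",
        inputs={"dataset": vm_test_ds, "model": vm_model},
    )
    result.log()  # send the figure and generated description to the ValidMind platform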

validmind/tests/model_validation/sklearn/FeatureImportance.py

@@ -6,11 +6,12 @@ import pandas as pd
  from sklearn.inspection import permutation_importance

  from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel


  @tags("model_explainability", "sklearn")
  @tasks("regression", "time_series_forecasting")
- def FeatureImportance(dataset, model, num_features=3):
+ def FeatureImportance(dataset: VMDataset, model: VMModel, num_features: int = 3):
      """
      Compute feature importance scores for a given model and generate a summary table
      with the top important features.

@@ -53,20 +54,18 @@ def FeatureImportance(dataset, model, num_features=3):
      """
      results_list = []

-     x = dataset.x_df()
-     y = dataset.y_df()
-
      pfi_values = permutation_importance(
-         model.model,
-         x,
-         y,
+         estimator=model.model,
+         X=dataset.x_df(),
+         y=dataset.y_df(),
          random_state=0,
          n_jobs=-2,
      )

      # Create a dictionary to store PFI scores
      pfi = {
-         column: pfi_values["importances_mean"][i] for i, column in enumerate(x.columns)
+         column: pfi_values["importances_mean"][i]
+         for i, column in enumerate(dataset.feature_columns)
      }

      # Sort features by their importance

validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py

@@ -2,15 +2,15 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from dataclasses import dataclass
-
  from sklearn import metrics

- from .ClusterPerformance import ClusterPerformance
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel


- @dataclass
- class FowlkesMallowsScore(ClusterPerformance):
+ @tags("sklearn", "model_performance")
+ @tasks("clustering")
+ def FowlkesMallowsScore(dataset: VMDataset, model: VMModel):
      """
      Evaluates the similarity between predicted and actual cluster assignments in a model using the Fowlkes-Mallows
      score.

@@ -52,14 +52,11 @@ class FowlkesMallowsScore(ClusterPerformance):
      - It does not handle mismatching numbers of clusters between the true and predicted labels. As such, it may return
      misleading results if the predicted labels suggest a different number of clusters than what is in the true labels.
      """
-
-     name = "fowlkes_mallows_score"
-     required_inputs = ["model", "dataset"]
-     tasks = ["clustering"]
-     tags = [
-         "sklearn",
-         "model_performance",
+     return [
+         {
+             "Fowlkes-Mallows score": metrics.fowlkes_mallows_score(
+                 labels_true=dataset.y,
+                 labels_pred=dataset.y_pred(model),
+             )
+         }
      ]
-
-     def metric_info(self):
-         return {"Fowlkes-Mallows score": metrics.fowlkes_mallows_score}

validmind/tests/model_validation/sklearn/HomogeneityScore.py

@@ -2,15 +2,15 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from dataclasses import dataclass
-
  from sklearn import metrics

- from .ClusterPerformance import ClusterPerformance
+ from validmind import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel


- @dataclass
- class HomogeneityScore(ClusterPerformance):
+ @tags("sklearn", "model_performance")
+ @tasks("clustering")
+ def HomogeneityScore(dataset: VMDataset, model: VMModel):
      """
      Assesses clustering homogeneity by comparing true and predicted labels, scoring from 0 (heterogeneous) to 1
      (homogeneous).

@@ -50,14 +50,11 @@ class HomogeneityScore(ClusterPerformance):
      - The score does not address the actual number of clusters formed, or the evenness of cluster sizes. It only checks
      the homogeneity within the given clusters created by the model.
      """
-
-     name = "homogeneity_score"
-     required_inputs = ["model", "dataset"]
-     tasks = ["clustering"]
-     tags = [
-         "sklearn",
-         "model_performance",
+     return [
+         {
+             "Homogeneity Score": metrics.homogeneity_score(
+                 labels_true=dataset.y,
+                 labels_pred=dataset.y_pred(model),
+             )
+         }
      ]
-
-     def metric_info(self):
-         return {"Homogeneity Score": metrics.homogeneity_score}

validmind/tests/model_validation/sklearn/HyperParametersTuning.py

@@ -2,17 +2,23 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from dataclasses import dataclass
+ from typing import Union

- import pandas as pd
  from sklearn.model_selection import GridSearchCV

+ from validmind import tags, tasks
  from validmind.errors import SkipTestError
- from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
+ from validmind.vm_models import VMDataset, VMModel


- @dataclass
- class HyperParametersTuning(Metric):
+ @tags("sklearn", "model_performance")
+ @tasks("classification", "clustering")
+ def HyperParametersTuning(
+     model: VMModel,
+     dataset: VMDataset,
+     param_grid: Union[dict, None] = None,
+     scoring: Union[str, None] = None,
+ ):
      """
      Exerts exhaustive grid search to identify optimal hyperparameters for the model, improving performance.

@@ -54,51 +60,15 @@
      - There's a potential risk of overfitting the model if the training set is not representative of the data that the
      model will be applied to.
      """
-
-     name = "hyper_parameters_tuning"
-     required_inputs = ["model", "dataset"]
-     tasks = ["classification", "clustering"]
-     tags = ["sklearn", "model_performance"]
-     default_params = {"param_grid": None, "scoring": None}
-
-     def run(self):
-         param_grid = self.params["param_grid"]
-         if param_grid is None:
-             raise SkipTestError(
-                 "param_grid in dictonary format must be provided to run this test"
-             )
-
-         model = self.inputs.model.model
-         estimators = GridSearchCV(
-             model, param_grid=param_grid, scoring=self.params["scoring"]
-         )
-         estimators.fit(self.inputs.dataset.x, self.inputs.dataset.y)
-
-         results = [
-             {
-                 "Best Model": f"{estimators.best_estimator_}",
-                 "Best Parameters": estimators.best_params_,
-             }
-         ]
-         return self.cache_results(
-             {
-                 "parameters_tuning": pd.DataFrame(results).to_dict(orient="records"),
-             }
-         )
-
-     def summary(self, metric_value):
-         """
-         Build one table for summarizing the hyper parameters tunning
-         """
-         summary_regression = metric_value["parameters_tuning"]
-
-         return ResultSummary(
-             results=[
-                 ResultTable(
-                     data=summary_regression,
-                     metadata=ResultTableMetadata(
-                         title="Hyper Parameters Tuning Results"
-                     ),
-                 ),
-             ]
-         )
+     if not param_grid:
+         raise SkipTestError("'param_grid' dictionary must be provided to run this test")
+
+     estimators = GridSearchCV(model.model, param_grid=param_grid, scoring=scoring)
+     estimators.fit(dataset.x, dataset.y)
+
+     return [
+         {
+             "Best Model": estimators.best_estimator_,
+             "Best Parameters": estimators.best_params_,
+         }
+     ]
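
The HyperParametersTuning change above also shows how test parameters move from the default_params class attribute to plain keyword arguments with defaults. When the test is run through the harness, those keywords are supplied via params; a sketch, assuming vm_model and vm_train_ds are already-initialized ValidMind model and dataset objects and with an illustrative grid:

    from validmind.tests import run_test

    result = run_test(
        "validmind.model_validation.sklearn.HyperParametersTuning",
        inputs={"model": vm_model, "dataset": vm_train_ds},
        params={
            "param_grid": {"C": [0.1, 1, 10]},  # forwarded to GridSearchCV
            "scoring": "f1",
        },
    )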

validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py

@@ -2,20 +2,25 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from dataclasses import dataclass
+ from typing import List, Union

  import numpy as np
  import plotly.graph_objects as go
  from plotly.subplots import make_subplots
  from scipy.spatial.distance import cdist
+ from sklearn import clone
  from sklearn.metrics import silhouette_score

+ from validmind import tags, tasks
  from validmind.errors import SkipTestError
- from validmind.vm_models import Figure, Metric
+ from validmind.vm_models import VMDataset, VMModel


- @dataclass
- class KMeansClustersOptimization(Metric):
+ @tags("sklearn", "model_performance", "kmeans")
+ @tasks("clustering")
+ def KMeansClustersOptimization(
+     model: VMModel, dataset: VMDataset, n_clusters: Union[List[int], None] = None
+ ):
      """
      Optimizes the number of clusters in K-means models using Elbow and Silhouette methods.

@@ -62,80 +67,61 @@ class KMeansClustersOptimization(Metric):
      - Assumes spherical clusters (due to using the Euclidean distance in the Elbow method), which might not align with
      the actual structure of the data.
      """
-
-     name = "clusters_optimize_elbow_method"
-     required_inputs = ["model", "dataset"]
-     tasks = ["clustering"]
-     tags = ["sklearn", "model_performance", "kmeans"]
-
-     default_params = {"n_clusters": None}
-
-     def run(self):
-         n_clusters = self.params["n_clusters"]
-         if n_clusters is None:
-             raise SkipTestError("n_clusters parameter must be provide in list format")
-         model = self.inputs.model.model
-
-         distortions = {}
-         silhouette_avg = {}
-
-         for k in n_clusters:
-             # Building and fitting the model
-             kmeanModel = model.set_params(n_clusters=k)
-             kmeanModel = kmeanModel.fit(self.inputs.dataset.x)
-             # Calculate silhouette coefficients for each data point
-             silhouette_avg[k] = silhouette_score(
-                 self.inputs.dataset.x,
-                 kmeanModel.predict(self.inputs.dataset.x),
-             )
-
-             distortions[k] = (
-                 sum(
-                     np.min(
-                         cdist(
-                             self.inputs.dataset.x,
-                             kmeanModel.cluster_centers_,
-                             "euclidean",
-                         ),
-                         axis=1,
-                     )
-                 )
-                 / self.inputs.dataset.x.shape[0]
-             )
-         fig = make_subplots(
-             rows=1,
-             cols=2,
-             subplot_titles=(
-                 "The Silhouette value of each cluster",
-                 "The Elbow Method using Distortion",
-             ),
+     if not n_clusters:
+         raise SkipTestError(
+             "Cluster range must be provided via the 'n_clusters' parameter"
          )

-         fig.add_trace(
-             go.Scatter(x=list(silhouette_avg.keys()), y=list(silhouette_avg.values())),
-             row=1,
-             col=1,
-         )
-         fig.update_xaxes(title_text="Number of clusters", row=1, col=1)
-         fig.update_yaxes(title_text="Avg Silhouette Score", row=1, col=1)
+     distortions = {}
+     silhouette_avg = {}

-         fig.add_trace(
-             go.Scatter(x=list(distortions.keys()), y=list(distortions.values())),
-             row=1,
-             col=2,
-         )
-         # Update xaxis properties
-         fig.update_xaxes(title_text="Number of clusters", showgrid=False, row=1, col=2)
-         fig.update_yaxes(title_text="Distortion", showgrid=False, row=1, col=2)
+     for k in n_clusters:
+         kmeanModel = clone(model.model).set_params(n_clusters=k).fit(dataset.x)

-         fig.update_layout(showlegend=False)
+         silhouette_avg[k] = silhouette_score(
+             dataset.x,
+             kmeanModel.predict(dataset.x),
+         )

-         figures = [
-             Figure(
-                 for_object=self,
-                 key=self.key,
-                 figure=fig,
+         distortions[k] = (
+             sum(
+                 np.min(
+                     cdist(
+                         dataset.x,
+                         kmeanModel.cluster_centers_,
+                         "euclidean",
+                     ),
+                     axis=1,
+                 )
              )
-         ]
+             / dataset.x.shape[0]
+         )

-         return self.cache_results(figures=figures)
+     fig = make_subplots(
+         rows=1,
+         cols=2,
+         subplot_titles=(
+             "The Silhouette value of each cluster",
+             "The Elbow Method using Distortion",
+         ),
+     )
+
+     fig.add_trace(
+         go.Scatter(x=list(silhouette_avg.keys()), y=list(silhouette_avg.values())),
+         row=1,
+         col=1,
+     )
+     fig.update_xaxes(title_text="Number of clusters", row=1, col=1)
+     fig.update_yaxes(title_text="Avg Silhouette Score", row=1, col=1)
+
+     fig.add_trace(
+         go.Scatter(x=list(distortions.keys()), y=list(distortions.values())),
+         row=1,
+         col=2,
+     )
+     fig.update_xaxes(title_text="Number of clusters", showgrid=False, row=1, col=2)
+     fig.update_yaxes(title_text="Distortion", showgrid=False, row=1, col=2)
+
+     fig.update_layout(showlegend=False)
+
+     return fig
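
A side effect worth noting in the KMeansClustersOptimization rewrite: the old code called set_params directly on the wrapped estimator, mutating the user's model in place, while the new code clones it first. A small sketch of the sklearn clone semantics the fix relies on:

    from sklearn import clone
    from sklearn.cluster import KMeans

    base = KMeans(n_clusters=8, random_state=0)
    trial = clone(base).set_params(n_clusters=3)  # unfitted copy with the new setting

    assert base.get_params()["n_clusters"] == 8   # the original estimator is untouched
    assert trial.get_params()["n_clusters"] == 3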

validmind/tests/model_validation/sklearn/MinimumAccuracy.py

@@ -1,24 +1,17 @@
  # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+ from sklearn.metrics import accuracy_score

- from dataclasses import dataclass
- from typing import List
+ from validmind.tests import tags, tasks
+ from validmind.vm_models import VMDataset, VMModel

- import pandas as pd
- from sklearn import metrics

- from validmind.vm_models import (
-     ResultSummary,
-     ResultTable,
-     ResultTableMetadata,
-     ThresholdTest,
-     ThresholdTestResult,
+ @tags(
+     "sklearn", "binary_classification", "multiclass_classification", "model_performance"
  )
-
-
- @dataclass
- class MinimumAccuracy(ThresholdTest):
+ @tasks("classification", "text_classification")
+ def MinimumAccuracy(dataset: VMDataset, model: VMModel, min_threshold: float = 0.7):
      """
      Checks if the model's prediction accuracy meets or surpasses a specified threshold.

@@ -55,73 +48,12 @@ class MinimumAccuracy(ThresholdTest):
      - Inability to measure the model's precision, recall, or capacity to manage false positives or false negatives.
      - Focused on overall correctness and may not be sufficient for all types of model analytics.
      """
-
-     name = "accuracy_score"
-     required_inputs = ["model", "dataset"]
-     default_params = {"min_threshold": 0.7}
-     tasks = ["classification", "text_classification"]
-     tags = [
-         "sklearn",
-         "binary_classification",
-         "multiclass_classification",
-         "model_performance",
-     ]
-
-     def summary(self, results: List[ThresholdTestResult], all_passed: bool):
-         """
-         The accuracy score test returns results like these:
-         [{"values": {"score": 0.734375, "threshold": 0.7}, "passed": true}]
-         """
-         result = results[0]
-         results_table = [
-             {
-                 "Score": result.values["score"],
-                 "Threshold": result.values["threshold"],
-                 "Pass/Fail": "Pass" if result.passed else "Fail",
-             }
-         ]
-
-         return ResultSummary(
-             results=[
-                 ResultTable(
-                     data=pd.DataFrame(results_table),
-                     metadata=ResultTableMetadata(
-                         title="Minimum Accuracy Test on Test Data"
-                     ),
-                 )
-             ]
-         )
-
-     def run(self):
-         y_true = self.inputs.dataset.y
-         class_pred = self.inputs.dataset.y_pred(self.inputs.model)
-         y_true = y_true.astype(class_pred.dtype)
-
-         accuracy_score = metrics.accuracy_score(y_true, class_pred)
-
-         passed = accuracy_score > self.params["min_threshold"]
-         results = [
-             ThresholdTestResult(
-                 passed=passed,
-                 values={
-                     "score": accuracy_score,
-                     "threshold": self.params["min_threshold"],
-                 },
-             )
-         ]
-
-         return self.cache_results(results, passed=all([r.passed for r in results]))
-
-     def test(self):
-         # Test that there is a result and it's not None
-         assert self.result is not None
-         # Test that results are contained in a list
-         assert isinstance(self.result.test_results.results, list)
-         # Verify that there is exactly one result
-         assert len(self.result.test_results.results) == 1
-         # Extract the single result for clarity
-         test_result = self.result.test_results.results[0]
-         # Check the 'passed' condition logic against the test outcome
-         assert test_result.passed == (
-             test_result.values["score"] >= test_result.values["threshold"]
-         )
+     accuracy = accuracy_score(dataset.y, dataset.y_pred(model))
+
+     return [
+         {
+             "Score": accuracy,
+             "Threshold": min_threshold,
+             "Pass/Fail": "Pass" if accuracy > min_threshold else "Fail",
+         }
+     ], accuracy > min_threshold
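
The MinimumAccuracy rewrite illustrates the new convention for threshold-style tests: return the summary-table rows plus a trailing boolean that the framework records as the pass/fail outcome. The same shape appears to carry over to user-defined tests registered with the @vm.test decorator; a hypothetical sketch (MinimumRecall is not part of the library, and the tuple-return convention for custom tests is an assumption based on the hunk above):

    import validmind as vm
    from sklearn.metrics import recall_score


    @vm.test("my_custom_tests.MinimumRecall")  # hypothetical custom test ID
    def MinimumRecall(dataset, model, min_threshold: float = 0.6):
        """Checks that weighted recall on the dataset meets a minimum threshold."""
        recall = recall_score(dataset.y, dataset.y_pred(model), average="weighted")

        # Table rows first, overall pass/fail boolean last, mirroring MinimumAccuracy above.
        return [
            {
                "Score": recall,
                "Threshold": min_threshold,
                "Pass/Fail": "Pass" if recall > min_threshold else "Fail",
            }
        ], recall > min_threshold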