validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +80 -119
  3. validmind/ai/test_result_description/config.yaml +29 -0
  4. validmind/ai/test_result_description/context.py +73 -0
  5. validmind/ai/test_result_description/image_processing.py +124 -0
  6. validmind/ai/test_result_description/system.jinja +39 -0
  7. validmind/ai/test_result_description/user.jinja +25 -0
  8. validmind/api_client.py +89 -43
  9. validmind/client.py +2 -2
  10. validmind/client_config.py +11 -14
  11. validmind/datasets/credit_risk/__init__.py +1 -0
  12. validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
  13. validmind/datasets/credit_risk/lending_club_bias.py +142 -0
  14. validmind/datasets/regression/fred_timeseries.py +67 -138
  15. validmind/template.py +1 -0
  16. validmind/test_suites/__init__.py +0 -2
  17. validmind/test_suites/statsmodels_timeseries.py +1 -1
  18. validmind/test_suites/summarization.py +0 -1
  19. validmind/test_suites/time_series.py +0 -43
  20. validmind/tests/__types__.py +14 -15
  21. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  22. validmind/tests/data_validation/ADF.py +31 -24
  23. validmind/tests/data_validation/AutoAR.py +9 -9
  24. validmind/tests/data_validation/AutoMA.py +23 -16
  25. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  26. validmind/tests/data_validation/AutoStationarity.py +21 -16
  27. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  28. validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
  29. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
  30. validmind/tests/data_validation/ClassImbalance.py +15 -12
  31. validmind/tests/data_validation/DFGLSArch.py +19 -13
  32. validmind/tests/data_validation/DatasetDescription.py +17 -11
  33. validmind/tests/data_validation/DatasetSplit.py +7 -5
  34. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  35. validmind/tests/data_validation/Duplicates.py +33 -25
  36. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  37. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  38. validmind/tests/data_validation/HighCardinality.py +19 -12
  39. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  40. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  41. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  42. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  43. validmind/tests/data_validation/JarqueBera.py +70 -0
  44. validmind/tests/data_validation/KPSS.py +34 -29
  45. validmind/tests/data_validation/LJungBox.py +66 -0
  46. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  47. validmind/tests/data_validation/MissingValues.py +32 -27
  48. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  49. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  50. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  51. validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
  52. validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
  53. validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
  54. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
  55. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  56. validmind/tests/data_validation/RunsTest.py +72 -0
  57. validmind/tests/data_validation/ScatterPlot.py +63 -78
  58. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  59. validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
  60. validmind/tests/data_validation/Skewness.py +35 -37
  61. validmind/tests/data_validation/SpreadPlot.py +35 -35
  62. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  63. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  64. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  65. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  66. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  67. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  68. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  69. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  70. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  71. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  72. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  73. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  74. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  75. validmind/tests/data_validation/UniqueRows.py +11 -6
  76. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  77. validmind/tests/data_validation/WOEBinTable.py +35 -30
  78. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  79. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  80. validmind/tests/data_validation/nlp/Hashtags.py +42 -40
  81. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  82. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  83. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  84. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  85. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  86. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  87. validmind/tests/data_validation/nlp/TextDescription.py +39 -36
  88. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  89. validmind/tests/decorator.py +81 -42
  90. validmind/tests/model_validation/BertScore.py +36 -27
  91. validmind/tests/model_validation/BleuScore.py +25 -19
  92. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  93. validmind/tests/model_validation/ContextualRecall.py +38 -13
  94. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  95. validmind/tests/model_validation/MeteorScore.py +46 -33
  96. validmind/tests/model_validation/ModelMetadata.py +32 -64
  97. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  98. validmind/tests/model_validation/RegardScore.py +30 -14
  99. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  100. validmind/tests/model_validation/RougeScore.py +36 -30
  101. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  102. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  103. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  104. validmind/tests/model_validation/TokenDisparity.py +31 -23
  105. validmind/tests/model_validation/ToxicityScore.py +26 -17
  106. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  107. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  108. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  109. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  110. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  111. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  112. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  113. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  114. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  115. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  116. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  117. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  118. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  119. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  120. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  121. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  122. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  123. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  124. validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
  125. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  126. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  127. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  128. validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
  129. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  130. validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
  131. validmind/tests/model_validation/ragas/utils.py +6 -0
  132. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  133. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  134. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  135. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  136. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  137. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  138. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  139. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  140. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  141. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  142. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  143. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  144. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  145. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  146. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  147. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  148. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  149. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  150. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
  151. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  152. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  153. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  154. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  155. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  156. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  157. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
  158. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  159. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
  160. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
  161. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  162. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  163. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  164. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  165. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  166. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  167. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
  168. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  169. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  170. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  171. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
  172. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  173. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  174. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  175. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  176. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  177. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  178. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  179. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  180. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  181. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  182. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  183. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  184. validmind/tests/prompt_validation/Bias.py +14 -11
  185. validmind/tests/prompt_validation/Clarity.py +16 -14
  186. validmind/tests/prompt_validation/Conciseness.py +7 -5
  187. validmind/tests/prompt_validation/Delimitation.py +23 -22
  188. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  189. validmind/tests/prompt_validation/Robustness.py +12 -10
  190. validmind/tests/prompt_validation/Specificity.py +13 -11
  191. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  192. validmind/tests/run.py +68 -23
  193. validmind/unit_metrics/__init__.py +81 -144
  194. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  195. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  196. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  197. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  198. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  199. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  200. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  201. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  202. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  203. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  204. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  205. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  206. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  207. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  208. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  209. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  210. validmind/utils.py +4 -0
  211. validmind/vm_models/dataset/dataset.py +2 -0
  212. validmind/vm_models/figure.py +5 -0
  213. validmind/vm_models/test/metric.py +1 -0
  214. validmind/vm_models/test/result_wrapper.py +143 -158
  215. validmind/vm_models/test/threshold_test.py +1 -0
  216. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
  217. validmind-2.5.18.dist-info/RECORD +324 -0
  218. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  219. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  220. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  221. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  222. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  223. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  224. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  225. validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
  226. validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
  227. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  228. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  229. validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
  230. validmind-2.5.8.dist-info/RECORD +0 -318
  231. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
  232. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
  233. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
validmind/tests/run.py CHANGED
@@ -17,6 +17,7 @@ from validmind.vm_models import (
     MetricResult,
     ResultSummary,
     ResultTable,
+    ResultTableMetadata,
     TestContext,
     TestInput,
     ThresholdTestResults,
@@ -147,6 +148,26 @@ def _combine_figures(figure_lists: List[List[Any]], input_groups: List[Dict[str,
     return [figure for figures in figure_lists for figure in figures]
 
 
+def _combine_unit_metrics(results: List[MetricResultWrapper]):
+    if not results[0].scalar:
+        return
+
+    for result in results:
+        table = ResultTable(
+            data=[{"value": result.scalar}],
+            metadata=ResultTableMetadata(title="Unit Metrics"),
+        )
+        if not result.metric:
+            result.metric = MetricResult(
+                ref_id="will_be_overwritten",
+                key=result.result_id,
+                value=result.scalar,
+                summary=ResultSummary(results=[table]),
+            )
+        else:
+            result.metric.summary.results.append(table)
+
+
 def metric_comparison(
     results: List[MetricResultWrapper],
     test_id: TestID,
@@ -172,22 +193,41 @@ def metric_comparison(
             raise ValueError(f"Unsupported type for value: {v}")
         input_group_strings.append(new_group)
 
-    merged_summary = _combine_summaries(
-        [
-            {"inputs": input_group_strings[i], "summary": result.metric.summary}
-            for i, result in enumerate(results)
-        ]
-    )
-    merged_figures = _combine_figures(
-        [result.figures for result in results], input_groups
-    )
-
-    # Patch figure metadata so they are connected to the comparison result
-    if merged_figures and len(merged_figures):
-        for i, figure in enumerate(merged_figures):
-            figure.key = f"{figure.key}-{i}"
-            figure.metadata["_name"] = test_id
-            figure.metadata["_ref_id"] = ref_id
+    # handle unit metrics (scalar values) by adding it to the summary
+    _combine_unit_metrics(results)
+
+    # Check if the results list contains a result object with a metric
+    if any(
+        hasattr(result, "metric")
+        and hasattr(result.metric, "summary")
+        and result.metric.summary
+        for result in results
+    ):
+        # Compute merged summaries only if there is a result with a metric
+        merged_summary = _combine_summaries(
+            [
+                {"inputs": input_group_strings[i], "summary": result.metric.summary}
+                for i, result in enumerate(results)
+            ]
+        )
+    else:
+        merged_summary = None
+
+    # Check if the results list contains a result object with figures
+    if any(hasattr(result, "figures") and result.figures for result in results):
+        # Compute merged figures only if there is at least one result with figures
+        merged_figures = _combine_figures(
+            [result.figures for result in results],
+            input_groups,
+        )
+        # Patch figure metadata so they are connected to the comparison result
+        if merged_figures and len(merged_figures):
+            for i, figure in enumerate(merged_figures):
+                figure.key = f"{figure.key}-{i}"
+                figure.metadata["_name"] = test_id
+                figure.metadata["_ref_id"] = ref_id
+    else:
+        merged_figures = None
 
     return MetricResultWrapper(
         result_id=test_id,
@@ -196,7 +236,7 @@ def metric_comparison(
                 test_id=test_id,
                 default_description=f"Comparison test result for {test_id}",
                 summary=merged_summary.serialize() if merged_summary else None,
-                figures=merged_figures,
+                figures=merged_figures if merged_figures else None,
                 should_generate=generate_description,
             ),
         ],
@@ -294,6 +334,8 @@
 def run_comparison_test(
     test_id: TestID,
     input_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]]],
+    name: str = None,
+    unit_metrics: List[TestID] = None,
     params: Dict[str, Any] = None,
     show: bool = True,
     output_template: str = None,
@@ -308,6 +350,8 @@
     results = [
         run_test(
             test_id,
+            name=name,
+            unit_metrics=unit_metrics,
            inputs=inputs,
            show=False,
            params=params,
@@ -387,33 +431,34 @@ def run_test(
            "When providing an `input_grid`, you cannot also provide `inputs` or `kwargs`"
        )
 
+    if unit_metrics:
+        metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
+        test_id = f"validmind.composite_metric.{metric_id_name}" or test_id
+
     if input_grid:
         return run_comparison_test(
             test_id,
             input_grid,
+            name=name,
+            unit_metrics=unit_metrics,
             params=params,
             output_template=output_template,
             show=show,
             generate_description=__generate_description,
         )
 
-    if test_id and test_id.startswith("validmind.unit_metrics"):
+    if test_id.startswith("validmind.unit_metrics"):
         # TODO: as we move towards a more unified approach to metrics
         # we will want to make everything functional and remove the
         # separation between unit metrics and "normal" metrics
         return run_metric(test_id, inputs=inputs, params=params, show=show)
 
     if unit_metrics:
-        metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
-        test_id = f"validmind.composite_metric.{metric_id_name}"
-
         error, TestClass = load_composite_metric(
             unit_metrics=unit_metrics, metric_name=metric_id_name
         )
-
         if error:
             raise LoadTestError(error)
-
     else:
         TestClass = load_test(test_id, reload=True)
 
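Taken together, the run.py changes move composite-metric naming ahead of the input_grid branch, so a set of unit metrics can now be composed under a custom name and even compared across an input grid in a single call. A minimal sketch of what this enables (the input IDs "model", "train_dataset", and "test_dataset" are hypothetical placeholders for previously registered ValidMind inputs):

from validmind.tests import run_test

# Compose unit metrics under a custom name; per the diff, the name is
# collapsed into a composite test ID such as
# "validmind.composite_metric.ClassifierPerformance".
result = run_test(
    name="Classifier Performance",
    unit_metrics=[
        "validmind.unit_metrics.classification.Accuracy",
        "validmind.unit_metrics.classification.F1",
    ],
    inputs={"model": "model", "dataset": "test_dataset"},
)

# New in this version: name/unit_metrics are forwarded through
# run_comparison_test, so the same composite can be compared across inputs.
comparison = run_test(
    name="Classifier Performance",
    unit_metrics=[
        "validmind.unit_metrics.classification.Accuracy",
        "validmind.unit_metrics.classification.F1",
    ],
    input_grid={"model": ["model"], "dataset": ["train_dataset", "test_dataset"]},
)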
validmind/unit_metrics/__init__.py CHANGED
@@ -2,145 +2,111 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+import glob
 import hashlib
 import json
+import os
 from importlib import import_module
+from textwrap import dedent
+
+from IPython.display import Markdown, display
 
 from validmind.input_registry import input_registry
 from validmind.tests.decorator import _build_result, _inspect_signature
-from validmind.utils import get_model_info, test_id_to_name
+from validmind.utils import test_id_to_name
 
 unit_metric_results_cache = {}
 
 
-def _serialize_params(params):
-    """
-    Serialize the parameters to a unique hash, handling None values.
-    This function serializes the parameters dictionary to a JSON string,
-    then creates a SHA-256 hash of the string to ensure a unique identifier
-    for the parameters. If params is None, a default hash is returned.
-
-    Args:
-        params (dict or None): The parameters to be serialized.
-
-    Returns:
-        str: A SHA-256 hash of the JSON string representation of the params,
-            or a default hash if params is None.
-    """
-    if params is None:
-        # Handle None by returning a hash of an empty dictionary or a predefined value
-        params_json = json.dumps({})
-    else:
-        params_json = json.dumps(params, sort_keys=True)
-
-    hash_object = hashlib.sha256(params_json.encode())
-    return hash_object.hexdigest()
-
-
-def _serialize_model(model):
-    """
-    Generate a SHA-256 hash for a scikit-learn model based on its type and parameters.
-
-    Args:
-        model VMModel: The model to be serialized.
-
-    Returns:
-        str: A SHA-256 hash of the model's description.
-    """
-
-    model_info = get_model_info(model)
+def _serialize_dataset(dataset, model=None, sample_size=1000):
+    columns = [*dataset.feature_columns, dataset.target_column]
+    if model:
+        columns.append(dataset.prediction_column(model))
 
-    model_json = json.dumps(model_info, sort_keys=True)
+    df = dataset._df[columns]
 
-    # Create a SHA-256 hash of the JSON string
-    hash_object = hashlib.sha256(model_json.encode())
-    return hash_object.hexdigest()
+    return hashlib.md5(
+        df.sample(n=min(sample_size, df.shape[0]), random_state=42)
+        .to_string(header=True, index=True)
+        .encode()
+    ).hexdigest()
 
 
-def _serialize_dataset(dataset, model):
-    """
-    Serialize the description of the dataset input to a unique hash.
+def _get_metric_cache_key(metric_id, inputs, params):
+    cache_elements = [
+        metric_id,
+        hashlib.md5(json.dumps(params, sort_keys=True).encode()).hexdigest(),
+    ]
 
-    This function generates a hash based on the dataset's structure, including
-    the target and feature columns, the prediction column associated with a specific model ID,
-    and directly incorporates the model ID and prediction column name to ensure uniqueness.
+    if "model" in inputs:
+        cache_elements.append(inputs["model"].input_id)
 
-    Args:
-        dataset: The dataset object, which should have properties like _df (pandas DataFrame),
-            target_column (string), feature_columns (list of strings), and extra_columns (dict).
-        model (VMModel): The model whose predictions will be included in the serialized dataset
-
-    Returns:
-        str: MD5 hash of the dataset
+    if "dataset" in inputs:
+        cache_elements.append(inputs["dataset"].input_id)
+        cache_elements.append(
+            _serialize_dataset(inputs["dataset"], inputs.get("model"))
+        )
 
-    Note:
-        Including the model ID and prediction column name in the hash calculation ensures uniqueness,
-        especially in cases where the predictions are sparse or the dataset has not significantly changed.
-        This approach guarantees that the hash will distinguish between model-generated predictions
-        and pre-computed prediction columns, addressing potential hash collisions.
-    """
-    return _fast_hash(
-        dataset._df[
-            [
-                *dataset.feature_columns,
-                dataset.target_column,
-                dataset.prediction_column(model),
-            ]
-        ]
-    )
+    return hashlib.md5("_".join(cache_elements).encode()).hexdigest()
 
 
-def _fast_hash(df, sample_size=1000):
-    """
-    Generates a fast hash by sampling, converting to string and md5 hashing.
+def describe_metric(metric_id, raw=False):
+    """Describe a metric
 
     Args:
-        df (pd.DataFrame): The DataFrame to hash.
-        sample_size (int): The maximum number of rows to include in the sample.
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
+        raw (bool): Whether to return the description as a dictionary
 
     Returns:
-        str: MD5 hash of the DataFrame.
+        dict: A dictionary containing the metric description
     """
-    df_sample = df.sample(n=min(sample_size, len(df)), random_state=42)
-
-    return hashlib.md5(
-        df_sample.to_string(header=True, index=True).encode()
-    ).hexdigest()
+    metric = load_metric(metric_id)
+    inputs, params = _inspect_signature(metric)
 
+    if raw:
+        return {
+            "id": metric_id,
+            "description": metric.__doc__,
+            "inputs": inputs,
+            "params": params,
+        }
 
-def get_metric_cache_key(metric_id, params, inputs):
-    cache_elements = [metric_id]
+    inputs = ", ".join(inputs.keys())
+    params = ", ".join(params.keys())
+    description_md = f"""
+    ### {test_id_to_name(metric_id)} (*'{metric_id}'*)
 
-    # Serialize params if not None
-    serialized_params = _serialize_params(params) if params else "None"
-    cache_elements.append(serialized_params)
+    {metric.__doc__ or ""}
 
-    # Check if 'inputs' is a dictionary
-    if not isinstance(inputs, dict):
-        raise TypeError("Expected 'inputs' to be a dictionary.")
+    **Inputs**: {inputs}
 
-    # Check for 'model' and 'dataset' keys in 'inputs'
-    if "model" not in inputs or "dataset" not in inputs:
-        raise ValueError("Missing 'model' or 'dataset' in 'inputs'.")
-
-    dataset = inputs["dataset"]
-    model = inputs["model"]
+    **Parameters**: {params}
+    """
+    display(Markdown(dedent(description_md)))
 
-    cache_elements.append(_serialize_dataset(dataset, model))
 
-    cache_elements.append(_serialize_model(model))
+def list_metrics():
+    """List all available metrics
 
-    # Combine elements to form the cache key
-    combined_elements = "_".join(cache_elements)
-    key = hashlib.sha256(combined_elements.encode()).hexdigest()
-    return key
+    Returns:
+        list: A list of metric ids
+    """
+    # current directory of this file is the __init__.py file in the validmind/unit_metrics directory
+    # glob for all metrics in the unit_metrics directory (indicated by capitalized python files)
+    # recursive since we want to include subdirectories
+    curr_dir = os.path.dirname(os.path.realpath(__file__))
+    return [
+        f"{__name__}.{os.path.relpath(metric, curr_dir).replace('/', '.')[:-3]}"
+        for metric in glob.glob(f"{curr_dir}/**/*.py", recursive=True)
+        if os.path.isfile(metric) and os.path.basename(metric)[0].isupper()
+    ]
 
 
 def load_metric(metric_id):
     """Load a metric class from a string
 
     Args:
-        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.sklearn.F1')
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
 
     Returns:
         callable: The metric function
@@ -152,7 +118,7 @@ def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False)
     """Run a single metric and cache the results
 
     Args:
-        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.sklearn.F1')
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
         inputs (dict): A dictionary of the metric inputs
         params (dict): A dictionary of the metric parameters
         show (bool): Whether to display the results
@@ -164,7 +130,7 @@ def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False)
     }
     params = params or {}
 
-    cache_key = get_metric_cache_key(metric_id, params, inputs)
+    cache_key = _get_metric_cache_key(metric_id, inputs, params)
 
     if cache_key not in unit_metric_results_cache:
         metric = load_metric(metric_id)
@@ -182,53 +148,24 @@ def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False)
             result,
             # store the input ids that were used to calculate the result
             [v.input_id for v in inputs.values()],
+            # store the params that were used to calculate the result
+            params,
         )
 
-    value = unit_metric_results_cache[cache_key][0]
+    cached_result = unit_metric_results_cache[cache_key]
 
     if value_only:
-        return value
-
-    output_template = f"""
-    <table>
-        <thead>
-            <tr>
-                <th>Metric</th>
-                <th>Value</th>
-            </tr>
-        </thead>
-        <tbody>
-            <tr>
-                <td><strong>{test_id_to_name(metric_id)}</strong></td>
-                <td>{value:.4f}</td>
-            </tr>
-        </tbody>
-    </table>
-    <style>
-        th, td {{
-            padding: 5px;
-            text-align: left;
-        }}
-    </style>
-    """
-    result = _build_result(
-        results=value,
+        return cached_result[0]
+
+    result_wrapper = _build_result(
+        results=cached_result[0],
         test_id=metric_id,
-        description="",
-        output_template=output_template,
-        inputs=unit_metric_results_cache[cache_key][1],
+        inputs=cached_result[1],
+        params=cached_result[2],
+        generate_description=False,
    )
 
-    # in case the user tries to log the result object
-    def log():
-        raise Exception(
-            "Cannot log unit metrics directly..."
-            "You can run this unit metric as part of a composite metric and log that"
-        )
-
-    result.log = log
-
    if show:
-        result.show()
+        result_wrapper.show()
 
-    return result
+    return result_wrapper
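The rewritten validmind/unit_metrics/__init__.py replaces the separate SHA-256 model/params hashing with a single MD5 cache key built from the metric ID, a params hash, the input IDs, and a sampled dataset hash, and it adds list_metrics and describe_metric as discovery helpers. A short sketch of the resulting workflow (the input IDs "model" and "test_dataset" are hypothetical, assumed to resolve via the input registry):

from validmind.unit_metrics import describe_metric, list_metrics, run_metric

# Enumerate available unit metrics (capitalized .py files under unit_metrics/)
print(list_metrics())

# Render a Markdown summary of the metric's docstring, inputs, and params
describe_metric("validmind.unit_metrics.classification.F1")

# Run a metric; a repeat call with the same inputs/params is served from
# the in-memory cache keyed as described above
result = run_metric(
    "validmind.unit_metrics.classification.F1",
    inputs={"model": "model", "dataset": "test_dataset"},
)
f1_value = run_metric(
    "validmind.unit_metrics.classification.F1",
    inputs={"model": "model", "dataset": "test_dataset"},
    value_only=True,
)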
validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} RENAMED
@@ -7,8 +7,8 @@ from sklearn.metrics import accuracy_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def Accuracy(dataset, model):
     """Calculates the accuracy of a model"""
     return accuracy_score(dataset.y, dataset.y_pred(model))
validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} RENAMED
@@ -7,7 +7,7 @@ from sklearn.metrics import f1_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def F1(model, dataset, **kwargs):
     return f1_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} RENAMED
@@ -7,7 +7,7 @@ from sklearn.metrics import precision_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def Precision(model, dataset, **kwargs):
     return precision_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} RENAMED
@@ -9,10 +9,9 @@ from sklearn.preprocessing import LabelBinarizer
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def ROC_AUC(model, dataset, **kwargs):
-
     y_true = dataset.y
 
     if len(unique(y_true)) > 2:
validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} RENAMED
@@ -7,7 +7,7 @@ from sklearn.metrics import recall_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def Recall(model, dataset, **kwargs):
     return recall_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} RENAMED
@@ -7,7 +7,7 @@ from sklearn.metrics import r2_score as _r2_score
 from validmind import tags, tasks
 
 
-@tags("regression", "sklearn", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def AdjustedRSquaredScore(model, dataset):
     r2_score = _r2_score(
validmind/unit_metrics/regression/GiniCoefficient.py CHANGED
@@ -7,7 +7,7 @@ import numpy as np
 from validmind import tags, tasks
 
 
-@tags("regression", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def GiniCoefficient(dataset, model):
     y_true = dataset.y
validmind/unit_metrics/regression/HuberLoss.py CHANGED
@@ -7,7 +7,7 @@ import numpy as np
 from validmind import tags, tasks
 
 
-@tags("regression", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def HuberLoss(model, dataset):
     y_true = dataset.y
validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py CHANGED
@@ -7,7 +7,7 @@ import numpy as np
 from validmind import tags, tasks
 
 
-@tags("regression", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def KolmogorovSmirnovStatistic(dataset, model):
     y_true = dataset.y.flatten()
validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} RENAMED
@@ -7,7 +7,7 @@ from sklearn.metrics import mean_absolute_error as _mean_absolute_error
 from validmind import tags, tasks
 
 
-@tags("regression", "sklearn", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def MeanAbsoluteError(model, dataset, **kwargs):
     return _mean_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/regression/MeanAbsolutePercentageError.py CHANGED
@@ -7,7 +7,7 @@ import numpy as np
 from validmind import tags, tasks
 
 
-@tags("regression", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def MeanAbsolutePercentageError(model, dataset):
     y_true = dataset.y
validmind/unit_metrics/regression/MeanBiasDeviation.py CHANGED
@@ -7,7 +7,7 @@ import numpy as np
 from validmind import tags, tasks
 
 
-@tags("regression", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def MeanBiasDeviation(model, dataset):
     return np.mean(dataset.y - dataset.y_pred(model))
validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} RENAMED
@@ -7,7 +7,7 @@ from sklearn.metrics import mean_squared_error
 from validmind import tags, tasks
 
 
-@tags("regression", "sklearn", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def MeanSquaredError(model, dataset, **kwargs):
     return mean_squared_error(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/regression/QuantileLoss.py CHANGED
@@ -7,7 +7,7 @@ import numpy as np
 from validmind import tags, tasks
 
 
-@tags("regression", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def QuantileLoss(model, dataset, quantile=0.5):
     error = dataset.y - dataset.y_pred(model)
validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} RENAMED
@@ -7,7 +7,7 @@ from sklearn.metrics import r2_score
 from validmind import tags, tasks
 
 
-@tags("regression", "sklearn", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def RSquaredError(model, dataset):
     return r2_score(dataset.y, dataset.y_pred(model))
validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} RENAMED
@@ -8,7 +8,7 @@ from sklearn.metrics import mean_squared_error
 from validmind import tags, tasks
 
 
-@tags("regression", "sklearn", "unit_metric")
+@tags("regression")
 @tasks("regression")
 def RootMeanSquaredError(model, dataset, **kwargs):
     return np.sqrt(
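The hunks above are mechanical: every built-in unit metric drops the bookkeeping "sklearn" and "unit_metric" tags (the location under validmind.unit_metrics already implies both), and the sklearn/ subpackage is flattened away. For reference, a custom scalar metric following the same decorator pattern might look like this (a sketch; BalancedAccuracy is hypothetical and not part of the package):

from sklearn.metrics import balanced_accuracy_score

from validmind import tags, tasks


@tasks("classification")
@tags("classification")
def BalancedAccuracy(model, dataset):
    """Calculates the balanced accuracy of a model"""
    return balanced_accuracy_score(dataset.y, dataset.y_pred(model))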
validmind/utils.py CHANGED
@@ -175,6 +175,10 @@ def format_records(df):
             continue
         not_zero = df[col][df[col] != 0]
         min_number = not_zero.min()
+        if math.isnan(min_number) or math.isinf(min_number):
+            df[col] = df[col].round(DEFAULT_SMALL_NUMBER_DECIMALS)
+            continue
+
         _, min_scale = precision_and_scale(min_number)
 
         if min_number >= 10:
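The new guard covers columns whose non-zero subset is empty (for example, an all-zero column): min() of an empty series is NaN, which the downstream precision_and_scale call cannot meaningfully process. A minimal reproduction of the case being guarded:

import math

import pandas as pd

col = pd.Series([0.0, 0.0, 0.0])  # a column containing only zeros
not_zero = col[col != 0]          # empty series after filtering
min_number = not_zero.min()       # min() of an empty series is NaN
print(math.isnan(min_number))     # True: the new branch falls back to
                                  # default rounding and continues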
validmind/vm_models/dataset/dataset.py CHANGED
@@ -323,6 +323,7 @@ class VMDataset(VMInput):
 
         if column_name and column_name in self.feature_columns:
             self.feature_columns.remove(column_name)
+            self._set_feature_columns(self.feature_columns)
 
         return self.extra_columns.prediction_column(model, column_name)
 
@@ -333,6 +334,7 @@
 
         if column_name and column_name in self.feature_columns:
             self.feature_columns.remove(column_name)
+            self._set_feature_columns(self.feature_columns)
 
         return self.extra_columns.probability_column(model, column_name)
 
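Both prediction_column and probability_column now call _set_feature_columns after dropping the assigned column from feature_columns, presumably so that derived feature-column state stays consistent with the removal. A sketch of a path that reaches this code (init_dataset and assign_predictions per the ValidMind API; df, vm_model, and the "model_score" column are hypothetical):

import validmind as vm

# df: a pandas DataFrame that already contains a "model_score" column
vm_ds = vm.init_dataset(dataset=df, target_column="target", input_id="test_dataset")

# Linking the existing column as the model's predictions removes it from
# feature_columns; the new _set_feature_columns call propagates that removal
# to the dataset's internal feature-column bookkeeping.
vm_ds.assign_predictions(model=vm_model, prediction_column="model_score")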
validmind/vm_models/figure.py CHANGED
@@ -157,6 +157,11 @@ class Figure:
 
             return f"data:image/png;base64,{b64_data}"
 
+        elif is_png_image(self.figure):
+            b64_data = base64.b64encode(self.figure).decode("utf-8")
+
+            return f"data:image/png;base64,{b64_data}"
+
         raise UnsupportedFigureError(
             f"Unrecognized figure type: {get_full_typename(self.figure)}"
         )
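The figure serializer (its method name is not visible in this hunk) can now handle raw PNG bytes in addition to figure objects. The is_png_image helper is not shown in this diff; a reasonable assumption is that it checks for the 8-byte PNG signature, along these lines:

import base64

PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"  # the first 8 bytes of every PNG file


def looks_like_png(data) -> bool:
    # hypothetical stand-in for the package's is_png_image() helper
    return isinstance(data, bytes) and data.startswith(PNG_SIGNATURE)


with open("plot.png", "rb") as f:  # hypothetical file
    raw = f.read()

if looks_like_png(raw):
    # mirrors the new branch: raw PNG bytes become a base64 data URI
    data_uri = f"data:image/png;base64,{base64.b64encode(raw).decode('utf-8')}"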