validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in that registry.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +80 -119
- validmind/ai/test_result_description/config.yaml +29 -0
- validmind/ai/test_result_description/context.py +73 -0
- validmind/ai/test_result_description/image_processing.py +124 -0
- validmind/ai/test_result_description/system.jinja +39 -0
- validmind/ai/test_result_description/user.jinja +25 -0
- validmind/api_client.py +89 -43
- validmind/client.py +2 -2
- validmind/client_config.py +11 -14
- validmind/datasets/credit_risk/__init__.py +1 -0
- validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club_bias.py +142 -0
- validmind/datasets/regression/fred_timeseries.py +67 -138
- validmind/template.py +1 -0
- validmind/test_suites/__init__.py +0 -2
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/test_suites/summarization.py +0 -1
- validmind/test_suites/time_series.py +0 -43
- validmind/tests/__types__.py +14 -15
- validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
- validmind/tests/data_validation/ADF.py +31 -24
- validmind/tests/data_validation/AutoAR.py +9 -9
- validmind/tests/data_validation/AutoMA.py +23 -16
- validmind/tests/data_validation/AutoSeasonality.py +18 -16
- validmind/tests/data_validation/AutoStationarity.py +21 -16
- validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
- validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
- validmind/tests/data_validation/ClassImbalance.py +15 -12
- validmind/tests/data_validation/DFGLSArch.py +19 -13
- validmind/tests/data_validation/DatasetDescription.py +17 -11
- validmind/tests/data_validation/DatasetSplit.py +7 -5
- validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
- validmind/tests/data_validation/Duplicates.py +33 -25
- validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
- validmind/tests/data_validation/HighCardinality.py +19 -12
- validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
- validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
- validmind/tests/data_validation/IQROutliersTable.py +40 -36
- validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
- validmind/tests/data_validation/JarqueBera.py +70 -0
- validmind/tests/data_validation/KPSS.py +34 -29
- validmind/tests/data_validation/LJungBox.py +66 -0
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
- validmind/tests/data_validation/MissingValues.py +32 -27
- validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
- validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
- validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
- validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
- validmind/tests/data_validation/RollingStatsPlot.py +31 -23
- validmind/tests/data_validation/RunsTest.py +72 -0
- validmind/tests/data_validation/ScatterPlot.py +63 -78
- validmind/tests/data_validation/SeasonalDecompose.py +38 -34
- validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
- validmind/tests/data_validation/Skewness.py +35 -37
- validmind/tests/data_validation/SpreadPlot.py +35 -35
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
- validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
- validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
- validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
- validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
- validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
- validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
- validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
- validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
- validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
- validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
- validmind/tests/data_validation/TooManyZeroValues.py +16 -11
- validmind/tests/data_validation/UniqueRows.py +11 -6
- validmind/tests/data_validation/WOEBinPlots.py +23 -16
- validmind/tests/data_validation/WOEBinTable.py +35 -30
- validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
- validmind/tests/data_validation/nlp/CommonWords.py +21 -14
- validmind/tests/data_validation/nlp/Hashtags.py +42 -40
- validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
- validmind/tests/data_validation/nlp/Mentions.py +21 -15
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
- validmind/tests/data_validation/nlp/Punctuations.py +24 -20
- validmind/tests/data_validation/nlp/Sentiment.py +27 -8
- validmind/tests/data_validation/nlp/StopWords.py +26 -19
- validmind/tests/data_validation/nlp/TextDescription.py +39 -36
- validmind/tests/data_validation/nlp/Toxicity.py +32 -9
- validmind/tests/decorator.py +81 -42
- validmind/tests/model_validation/BertScore.py +36 -27
- validmind/tests/model_validation/BleuScore.py +25 -19
- validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
- validmind/tests/model_validation/ContextualRecall.py +38 -13
- validmind/tests/model_validation/FeaturesAUC.py +32 -13
- validmind/tests/model_validation/MeteorScore.py +46 -33
- validmind/tests/model_validation/ModelMetadata.py +32 -64
- validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
- validmind/tests/model_validation/RegardScore.py +30 -14
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
- validmind/tests/model_validation/RougeScore.py +36 -30
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
- validmind/tests/model_validation/TokenDisparity.py +31 -23
- validmind/tests/model_validation/ToxicityScore.py +26 -17
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
- validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
- validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
- validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
- validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
- validmind/tests/model_validation/ragas/utils.py +6 -0
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
- validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
- validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
- validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
- validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
- validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
- validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
- validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
- validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
- validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +16 -14
- validmind/tests/prompt_validation/Conciseness.py +7 -5
- validmind/tests/prompt_validation/Delimitation.py +23 -22
- validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
- validmind/tests/prompt_validation/Robustness.py +12 -10
- validmind/tests/prompt_validation/Specificity.py +13 -11
- validmind/tests/prompt_validation/ai_powered_test.py +6 -0
- validmind/tests/run.py +68 -23
- validmind/unit_metrics/__init__.py +81 -144
- validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
- validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
- validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
- validmind/unit_metrics/regression/HuberLoss.py +1 -1
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
- validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
- validmind/unit_metrics/regression/QuantileLoss.py +1 -1
- validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
- validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
- validmind/utils.py +4 -0
- validmind/vm_models/dataset/dataset.py +2 -0
- validmind/vm_models/figure.py +5 -0
- validmind/vm_models/test/metric.py +1 -0
- validmind/vm_models/test/result_wrapper.py +143 -158
- validmind/vm_models/test/threshold_test.py +1 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
- validmind-2.5.18.dist-info/RECORD +324 -0
- validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
- validmind/tests/data_validation/BivariateHistograms.py +0 -117
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
- validmind/tests/data_validation/MissingValuesRisk.py +0 -88
- validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
- validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
- validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
- validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
- validmind-2.5.8.dist-info/RECORD +0 -318
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
- {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
validmind/tests/run.py
CHANGED
```diff
@@ -17,6 +17,7 @@ from validmind.vm_models import (
     MetricResult,
     ResultSummary,
     ResultTable,
+    ResultTableMetadata,
     TestContext,
     TestInput,
     ThresholdTestResults,
@@ -147,6 +148,26 @@ def _combine_figures(figure_lists: List[List[Any]], input_groups: List[Dict[str,
     return [figure for figures in figure_lists for figure in figures]
 
 
+def _combine_unit_metrics(results: List[MetricResultWrapper]):
+    if not results[0].scalar:
+        return
+
+    for result in results:
+        table = ResultTable(
+            data=[{"value": result.scalar}],
+            metadata=ResultTableMetadata(title="Unit Metrics"),
+        )
+        if not result.metric:
+            result.metric = MetricResult(
+                ref_id="will_be_overwritten",
+                key=result.result_id,
+                value=result.scalar,
+                summary=ResultSummary(results=[table]),
+            )
+        else:
+            result.metric.summary.results.append(table)
+
+
 def metric_comparison(
     results: List[MetricResultWrapper],
     test_id: TestID,
@@ -172,22 +193,41 @@ def metric_comparison(
             raise ValueError(f"Unsupported type for value: {v}")
         input_group_strings.append(new_group)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # handle unit metrics (scalar values) by adding it to the summary
+    _combine_unit_metrics(results)
+
+    # Check if the results list contains a result object with a metric
+    if any(
+        hasattr(result, "metric")
+        and hasattr(result.metric, "summary")
+        and result.metric.summary
+        for result in results
+    ):
+        # Compute merged summaries only if there is a result with a metric
+        merged_summary = _combine_summaries(
+            [
+                {"inputs": input_group_strings[i], "summary": result.metric.summary}
+                for i, result in enumerate(results)
+            ]
+        )
+    else:
+        merged_summary = None
+
+    # Check if the results list contains a result object with figures
+    if any(hasattr(result, "figures") and result.figures for result in results):
+        # Compute merged figures only if there is at least one result with figures
+        merged_figures = _combine_figures(
+            [result.figures for result in results],
+            input_groups,
+        )
+        # Patch figure metadata so they are connected to the comparison result
+        if merged_figures and len(merged_figures):
+            for i, figure in enumerate(merged_figures):
+                figure.key = f"{figure.key}-{i}"
+                figure.metadata["_name"] = test_id
+                figure.metadata["_ref_id"] = ref_id
+    else:
+        merged_figures = None
 
     return MetricResultWrapper(
         result_id=test_id,
@@ -196,7 +236,7 @@ def metric_comparison(
             test_id=test_id,
             default_description=f"Comparison test result for {test_id}",
             summary=merged_summary.serialize() if merged_summary else None,
-            figures=merged_figures,
+            figures=merged_figures if merged_figures else None,
             should_generate=generate_description,
         ),
     ],
@@ -294,6 +334,8 @@ def threshold_test_comparison(
 def run_comparison_test(
     test_id: TestID,
     input_grid: Union[Dict[str, List[Any]], List[Dict[str, Any]]],
+    name: str = None,
+    unit_metrics: List[TestID] = None,
     params: Dict[str, Any] = None,
     show: bool = True,
     output_template: str = None,
@@ -308,6 +350,8 @@ def run_comparison_test(
     results = [
         run_test(
             test_id,
+            name=name,
+            unit_metrics=unit_metrics,
            inputs=inputs,
             show=False,
             params=params,
@@ -387,33 +431,34 @@ def run_test(
             "When providing an `input_grid`, you cannot also provide `inputs` or `kwargs`"
         )
 
+    if unit_metrics:
+        metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
+        test_id = f"validmind.composite_metric.{metric_id_name}" or test_id
+
     if input_grid:
         return run_comparison_test(
             test_id,
             input_grid,
+            name=name,
+            unit_metrics=unit_metrics,
             params=params,
             output_template=output_template,
             show=show,
             generate_description=__generate_description,
         )
 
-    if test_id
+    if test_id.startswith("validmind.unit_metrics"):
         # TODO: as we move towards a more unified approach to metrics
         # we will want to make everything functional and remove the
         # separation between unit metrics and "normal" metrics
         return run_metric(test_id, inputs=inputs, params=params, show=show)
 
     if unit_metrics:
-        metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
-        test_id = f"validmind.composite_metric.{metric_id_name}"
-
         error, TestClass = load_composite_metric(
             unit_metrics=unit_metrics, metric_name=metric_id_name
         )
-
         if error:
             raise LoadTestError(error)
-
     else:
         TestClass = load_test(test_id, reload=True)
 
```
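In practice, these run.py changes mean a named set of unit metrics can be run, and compared across an input grid, through the single `run_test` entry point, with the composite id derived from `name`. A usage sketch, not part of the diff: the input ids `"model"`, `"train_dataset"`, and `"test_dataset"` are placeholders, and omitting `test_id` assumes it defaults to `None`:

```python
from validmind.tests import run_test

# "Model Performance" is camel-cased into the derived id
# "validmind.composite_metric.ModelPerformance"
result = run_test(
    name="Model Performance",
    unit_metrics=[
        "validmind.unit_metrics.classification.F1",
        "validmind.unit_metrics.classification.Precision",
        "validmind.unit_metrics.classification.Recall",
    ],
    inputs={"model": "model", "dataset": "test_dataset"},
)
result.log()

# The same composite metric fanned out over two datasets; run_comparison_test
# now forwards name/unit_metrics to each underlying run_test call
comparison = run_test(
    name="Model Performance",
    unit_metrics=["validmind.unit_metrics.classification.F1"],
    input_grid={"model": ["model"], "dataset": ["train_dataset", "test_dataset"]},
)
```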
validmind/unit_metrics/__init__.py
CHANGED
```diff
@@ -2,145 +2,111 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
+import glob
 import hashlib
 import json
+import os
 from importlib import import_module
+from textwrap import dedent
+
+from IPython.display import Markdown, display
 
 from validmind.input_registry import input_registry
 from validmind.tests.decorator import _build_result, _inspect_signature
-from validmind.utils import
+from validmind.utils import test_id_to_name
 
 unit_metric_results_cache = {}
 
 
-def
-
-
-
-    then creates a SHA-256 hash of the string to ensure a unique identifier
-    for the parameters. If params is None, a default hash is returned.
-
-    Args:
-        params (dict or None): The parameters to be serialized.
-
-    Returns:
-        str: A SHA-256 hash of the JSON string representation of the params,
-             or a default hash if params is None.
-    """
-    if params is None:
-        # Handle None by returning a hash of an empty dictionary or a predefined value
-        params_json = json.dumps({})
-    else:
-        params_json = json.dumps(params, sort_keys=True)
-
-    hash_object = hashlib.sha256(params_json.encode())
-    return hash_object.hexdigest()
-
-
-def _serialize_model(model):
-    """
-    Generate a SHA-256 hash for a scikit-learn model based on its type and parameters.
-
-    Args:
-        model VMModel: The model to be serialized.
-
-    Returns:
-        str: A SHA-256 hash of the model's description.
-    """
-
-    model_info = get_model_info(model)
+def _serialize_dataset(dataset, model=None, sample_size=1000):
+    columns = [*dataset.feature_columns, dataset.target_column]
+    if model:
+        columns.append(dataset.prediction_column(model))
 
-
+    df = dataset._df[columns]
 
-
-
-
+    return hashlib.md5(
+        df.sample(n=min(sample_size, df.shape[0]), random_state=42)
+        .to_string(header=True, index=True)
+        .encode()
+    ).hexdigest()
 
 
-def
-
-
+def _get_metric_cache_key(metric_id, inputs, params):
+    cache_elements = [
+        metric_id,
+        hashlib.md5(json.dumps(params, sort_keys=True).encode()).hexdigest(),
+    ]
 
-
-
-    and directly incorporates the model ID and prediction column name to ensure uniqueness.
+    if "model" in inputs:
+        cache_elements.append(inputs["model"].input_id)
 
-
-    dataset
-
-
-
-    Returns:
-        str: MD5 hash of the dataset
+    if "dataset" in inputs:
+        cache_elements.append(inputs["dataset"].input_id)
+        cache_elements.append(
+            _serialize_dataset(inputs["dataset"], inputs.get("model"))
+        )
 
-
-    Including the model ID and prediction column name in the hash calculation ensures uniqueness,
-    especially in cases where the predictions are sparse or the dataset has not significantly changed.
-    This approach guarantees that the hash will distinguish between model-generated predictions
-    and pre-computed prediction columns, addressing potential hash collisions.
-    """
-    return _fast_hash(
-        dataset._df[
-            [
-                *dataset.feature_columns,
-                dataset.target_column,
-                dataset.prediction_column(model),
-            ]
-        ]
-    )
+    return hashlib.md5("_".join(cache_elements).encode()).hexdigest()
 
 
-def
-    """
-    Generates a fast hash by sampling, converting to string and md5 hashing.
+def describe_metric(metric_id, raw=False):
+    """Describe a metric
 
     Args:
-
-
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
+        raw (bool): Whether to return the description as a dictionary
 
     Returns:
-
+        dict: A dictionary containing the metric description
     """
-
-
-    return hashlib.md5(
-        df_sample.to_string(header=True, index=True).encode()
-    ).hexdigest()
+    metric = load_metric(metric_id)
+    inputs, params = _inspect_signature(metric)
 
+    if raw:
+        return {
+            "id": metric_id,
+            "description": metric.__doc__,
+            "inputs": inputs,
+            "params": params,
+        }
 
-
-
+    inputs = ", ".join(inputs.keys())
+    params = ", ".join(params.keys())
+    description_md = f"""
+    ### {test_id_to_name(metric_id)} (*'{metric_id}'*)
 
-
-    serialized_params = _serialize_params(params) if params else "None"
-    cache_elements.append(serialized_params)
+    {metric.__doc__ or ""}
 
-
-    if not isinstance(inputs, dict):
-        raise TypeError("Expected 'inputs' to be a dictionary.")
+    **Inputs**: {inputs}
 
-
-
-
-
-    dataset = inputs["dataset"]
-    model = inputs["model"]
+    **Parameters**: {params}
+    """
+    display(Markdown(dedent(description_md)))
 
-    cache_elements.append(_serialize_dataset(dataset, model))
 
-
+def list_metrics():
+    """List all available metrics
 
-
-
-
-
+    Returns:
+        list: A list of metric ids
+    """
+    # current directory of this file is the __init__.py file in the validmind/unit_metrics directory
+    # glob for all metrics in the unit_metrics directory (indicated by capitalized python files)
+    # recursive since we want to include subdirectories
+    curr_dir = os.path.dirname(os.path.realpath(__file__))
+    return [
+        f"{__name__}.{os.path.relpath(metric, curr_dir).replace('/', '.')[:-3]}"
+        for metric in glob.glob(f"{curr_dir}/**/*.py", recursive=True)
+        if os.path.isfile(metric) and os.path.basename(metric)[0].isupper()
+    ]
 
 
 def load_metric(metric_id):
     """Load a metric class from a string
 
     Args:
-        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
 
     Returns:
         callable: The metric function
@@ -152,7 +118,7 @@ def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False)
     """Run a single metric and cache the results
 
     Args:
-        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
         inputs (dict): A dictionary of the metric inputs
         params (dict): A dictionary of the metric parameters
         show (bool): Whether to display the results
@@ -164,7 +130,7 @@ def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False)
     }
     params = params or {}
 
-    cache_key =
+    cache_key = _get_metric_cache_key(metric_id, inputs, params)
 
     if cache_key not in unit_metric_results_cache:
         metric = load_metric(metric_id)
@@ -182,53 +148,24 @@ def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False)
             result,
             # store the input ids that were used to calculate the result
             [v.input_id for v in inputs.values()],
+            # store the params that were used to calculate the result
+            params,
         )
 
-
+    cached_result = unit_metric_results_cache[cache_key]
 
     if value_only:
-        return
-
-
-
-    <thead>
-        <tr>
-            <th>Metric</th>
-            <th>Value</th>
-        </tr>
-    </thead>
-    <tbody>
-        <tr>
-            <td><strong>{test_id_to_name(metric_id)}</strong></td>
-            <td>{value:.4f}</td>
-        </tr>
-    </tbody>
-</table>
-<style>
-    th, td {{
-        padding: 5px;
-        text-align: left;
-    }}
-</style>
-"""
-    result = _build_result(
-        results=value,
+        return cached_result[0]
+
+    result_wrapper = _build_result(
+        results=cached_result[0],
         test_id=metric_id,
-
-
-
+        inputs=cached_result[1],
+        params=cached_result[2],
+        generate_description=False,
     )
 
-    # in case the user tries to log the result object
-    def log():
-        raise Exception(
-            "Cannot log unit metrics directly..."
-            "You can run this unit metric as part of a composite metric and log that"
-        )
-
-    result.log = log
-
     if show:
-
+        result_wrapper.show()
 
-    return
+    return result_wrapper
```
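The rewritten module replaces the old SHA-256 serialization helpers with a leaner cache key (metric id, a params hash, input ids, and a sampled MD5 digest of the relevant dataset columns) and gains discovery helpers. A sketch of the resulting surface, assuming `vm_model` and `vm_test_ds` are previously initialized ValidMind model and dataset inputs:

```python
from validmind.unit_metrics import describe_metric, list_metrics, run_metric

# Discover available unit metrics (capitalized .py files in the package)
metrics = list_metrics()
# e.g. ['validmind.unit_metrics.classification.F1', ...]

describe_metric("validmind.unit_metrics.classification.F1")  # renders Markdown

# A second call with identical inputs/params is served from the in-memory
# cache, keyed on the metric id, params, input ids, and dataset digest
f1 = run_metric(
    "validmind.unit_metrics.classification.F1",
    inputs={"model": vm_model, "dataset": vm_test_ds},
    value_only=True,  # return the raw float instead of a result wrapper
)
```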
validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py}
CHANGED
```diff
@@ -7,8 +7,8 @@ from sklearn.metrics import accuracy_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def Accuracy(dataset, model):
     """Calculates the accuracy of a model"""
     return accuracy_score(dataset.y, dataset.y_pred(model))
```
validmind/unit_metrics/classification/{sklearn/F1.py → F1.py}
CHANGED
```diff
@@ -7,7 +7,7 @@ from sklearn.metrics import f1_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def F1(model, dataset, **kwargs):
     return f1_score(dataset.y, dataset.y_pred(model), **kwargs)
```
validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py}
CHANGED
```diff
@@ -7,7 +7,7 @@ from sklearn.metrics import precision_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def Precision(model, dataset, **kwargs):
     return precision_score(dataset.y, dataset.y_pred(model), **kwargs)
```
validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py}
CHANGED
```diff
@@ -9,10 +9,9 @@ from sklearn.preprocessing import LabelBinarizer
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def ROC_AUC(model, dataset, **kwargs):
-
     y_true = dataset.y
 
     if len(unique(y_true)) > 2:
```
validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py}
CHANGED
```diff
@@ -7,7 +7,7 @@ from sklearn.metrics import recall_score
 from validmind import tags, tasks
 
 
-@tags("classification", "sklearn", "unit_metric")
 @tasks("classification")
+@tags("classification")
 def Recall(model, dataset, **kwargs):
     return recall_score(dataset.y, dataset.y_pred(model), **kwargs)
```
validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py}
CHANGED
```diff
@@ -7,7 +7,7 @@ from sklearn.metrics import mean_absolute_error as _mean_absolute_error
 from validmind import tags, tasks
 
 
-@tags("regression"
+@tags("regression")
 @tasks("regression")
 def MeanAbsoluteError(model, dataset, **kwargs):
     return _mean_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
```
validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py}
CHANGED
```diff
@@ -7,7 +7,7 @@ from sklearn.metrics import mean_squared_error
 from validmind import tags, tasks
 
 
-@tags("regression"
+@tags("regression")
 @tasks("regression")
 def MeanSquaredError(model, dataset, **kwargs):
     return mean_squared_error(dataset.y, dataset.y_pred(model), **kwargs)
```
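All of these unit metrics now carry a single task-aligned tag (the "sklearn" and "unit_metric" tags are dropped along with the sklearn/ subdirectory) and share the same functional shape, so a custom unit metric follows the same pattern. A sketch, not part of the package:

```python
from sklearn.metrics import median_absolute_error

from validmind import tags, tasks


@tasks("regression")
@tags("regression")
def MedianAbsoluteError(model, dataset, **kwargs):
    """Calculates the median absolute error of a model"""
    return median_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
```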
validmind/utils.py
CHANGED
```diff
@@ -175,6 +175,10 @@ def format_records(df):
             continue
         not_zero = df[col][df[col] != 0]
         min_number = not_zero.min()
+        if math.isnan(min_number) or math.isinf(min_number):
+            df[col] = df[col].round(DEFAULT_SMALL_NUMBER_DECIMALS)
+            continue
+
         _, min_scale = precision_and_scale(min_number)
 
         if min_number >= 10:
```
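The new guard protects `format_records` against columns whose smallest non-zero value is not finite: an all-zero (or all-NaN) column leaves `not_zero` empty, its `min()` is NaN, and `precision_and_scale` would previously receive that NaN. A minimal standalone repro of the edge case, outside the package:

```python
import math

import pandas as pd

col = pd.Series([0, 0, 0])
not_zero = col[col != 0]        # empty series for an all-zero column
min_number = not_zero.min()     # nan
print(math.isnan(min_number))   # True -> fall back to default rounding
```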
validmind/vm_models/dataset/dataset.py
CHANGED
```diff
@@ -323,6 +323,7 @@ class VMDataset(VMInput):
 
         if column_name and column_name in self.feature_columns:
             self.feature_columns.remove(column_name)
+            self._set_feature_columns(self.feature_columns)
 
         return self.extra_columns.prediction_column(model, column_name)
 
@@ -333,6 +334,7 @@ class VMDataset(VMInput):
 
         if column_name and column_name in self.feature_columns:
             self.feature_columns.remove(column_name)
+            self._set_feature_columns(self.feature_columns)
 
         return self.extra_columns.probability_column(model, column_name)
 
```
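Both `prediction_column` and `probability_column` already removed a linked column from `feature_columns`; the added `_set_feature_columns` call re-runs the derived column bookkeeping (presumably the numeric/categorical splits) so it stays in sync. A hypothetical flow, assuming the raw DataFrame already contains a "pred" column:

```python
# Link an existing column as the model's predictions; "pred" is dropped from
# feature_columns and, with this fix, the derived column metadata is refreshed
vm_ds.prediction_column(vm_model, "pred")

assert "pred" not in vm_ds.feature_columns
```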
validmind/vm_models/figure.py
CHANGED
```diff
@@ -157,6 +157,11 @@ class Figure:
 
             return f"data:image/png;base64,{b64_data}"
 
+        elif is_png_image(self.figure):
+            b64_data = base64.b64encode(self.figure).decode("utf-8")
+
+            return f"data:image/png;base64,{b64_data}"
+
         raise UnsupportedFigureError(
             f"Unrecognized figure type: {get_full_typename(self.figure)}"
         )
```
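With this branch, the serializer also accepts raw PNG bytes. `is_png_image` itself is not included in the hunk; a plausible stand-in (an assumption, not the actual validmind helper) just checks for the fixed 8-byte PNG signature:

```python
PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"


def is_png_image(figure) -> bool:
    # Raw PNG data always begins with the fixed 8-byte PNG signature
    return isinstance(figure, bytes) and figure.startswith(PNG_SIGNATURE)
```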