validmind 2.5.25__py3-none-any.whl → 2.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -17
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +66 -85
- validmind/ai/test_result_description/context.py +2 -2
- validmind/ai/utils.py +26 -1
- validmind/api_client.py +43 -79
- validmind/client.py +5 -7
- validmind/client_config.py +1 -1
- validmind/datasets/__init__.py +1 -1
- validmind/datasets/classification/customer_churn.py +7 -5
- validmind/datasets/nlp/__init__.py +2 -2
- validmind/errors.py +6 -10
- validmind/html_templates/content_blocks.py +18 -16
- validmind/logging.py +21 -16
- validmind/tests/__init__.py +28 -5
- validmind/tests/__types__.py +186 -170
- validmind/tests/_store.py +7 -21
- validmind/tests/comparison.py +362 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
- validmind/tests/data_validation/ADF.py +49 -83
- validmind/tests/data_validation/AutoAR.py +59 -96
- validmind/tests/data_validation/AutoMA.py +59 -96
- validmind/tests/data_validation/AutoStationarity.py +66 -114
- validmind/tests/data_validation/ClassImbalance.py +48 -117
- validmind/tests/data_validation/DatasetDescription.py +180 -209
- validmind/tests/data_validation/DatasetSplit.py +50 -75
- validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
- validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
- validmind/tests/data_validation/Duplicates.py +21 -90
- validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
- validmind/tests/data_validation/HighCardinality.py +32 -80
- validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
- validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
- validmind/tests/data_validation/IQROutliersTable.py +40 -80
- validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
- validmind/tests/data_validation/KPSS.py +33 -81
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
- validmind/tests/data_validation/MissingValues.py +17 -58
- validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
- validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
- validmind/tests/data_validation/RollingStatsPlot.py +50 -81
- validmind/tests/data_validation/SeasonalDecompose.py +102 -184
- validmind/tests/data_validation/Skewness.py +27 -64
- validmind/tests/data_validation/SpreadPlot.py +34 -57
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
- validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
- validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
- validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
- validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
- validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
- validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
- validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
- validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
- validmind/tests/data_validation/TooManyZeroValues.py +21 -70
- validmind/tests/data_validation/UniqueRows.py +23 -62
- validmind/tests/data_validation/WOEBinPlots.py +83 -109
- validmind/tests/data_validation/WOEBinTable.py +28 -69
- validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
- validmind/tests/data_validation/nlp/CommonWords.py +49 -57
- validmind/tests/data_validation/nlp/Hashtags.py +27 -49
- validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
- validmind/tests/data_validation/nlp/Mentions.py +32 -63
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
- validmind/tests/data_validation/nlp/Punctuations.py +63 -47
- validmind/tests/data_validation/nlp/Sentiment.py +4 -0
- validmind/tests/data_validation/nlp/StopWords.py +62 -91
- validmind/tests/data_validation/nlp/TextDescription.py +116 -159
- validmind/tests/data_validation/nlp/Toxicity.py +12 -4
- validmind/tests/decorator.py +33 -242
- validmind/tests/load.py +212 -153
- validmind/tests/model_validation/BertScore.py +13 -7
- validmind/tests/model_validation/BleuScore.py +4 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
- validmind/tests/model_validation/ContextualRecall.py +3 -0
- validmind/tests/model_validation/FeaturesAUC.py +43 -74
- validmind/tests/model_validation/MeteorScore.py +3 -0
- validmind/tests/model_validation/RegardScore.py +5 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
- validmind/tests/model_validation/embeddings/utils.py +53 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
- validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
- validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
- validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
- validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
- validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
- validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
- validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
- validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
- validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
- validmind/tests/output.py +120 -0
- validmind/tests/prompt_validation/Bias.py +55 -98
- validmind/tests/prompt_validation/Clarity.py +56 -99
- validmind/tests/prompt_validation/Conciseness.py +63 -101
- validmind/tests/prompt_validation/Delimitation.py +48 -89
- validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
- validmind/tests/prompt_validation/Robustness.py +80 -121
- validmind/tests/prompt_validation/Specificity.py +61 -95
- validmind/tests/prompt_validation/ai_powered_test.py +2 -2
- validmind/tests/run.py +314 -496
- validmind/tests/test_providers.py +109 -79
- validmind/tests/utils.py +91 -0
- validmind/unit_metrics/__init__.py +16 -155
- validmind/unit_metrics/classification/F1.py +1 -0
- validmind/unit_metrics/classification/Precision.py +1 -0
- validmind/unit_metrics/classification/ROC_AUC.py +1 -0
- validmind/unit_metrics/classification/Recall.py +1 -0
- validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
- validmind/unit_metrics/regression/HuberLoss.py +1 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
- validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
- validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
- validmind/unit_metrics/regression/QuantileLoss.py +1 -0
- validmind/unit_metrics/regression/RSquaredScore.py +2 -1
- validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
- validmind/utils.py +66 -17
- validmind/vm_models/__init__.py +2 -17
- validmind/vm_models/dataset/dataset.py +31 -4
- validmind/vm_models/figure.py +7 -37
- validmind/vm_models/model.py +3 -0
- validmind/vm_models/result/__init__.py +7 -0
- validmind/vm_models/result/result.jinja +21 -0
- validmind/vm_models/result/result.py +337 -0
- validmind/vm_models/result/utils.py +160 -0
- validmind/vm_models/test_suite/runner.py +16 -54
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +43 -77
- validmind/vm_models/test_suite/test_suite.py +8 -40
- validmind-2.6.8.dist-info/METADATA +137 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/RECORD +182 -189
- validmind/tests/data_validation/AutoSeasonality.py +0 -190
- validmind/tests/metadata.py +0 -59
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
- validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
- validmind/unit_metrics/composite.py +0 -238
- validmind/vm_models/test/metric.py +0 -98
- validmind/vm_models/test/metric_result.py +0 -61
- validmind/vm_models/test/output_template.py +0 -55
- validmind/vm_models/test/result_summary.py +0 -76
- validmind/vm_models/test/result_wrapper.py +0 -488
- validmind/vm_models/test/test.py +0 -103
- validmind/vm_models/test/threshold_test.py +0 -106
- validmind/vm_models/test/threshold_test_result.py +0 -75
- validmind/vm_models/test_context.py +0 -259
- validmind-2.5.25.dist-info/METADATA +0 -118
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/LICENSE +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/WHEEL +0 -0
- {validmind-2.5.25.dist-info → validmind-2.6.8.dist-info}/entry_points.txt +0 -0
@@ -4,28 +4,46 @@
|
|
4
4
|
|
5
5
|
import importlib.util
|
6
6
|
import os
|
7
|
+
import re
|
7
8
|
import sys
|
8
|
-
from
|
9
|
+
from pathlib import Path
|
10
|
+
from typing import List, Protocol
|
9
11
|
|
10
12
|
from validmind.logging import get_logger
|
11
13
|
|
12
|
-
from ._store import test_provider_store
|
13
|
-
|
14
14
|
logger = get_logger(__name__)
|
15
15
|
|
16
|
+
# list all files in directory of this file
|
17
|
+
__private_files = [f.name for f in Path(__file__).parent.glob("*.py")]
|
18
|
+
|
19
|
+
|
20
|
+
def _is_test_file(path: Path) -> bool:
    """Decide whether a Python file should be listed as a test.

    A file qualifies when its name is not one of the modules living next to
    this provider module (``__private_files``) and either

    * its file name starts with an uppercase letter, or
    * its source contains a ``def`` for a function named exactly like the
      file stem.

    Args:
        path (Path): Path of the candidate ``.py`` file.

    Returns:
        bool: True if the file looks like a test module, False otherwise.
    """
    # Cheap membership check first so private modules are never read from disk.
    if path.name in __private_files:
        return False

    if path.name[0].isupper():
        return True

    # `\s+` requires real whitespace after `def`, and the negative lookahead
    # stops `def foo_helper` from counting as a definition of `foo`.
    pattern = r"\bdef\s+" + re.escape(path.stem) + r"(?!\w)"
    # Python sources are UTF-8 by default; undecodable bytes are replaced so a
    # best-effort scan cannot crash discovery on a single odd file.
    source = path.read_text(encoding="utf-8", errors="replace")
    return re.search(pattern, source) is not None
|
25
|
+
|
16
26
|
|
17
27
|
class TestProvider(Protocol):
|
18
28
|
"""Protocol for user-defined test providers"""
|
19
29
|
|
20
|
-
def
|
21
|
-
"""
|
30
|
+
def list_tests(self) -> List[str]:
|
31
|
+
"""List all tests in the given namespace
|
32
|
+
|
33
|
+
Returns:
|
34
|
+
list: A list of test IDs
|
35
|
+
"""
|
36
|
+
...
|
37
|
+
|
38
|
+
def load_test(self, test_id: str) -> callable:
|
39
|
+
"""Load the test function identified by the given test_id
|
22
40
|
|
23
41
|
Args:
|
24
42
|
test_id (str): The test ID (does not contain the namespace under which
|
25
43
|
the test is registered)
|
26
44
|
|
27
45
|
Returns:
|
28
|
-
|
46
|
+
callable: The test function
|
29
47
|
|
30
48
|
Raises:
|
31
49
|
FileNotFoundError: If the test is not found
|
@@ -33,22 +51,6 @@ class TestProvider(Protocol):
|
|
33
51
|
...
|
34
52
|
|
35
53
|
|
36
|
-
class LocalTestProviderLoadModuleError(Exception):
|
37
|
-
"""
|
38
|
-
When the local file module can't be loaded.
|
39
|
-
"""
|
40
|
-
|
41
|
-
pass
|
42
|
-
|
43
|
-
|
44
|
-
class LocalTestProviderLoadTestError(Exception):
|
45
|
-
"""
|
46
|
-
When local file module was loaded but the test class can't be located.
|
47
|
-
"""
|
48
|
-
|
49
|
-
pass
|
50
|
-
|
51
|
-
|
52
54
|
class LocalTestProvider:
|
53
55
|
"""
|
54
56
|
Test providers in ValidMind are responsible for loading tests from different sources,
|
@@ -69,6 +71,11 @@ class LocalTestProvider:
|
|
69
71
|
# Register the test provider with a namespace
|
70
72
|
register_test_provider("my_namespace", test_provider)
|
71
73
|
|
74
|
+
# List all tests in the namespace (returns a list of test IDs)
|
75
|
+
test_provider.list_tests()
|
76
|
+
# this is used by the validmind.tests.list_tests() function to aggregate all tests
|
77
|
+
# from all test providers
|
78
|
+
|
72
79
|
# Load a test using the test_id (namespace + path to test class module)
|
73
80
|
test = test_provider.load_test("my_namespace.my_test_class")
|
74
81
|
# full path to the test class module is /path/to/tests/folder/my_test_class.py
|
@@ -86,7 +93,32 @@ class LocalTestProvider:
|
|
86
93
|
Args:
|
87
94
|
root_folder (str): The root directory for local tests.
|
88
95
|
"""
|
89
|
-
self.root_folder = root_folder
|
96
|
+
self.root_folder = os.path.abspath(root_folder)
|
97
|
+
|
98
|
+
def list_tests(self):
    """Discover the IDs of all test modules stored under the root folder.

    Walks ``self.root_folder`` recursively, ignores anything that is not a
    ``.py`` file or whose name starts with a double underscore, and keeps the
    files accepted by ``_is_test_file``. A test ID is the file's path relative
    to the root folder, with folder names and the file stem joined by dots.

    Returns:
        list: The discovered test IDs in sorted order
    """
    discovered = []

    for directory, _, filenames in os.walk(self.root_folder):
        for name in filenames:
            is_candidate = name.endswith(".py") and not name.startswith("__")
            if not is_candidate:
                continue

            module_path = Path(directory) / name
            if _is_test_file(module_path):
                relative = module_path.relative_to(self.root_folder)
                # `parents` runs from the innermost folder out to "."; "." has
                # an empty stem and is dropped, then the order is flipped so
                # the outermost folder comes first.
                id_parts = [folder.stem for folder in relative.parents if folder.stem]
                id_parts.reverse()
                id_parts.append(module_path.stem)
                discovered.append(".".join(id_parts))

    discovered.sort()
    return discovered
|
90
122
|
|
91
123
|
def load_test(self, test_id: str):
|
92
124
|
"""
|
@@ -100,60 +132,58 @@ class LocalTestProvider:
|
|
100
132
|
The test class that matches the last part of the test_id.
|
101
133
|
|
102
134
|
Raises:
|
103
|
-
|
135
|
+
LocalTestProviderLoadModuleError: If the test module cannot be imported
|
136
|
+
LocalTestProviderLoadTestError: If the test class cannot be found in the module
|
104
137
|
"""
|
105
|
-
|
106
|
-
file_path = os.path.join(self.root_folder,
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
test_provider (TestProvider): The test provider
|
158
|
-
"""
|
159
|
-
test_provider_store.register_test_provider(namespace, test_provider)
|
138
|
+
# Convert test_id to file path
|
139
|
+
file_path = os.path.join(self.root_folder, f"{test_id.replace('.', '/')}.py")
|
140
|
+
file_path = os.path.abspath(file_path)
|
141
|
+
|
142
|
+
module_dir = os.path.dirname(file_path)
|
143
|
+
module_name = test_id.split(".")[-1]
|
144
|
+
|
145
|
+
# module specification
|
146
|
+
spec = importlib.util.spec_from_file_location(
|
147
|
+
name=module_name,
|
148
|
+
location=file_path,
|
149
|
+
submodule_search_locations=[module_dir],
|
150
|
+
)
|
151
|
+
|
152
|
+
# module instance from specification
|
153
|
+
module = importlib.util.module_from_spec(spec)
|
154
|
+
|
155
|
+
# add module to sys.modules
|
156
|
+
sys.modules[module_name] = module
|
157
|
+
# execute the module
|
158
|
+
spec.loader.exec_module(module)
|
159
|
+
|
160
|
+
# test function should match the module (file) name exactly
|
161
|
+
return getattr(module, module_name)
|
162
|
+
|
163
|
+
|
164
|
+
class ValidMindTestProvider:
    """Test provider for ValidMind tests

    Combines two `LocalTestProvider` instances: one rooted at the directory of
    this module (regular tests) and one rooted at the sibling ``unit_metrics``
    directory. Unit metric IDs are exposed with a ``unit_metrics.`` prefix so
    both kinds of ID can be listed and loaded through a single provider.
    """

    # Namespace prefix that marks a test ID as belonging to the metrics provider
    _METRICS_PREFIX = "unit_metrics."

    def __init__(self):
        # two subproviders: unit_metrics and normal tests
        self.metrics_provider = LocalTestProvider(
            os.path.join(os.path.dirname(__file__), "..", "unit_metrics")
        )
        self.tests_provider = LocalTestProvider(os.path.dirname(__file__))

    def list_tests(self) -> List[str]:
        """List all tests in the ValidMind test provider

        Returns:
            list: Unit metric IDs (each prefixed with ``unit_metrics.``)
                followed by the regular test IDs
        """
        metric_ids = [
            f"{self._METRICS_PREFIX}{test}"
            for test in self.metrics_provider.list_tests()
        ]
        test_ids = self.tests_provider.list_tests()

        return metric_ids + test_ids

    def load_test(self, test_id: str) -> callable:
        """Load a ValidMind test or unit metric

        Args:
            test_id (str): The test ID. IDs starting with ``unit_metrics.`` are
                routed to the metrics provider with that prefix removed; all
                other IDs go to the tests provider unchanged.

        Returns:
            callable: Whatever the selected subprovider's ``load_test`` returns
        """
        prefix = self._METRICS_PREFIX

        if test_id.startswith(prefix):
            # Strip only the leading namespace. `str.replace` would also remove
            # later occurrences of "unit_metrics." inside the ID.
            return self.metrics_provider.load_test(test_id[len(prefix):])

        return self.tests_provider.load_test(test_id)
|
validmind/tests/utils.py
CHANGED
@@ -6,6 +6,13 @@
|
|
6
6
|
|
7
7
|
import inspect
|
8
8
|
|
9
|
+
import numpy as np
|
10
|
+
import pandas as pd
|
11
|
+
|
12
|
+
from validmind.logging import get_logger
|
13
|
+
|
14
|
+
logger = get_logger(__name__)
|
15
|
+
|
9
16
|
|
10
17
|
def test_description(test_class, truncate=True):
|
11
18
|
description = inspect.getdoc(test_class).strip()
|
@@ -14,3 +21,87 @@ def test_description(test_class, truncate=True):
|
|
14
21
|
return description.strip().split("\n")[0] + "..."
|
15
22
|
|
16
23
|
return description
|
24
|
+
|
25
|
+
|
26
|
+
def remove_nan_pairs(y_true, y_pred, dataset_id=None):
    """
    Drop every (true, predicted) pair in which at least one value is NaN/None.

    Both inputs are converted to numpy arrays. When nothing has to be dropped
    the converted arrays are returned as they are; otherwise a warning with
    the number of dropped rows is logged and the filtered arrays are returned.

    Args:
        y_true: List or array of true values
        y_pred: List or array of predicted values
        dataset_id: Optional identifier for the dataset (for logging)

    Returns:
        tuple: (cleaned_y_true, cleaned_y_pred)
    """
    true_values = np.array(y_true)
    predicted_values = np.array(y_pred)

    # True wherever both members of the pair are present
    keep = ~(pd.isnull(true_values) | pd.isnull(predicted_values))
    kept_total = np.sum(keep)
    dropped_total = len(true_values) - kept_total

    if dropped_total <= 0:
        return true_values, predicted_values

    dataset_info = f" from dataset '{dataset_id}'" if dataset_id else ""
    logger.warning(
        f"Found {dropped_total} row(s){dataset_info} with NaN/None values. "
        f"Removing these pairs. {len(true_values)} -> {kept_total} pairs remaining."
    )
    return true_values[keep], predicted_values[keep]
|
53
|
+
|
54
|
+
|
55
|
+
def ensure_equal_lengths(y_true, y_pred, dataset_id=None):
    """
    Check if true and predicted values have matching lengths, log warning if they don't,
    and truncate to the shorter length if necessary. Also removes any NaN/None values.

    Lengths are reconciled before NaN/None pairs are removed: the pairwise
    null check in `remove_nan_pairs` combines both arrays element-wise, which
    numpy cannot do for arrays of different lengths.

    Args:
        y_true: List or array of true values
        y_pred: List or array of predicted values
        dataset_id: Optional identifier for the dataset (for logging)

    Returns:
        tuple: (cleaned_y_true, cleaned_y_pred)
    """
    # Arrays give uniform positional slicing regardless of the input container
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # First handle length mismatches so the pairs line up one-to-one
    if len(y_true) != len(y_pred):
        dataset_info = f" from dataset '{dataset_id}'" if dataset_id else ""
        min_length = min(len(y_true), len(y_pred))
        logger.warning(
            f"Length mismatch{dataset_info}: "
            f"true values ({len(y_true)}) != predicted values ({len(y_pred)}). "
            f"Truncating to first {min_length} pairs."
        )
        y_true, y_pred = y_true[:min_length], y_pred[:min_length]

    # Then remove any NaN values from the aligned pairs
    return remove_nan_pairs(y_true, y_pred, dataset_id)
|
83
|
+
|
84
|
+
|
85
|
+
def validate_prediction(y_true, y_pred, dataset_id=None):
    """
    Comprehensive validation of true and predicted value pairs.
    Handles NaN/None values and length mismatches.

    Args:
        y_true: List or array of true values
        y_pred: List or array of predicted values
        dataset_id: Optional identifier for the dataset (for logging)

    Returns:
        tuple: (cleaned_y_true, cleaned_y_pred) with matching lengths and no NaN values

    Example:
        >>> y_true, y_pred = validate_prediction(dataset.y, model.predict(dataset.X), dataset.input_id)
    """
    # `ensure_equal_lengths` already removes NaN/None pairs in addition to
    # reconciling lengths, so a separate NaN pass here would only repeat work.
    return ensure_equal_lengths(y_true, y_pred, dataset_id)
|
@@ -2,170 +2,31 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
import
|
6
|
-
import
|
7
|
-
import
|
8
|
-
import os
|
9
|
-
from importlib import import_module
|
10
|
-
from textwrap import dedent
|
5
|
+
from validmind.tests._store import test_provider_store
|
6
|
+
from validmind.tests.load import describe_test
|
7
|
+
from validmind.tests.run import run_test
|
11
8
|
|
12
|
-
from IPython.display import Markdown, display
|
13
9
|
|
14
|
-
|
15
|
-
|
16
|
-
|
10
|
+
def list_metrics(**kwargs):
|
11
|
+
"""List all metrics"""
|
12
|
+
vm_provider = test_provider_store.get_test_provider("validmind")
|
13
|
+
vm_metrics_provider = vm_provider.metrics_provider
|
17
14
|
|
18
|
-
|
15
|
+
prefix = "validmind.unit_metrics."
|
19
16
|
|
20
|
-
|
21
|
-
def _serialize_dataset(dataset, model=None, sample_size=1000):
|
22
|
-
columns = [*dataset.feature_columns, dataset.target_column]
|
23
|
-
if model:
|
24
|
-
columns.append(dataset.prediction_column(model))
|
25
|
-
|
26
|
-
df = dataset._df[columns]
|
27
|
-
|
28
|
-
return hashlib.md5(
|
29
|
-
df.sample(n=min(sample_size, df.shape[0]), random_state=42)
|
30
|
-
.to_string(header=True, index=True)
|
31
|
-
.encode()
|
32
|
-
).hexdigest()
|
33
|
-
|
34
|
-
|
35
|
-
def _get_metric_cache_key(metric_id, inputs, params):
|
36
|
-
cache_elements = [
|
37
|
-
metric_id,
|
38
|
-
hashlib.md5(json.dumps(params, sort_keys=True).encode()).hexdigest(),
|
39
|
-
]
|
40
|
-
|
41
|
-
if "model" in inputs:
|
42
|
-
cache_elements.append(inputs["model"].input_id)
|
43
|
-
|
44
|
-
if "dataset" in inputs:
|
45
|
-
cache_elements.append(inputs["dataset"].input_id)
|
46
|
-
cache_elements.append(
|
47
|
-
_serialize_dataset(inputs["dataset"], inputs.get("model"))
|
48
|
-
)
|
49
|
-
|
50
|
-
return hashlib.md5("_".join(cache_elements).encode()).hexdigest()
|
51
|
-
|
52
|
-
|
53
|
-
def describe_metric(metric_id, raw=False):
|
54
|
-
"""Describe a metric
|
55
|
-
|
56
|
-
Args:
|
57
|
-
metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
|
58
|
-
raw (bool): Whether to return the description as a dictionary
|
59
|
-
|
60
|
-
Returns:
|
61
|
-
dict: A dictionary containing the metric description
|
62
|
-
"""
|
63
|
-
metric = load_metric(metric_id)
|
64
|
-
inputs, params = _inspect_signature(metric)
|
65
|
-
|
66
|
-
if raw:
|
67
|
-
return {
|
68
|
-
"id": metric_id,
|
69
|
-
"description": metric.__doc__,
|
70
|
-
"inputs": inputs,
|
71
|
-
"params": params,
|
72
|
-
}
|
73
|
-
|
74
|
-
inputs = ", ".join(inputs.keys())
|
75
|
-
params = ", ".join(params.keys())
|
76
|
-
description_md = f"""
|
77
|
-
### {test_id_to_name(metric_id)} (*'{metric_id}'*)
|
78
|
-
|
79
|
-
{metric.__doc__ or ""}
|
80
|
-
|
81
|
-
**Inputs**: {inputs}
|
82
|
-
|
83
|
-
**Parameters**: {params}
|
84
|
-
"""
|
85
|
-
display(Markdown(dedent(description_md)))
|
86
|
-
|
87
|
-
|
88
|
-
def list_metrics():
|
89
|
-
"""List all available metrics
|
90
|
-
|
91
|
-
Returns:
|
92
|
-
list: A list of metric ids
|
93
|
-
"""
|
94
|
-
# current directory of this file is the __init__.py file in the validmind/unit_metrics directory
|
95
|
-
# glob for all metrics in the unit_metrics directory (indicated by capitalized python files)
|
96
|
-
# recursive since we want to include subdirectories
|
97
|
-
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
98
17
|
return [
|
99
|
-
f"{
|
100
|
-
for metric in glob.glob(f"{curr_dir}/**/*.py", recursive=True)
|
101
|
-
if os.path.isfile(metric) and os.path.basename(metric)[0].isupper()
|
18
|
+
f"{prefix}{test_id}" for test_id in vm_metrics_provider.list_tests(**kwargs)
|
102
19
|
]
|
103
20
|
|
104
21
|
|
105
|
-
def
|
106
|
-
"""
|
107
|
-
|
108
|
-
Args:
|
109
|
-
metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
|
110
|
-
|
111
|
-
Returns:
|
112
|
-
callable: The metric function
|
113
|
-
"""
|
114
|
-
return getattr(import_module(metric_id), metric_id.split(".")[-1])
|
115
|
-
|
116
|
-
|
117
|
-
def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False):
|
118
|
-
"""Run a single metric and cache the results
|
119
|
-
|
120
|
-
Args:
|
121
|
-
metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
|
122
|
-
inputs (dict): A dictionary of the metric inputs
|
123
|
-
params (dict): A dictionary of the metric parameters
|
124
|
-
show (bool): Whether to display the results
|
125
|
-
value_only (bool): Whether to return only the value
|
126
|
-
"""
|
127
|
-
inputs = {
|
128
|
-
k: input_registry.get(v) if isinstance(v, str) else v
|
129
|
-
for k, v in (inputs or {}).items()
|
130
|
-
}
|
131
|
-
params = params or {}
|
132
|
-
|
133
|
-
cache_key = _get_metric_cache_key(metric_id, inputs, params)
|
134
|
-
|
135
|
-
if cache_key not in unit_metric_results_cache:
|
136
|
-
metric = load_metric(metric_id)
|
137
|
-
_inputs, _params = _inspect_signature(metric)
|
138
|
-
|
139
|
-
result = metric(
|
140
|
-
**{k: v for k, v in inputs.items() if k in _inputs.keys()},
|
141
|
-
**{
|
142
|
-
k: v
|
143
|
-
for k, v in params.items()
|
144
|
-
if k in _params.keys() or "kwargs" in _params.keys()
|
145
|
-
},
|
146
|
-
)
|
147
|
-
unit_metric_results_cache[cache_key] = (
|
148
|
-
result,
|
149
|
-
# store the input ids that were used to calculate the result
|
150
|
-
[v.input_id for v in inputs.values()],
|
151
|
-
# store the params that were used to calculate the result
|
152
|
-
params,
|
153
|
-
)
|
154
|
-
|
155
|
-
cached_result = unit_metric_results_cache[cache_key]
|
22
|
+
def describe_metric(metric_id: str, **kwargs):
|
23
|
+
"""Describe a metric by delegating to `describe_test`.

Args:
    metric_id (str): ID of the metric, passed through as the test ID.
    **kwargs: Forwarded unchanged to `describe_test`.

Returns:
    Whatever `describe_test` returns for this ID.
"""
|
24
|
+
return describe_test(metric_id, **kwargs)
|
156
25
|
|
157
|
-
if value_only:
|
158
|
-
return cached_result[0]
|
159
26
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
inputs=cached_result[1],
|
164
|
-
params=cached_result[2],
|
165
|
-
generate_description=False,
|
166
|
-
)
|
27
|
+
def run_metric(metric_id: str, **kwargs):
|
28
|
+
"""Run a metric by delegating to `run_test`.

Args:
    metric_id (str): ID of the metric, passed through as the test ID.
    **kwargs: Forwarded unchanged to `run_test`.

Returns:
    Whatever `run_test` returns for this ID.
"""
|
29
|
+
return run_test(metric_id, **kwargs)
|
167
30
|
|
168
|
-
if show:
|
169
|
-
result_wrapper.show()
|
170
31
|
|
171
|
-
|
32
|
+
__all__ = ["list_metrics", "describe_metric", "run_metric"]
|
@@ -10,6 +10,7 @@ from validmind import tags, tasks
|
|
10
10
|
@tags("regression")
|
11
11
|
@tasks("regression")
|
12
12
|
def AdjustedRSquaredScore(model, dataset):
|
13
|
+
"""Calculates the adjusted R-squared score for a regression model."""
|
13
14
|
r2_score = _r2_score(
|
14
15
|
dataset.y,
|
15
16
|
dataset.y_pred(model),
|
@@ -10,6 +10,7 @@ from validmind import tags, tasks
|
|
10
10
|
@tags("regression")
|
11
11
|
@tasks("regression")
|
12
12
|
def KolmogorovSmirnovStatistic(dataset, model):
|
13
|
+
"""Calculates the Kolmogorov-Smirnov statistic for a regression model."""
|
13
14
|
y_true = dataset.y.flatten()
|
14
15
|
y_pred = dataset.y_pred(model)
|
15
16
|
|
@@ -10,4 +10,5 @@ from validmind import tags, tasks
|
|
10
10
|
@tags("regression")
|
11
11
|
@tasks("regression")
|
12
12
|
def MeanAbsoluteError(model, dataset, **kwargs):
|
13
|
+
"""Calculates the mean absolute error for a regression model.

Compares `dataset.y` with `dataset.y_pred(model)`; any extra keyword
arguments are forwarded unchanged to `_mean_absolute_error`.
"""
|
13
14
|
return _mean_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
|
@@ -10,6 +10,7 @@ from validmind import tags, tasks
|
|
10
10
|
@tags("regression")
|
11
11
|
@tasks("regression")
|
12
12
|
def MeanAbsolutePercentageError(model, dataset):
|
13
|
+
"""Calculates the mean absolute percentage error for a regression model."""
|
13
14
|
y_true = dataset.y
|
14
15
|
y_pred = dataset.y_pred(model)
|
15
16
|
|
@@ -10,4 +10,5 @@ from validmind import tags, tasks
|
|
10
10
|
@tags("regression")
|
11
11
|
@tasks("regression")
|
12
12
|
def MeanSquaredError(model, dataset, **kwargs):
|
13
|
+
"""Calculates the mean squared error for a regression model.

Compares `dataset.y` with `dataset.y_pred(model)`; any extra keyword
arguments are forwarded unchanged to `mean_squared_error`.
"""
|
13
14
|
return mean_squared_error(dataset.y, dataset.y_pred(model), **kwargs)
|
@@ -10,6 +10,7 @@ from validmind import tags, tasks
|
|
10
10
|
@tags("regression")
|
11
11
|
@tasks("regression")
|
12
12
|
def QuantileLoss(model, dataset, quantile=0.5):
|
13
|
+
"""Calculates the quantile loss for a regression model.

The error is `dataset.y - dataset.y_pred(model)`. The result is the mean of
the element-wise maximum of `quantile * error` and `(quantile - 1) * error`.

Args:
    quantile: Weight applied to the error as described above (default 0.5).
"""
|
13
14
|
error = dataset.y - dataset.y_pred(model)
|
14
15
|
|
15
16
|
return np.mean(np.maximum(quantile * error, (quantile - 1) * error))
|
@@ -9,5 +9,6 @@ from validmind import tags, tasks
|
|
9
9
|
|
10
10
|
@tags("regression")
|
11
11
|
@tasks("regression")
|
12
|
-
def
|
12
|
+
def RSquaredScore(model, dataset):
|
13
|
+
"""Calculates the R-squared score for a regression model.

Returns the result of `r2_score` applied to `dataset.y` and
`dataset.y_pred(model)`.
"""
|
13
14
|
return r2_score(dataset.y, dataset.y_pred(model))
|
@@ -11,6 +11,7 @@ from validmind import tags, tasks
|
|
11
11
|
@tags("regression")
|
12
12
|
@tasks("regression")
|
13
13
|
def RootMeanSquaredError(model, dataset, **kwargs):
|
14
|
+
"""Calculates the root mean squared error for a regression model."""
|
14
15
|
return np.sqrt(
|
15
16
|
mean_squared_error(
|
16
17
|
dataset.y,
|