validmind 2.5.24__py3-none-any.whl → 2.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. validmind/__init__.py +8 -17
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +66 -85
  4. validmind/ai/test_result_description/context.py +2 -2
  5. validmind/ai/utils.py +26 -1
  6. validmind/api_client.py +43 -79
  7. validmind/client.py +5 -7
  8. validmind/client_config.py +1 -1
  9. validmind/datasets/__init__.py +1 -1
  10. validmind/datasets/classification/customer_churn.py +7 -5
  11. validmind/datasets/nlp/__init__.py +2 -2
  12. validmind/errors.py +6 -10
  13. validmind/html_templates/content_blocks.py +18 -16
  14. validmind/logging.py +21 -16
  15. validmind/tests/__init__.py +28 -5
  16. validmind/tests/__types__.py +186 -170
  17. validmind/tests/_store.py +7 -21
  18. validmind/tests/comparison.py +362 -0
  19. validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
  20. validmind/tests/data_validation/ADF.py +49 -83
  21. validmind/tests/data_validation/AutoAR.py +59 -96
  22. validmind/tests/data_validation/AutoMA.py +59 -96
  23. validmind/tests/data_validation/AutoStationarity.py +66 -114
  24. validmind/tests/data_validation/ClassImbalance.py +48 -117
  25. validmind/tests/data_validation/DatasetDescription.py +180 -209
  26. validmind/tests/data_validation/DatasetSplit.py +50 -75
  27. validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
  28. validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
  29. validmind/tests/data_validation/Duplicates.py +21 -90
  30. validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
  31. validmind/tests/data_validation/HighCardinality.py +32 -80
  32. validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
  33. validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
  34. validmind/tests/data_validation/IQROutliersTable.py +40 -80
  35. validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
  36. validmind/tests/data_validation/KPSS.py +33 -81
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
  38. validmind/tests/data_validation/MissingValues.py +17 -58
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
  40. validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
  41. validmind/tests/data_validation/RollingStatsPlot.py +50 -81
  42. validmind/tests/data_validation/SeasonalDecompose.py +102 -184
  43. validmind/tests/data_validation/Skewness.py +27 -64
  44. validmind/tests/data_validation/SpreadPlot.py +34 -57
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
  48. validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
  49. validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
  50. validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
  54. validmind/tests/data_validation/TooManyZeroValues.py +21 -70
  55. validmind/tests/data_validation/UniqueRows.py +23 -62
  56. validmind/tests/data_validation/WOEBinPlots.py +83 -109
  57. validmind/tests/data_validation/WOEBinTable.py +28 -69
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
  59. validmind/tests/data_validation/nlp/CommonWords.py +49 -57
  60. validmind/tests/data_validation/nlp/Hashtags.py +27 -49
  61. validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
  62. validmind/tests/data_validation/nlp/Mentions.py +32 -63
  63. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
  64. validmind/tests/data_validation/nlp/Punctuations.py +63 -47
  65. validmind/tests/data_validation/nlp/Sentiment.py +4 -0
  66. validmind/tests/data_validation/nlp/StopWords.py +62 -91
  67. validmind/tests/data_validation/nlp/TextDescription.py +116 -159
  68. validmind/tests/data_validation/nlp/Toxicity.py +12 -4
  69. validmind/tests/decorator.py +33 -242
  70. validmind/tests/load.py +212 -153
  71. validmind/tests/model_validation/BertScore.py +13 -7
  72. validmind/tests/model_validation/BleuScore.py +4 -0
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
  74. validmind/tests/model_validation/ContextualRecall.py +3 -0
  75. validmind/tests/model_validation/FeaturesAUC.py +43 -74
  76. validmind/tests/model_validation/MeteorScore.py +3 -0
  77. validmind/tests/model_validation/RegardScore.py +5 -1
  78. validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
  79. validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
  80. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
  81. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
  82. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
  83. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
  84. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
  85. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
  86. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
  87. validmind/tests/model_validation/embeddings/utils.py +53 -0
  88. validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
  89. validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
  90. validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
  91. validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
  92. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
  93. validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
  94. validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
  95. validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
  96. validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
  97. validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
  98. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
  99. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
  100. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
  101. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
  102. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
  103. validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
  104. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
  105. validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
  106. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
  107. validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
  108. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
  109. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
  110. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
  111. validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
  112. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
  113. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
  114. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
  115. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
  116. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
  117. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
  118. validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
  119. validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
  120. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
  121. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
  122. validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
  123. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
  124. validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
  125. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
  126. validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
  127. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
  128. validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
  129. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
  130. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
  132. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
  133. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
  134. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
  136. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
  137. validmind/tests/output.py +120 -0
  138. validmind/tests/prompt_validation/Bias.py +55 -98
  139. validmind/tests/prompt_validation/Clarity.py +56 -99
  140. validmind/tests/prompt_validation/Conciseness.py +63 -101
  141. validmind/tests/prompt_validation/Delimitation.py +48 -89
  142. validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
  143. validmind/tests/prompt_validation/Robustness.py +80 -121
  144. validmind/tests/prompt_validation/Specificity.py +61 -95
  145. validmind/tests/prompt_validation/ai_powered_test.py +2 -2
  146. validmind/tests/run.py +314 -496
  147. validmind/tests/test_providers.py +109 -79
  148. validmind/tests/utils.py +91 -0
  149. validmind/unit_metrics/__init__.py +16 -155
  150. validmind/unit_metrics/classification/F1.py +1 -0
  151. validmind/unit_metrics/classification/Precision.py +1 -0
  152. validmind/unit_metrics/classification/ROC_AUC.py +1 -0
  153. validmind/unit_metrics/classification/Recall.py +1 -0
  154. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
  155. validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
  156. validmind/unit_metrics/regression/HuberLoss.py +1 -0
  157. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
  158. validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
  159. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
  160. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
  161. validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
  162. validmind/unit_metrics/regression/QuantileLoss.py +1 -0
  163. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  164. validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
  165. validmind/utils.py +66 -17
  166. validmind/vm_models/__init__.py +2 -17
  167. validmind/vm_models/dataset/dataset.py +31 -4
  168. validmind/vm_models/figure.py +7 -37
  169. validmind/vm_models/model.py +3 -0
  170. validmind/vm_models/result/__init__.py +7 -0
  171. validmind/vm_models/result/result.jinja +21 -0
  172. validmind/vm_models/result/result.py +337 -0
  173. validmind/vm_models/result/utils.py +160 -0
  174. validmind/vm_models/test_suite/runner.py +16 -54
  175. validmind/vm_models/test_suite/summary.py +3 -3
  176. validmind/vm_models/test_suite/test.py +43 -77
  177. validmind/vm_models/test_suite/test_suite.py +8 -40
  178. validmind-2.6.7.dist-info/METADATA +137 -0
  179. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
  180. validmind/tests/data_validation/AutoSeasonality.py +0 -190
  181. validmind/tests/metadata.py +0 -59
  182. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
  183. validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
  184. validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
  185. validmind/unit_metrics/composite.py +0 -238
  186. validmind/vm_models/test/metric.py +0 -98
  187. validmind/vm_models/test/metric_result.py +0 -61
  188. validmind/vm_models/test/output_template.py +0 -55
  189. validmind/vm_models/test/result_summary.py +0 -76
  190. validmind/vm_models/test/result_wrapper.py +0 -488
  191. validmind/vm_models/test/test.py +0 -103
  192. validmind/vm_models/test/threshold_test.py +0 -106
  193. validmind/vm_models/test/threshold_test_result.py +0 -75
  194. validmind/vm_models/test_context.py +0 -259
  195. validmind-2.5.24.dist-info/METADATA +0 -118
  196. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
  197. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
  198. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
@@ -4,28 +4,46 @@
4
4
 
5
5
  import importlib.util
6
6
  import os
7
+ import re
7
8
  import sys
8
- from typing import Protocol
9
+ from pathlib import Path
10
+ from typing import List, Protocol
9
11
 
10
12
  from validmind.logging import get_logger
11
13
 
12
- from ._store import test_provider_store
13
-
14
14
  logger = get_logger(__name__)
15
15
 
16
+ # list all files in directory of this file
17
+ __private_files = [f.name for f in Path(__file__).parent.glob("*.py")]
18
+
19
+
20
+ def _is_test_file(path: Path) -> bool:
21
+ return (
22
+ path.name[0].isupper()
23
+ or re.search(r"def\s*" + re.escape(path.stem), path.read_text())
24
+ ) and path.name not in __private_files
25
+
16
26
 
17
27
  class TestProvider(Protocol):
18
28
  """Protocol for user-defined test providers"""
19
29
 
20
- def load_test(self, test_id: str):
21
- """Load the test by test ID
30
+ def list_tests(self) -> List[str]:
31
+ """List all tests in the given namespace
32
+
33
+ Returns:
34
+ list: A list of test IDs
35
+ """
36
+ ...
37
+
38
+ def load_test(self, test_id: str) -> callable:
39
+ """Load the test function identified by the given test_id
22
40
 
23
41
  Args:
24
42
  test_id (str): The test ID (does not contain the namespace under which
25
43
  the test is registered)
26
44
 
27
45
  Returns:
28
- Test: A test class or function
46
+ callable: The test function
29
47
 
30
48
  Raises:
31
49
  FileNotFoundError: If the test is not found
@@ -33,22 +51,6 @@ class TestProvider(Protocol):
33
51
  ...
34
52
 
35
53
 
36
- class LocalTestProviderLoadModuleError(Exception):
37
- """
38
- When the local file module can't be loaded.
39
- """
40
-
41
- pass
42
-
43
-
44
- class LocalTestProviderLoadTestError(Exception):
45
- """
46
- When local file module was loaded but the test class can't be located.
47
- """
48
-
49
- pass
50
-
51
-
52
54
  class LocalTestProvider:
53
55
  """
54
56
  Test providers in ValidMind are responsible for loading tests from different sources,
@@ -69,6 +71,11 @@ class LocalTestProvider:
69
71
  # Register the test provider with a namespace
70
72
  register_test_provider("my_namespace", test_provider)
71
73
 
74
+ # List all tests in the namespace (returns a list of test IDs)
75
+ test_provider.list_tests()
76
+ # this is used by the validmind.tests.list_tests() function to aggregate all tests
77
+ # from all test providers
78
+
72
79
  # Load a test using the test_id (namespace + path to test class module)
73
80
  test = test_provider.load_test("my_namespace.my_test_class")
74
81
  # full path to the test class module is /path/to/tests/folder/my_test_class.py
@@ -86,7 +93,32 @@ class LocalTestProvider:
86
93
  Args:
87
94
  root_folder (str): The root directory for local tests.
88
95
  """
89
- self.root_folder = root_folder
96
+ self.root_folder = os.path.abspath(root_folder)
97
+
98
+ def list_tests(self):
99
+ """List all tests in the given namespace
100
+
101
+ Returns:
102
+ list: A list of test IDs
103
+ """
104
+ test_ids = []
105
+
106
+ for root, _, files in os.walk(self.root_folder):
107
+ for filename in files:
108
+ if not filename.endswith(".py") or filename.startswith("__"):
109
+ continue
110
+
111
+ path = Path(root) / filename
112
+ if not _is_test_file(path):
113
+ continue
114
+
115
+ rel_path = path.relative_to(self.root_folder)
116
+
117
+ test_id_parts = [p.stem for p in rel_path.parents if p.stem][::-1]
118
+ test_id_parts.append(path.stem)
119
+ test_ids.append(".".join(test_id_parts))
120
+
121
+ return sorted(test_ids)
90
122
 
91
123
  def load_test(self, test_id: str):
92
124
  """
@@ -100,60 +132,58 @@ class LocalTestProvider:
100
132
  The test class that matches the last part of the test_id.
101
133
 
102
134
  Raises:
103
- Exception: If the test can't be imported or loaded.
135
+ LocalTestProviderLoadModuleError: If the test module cannot be imported
136
+ LocalTestProviderLoadTestError: If the test class cannot be found in the module
104
137
  """
105
- test_path = f"{test_id.replace('.', '/')}.py"
106
- file_path = os.path.join(self.root_folder, test_path)
107
-
108
- logger.debug(f"Loading test {test_id} from {file_path}")
109
-
110
- # Check if the module uses relative imports
111
- with open(file_path, "r") as file:
112
- lines = file.readlines()
113
-
114
- # handle test with relative imports
115
- if any(line.strip().startswith("from .") for line in lines):
116
- logger.debug("Found relative imports, using alternative import method")
117
-
118
- parent_folder = os.path.dirname(file_path)
119
- if parent_folder not in sys.path:
120
- sys.path.append(os.path.dirname(parent_folder))
121
-
122
- try:
123
- module = importlib.import_module(
124
- f"{os.path.basename(parent_folder)}.{test_id.split('.')[-1]}"
125
- )
126
- except Exception as e:
127
- # error will be handled/re-raised by `load_test` func
128
- raise LocalTestProviderLoadModuleError(
129
- f"Failed to load the module from {file_path}. Error: {str(e)}"
130
- )
131
-
132
- else:
133
- try:
134
- spec = importlib.util.spec_from_file_location(test_id, file_path)
135
- module = importlib.util.module_from_spec(spec)
136
- spec.loader.exec_module(module)
137
- except Exception as e:
138
- # error will be handled/re-raised by `load_test` func
139
- raise LocalTestProviderLoadModuleError(
140
- f"Failed to load the module from {file_path}. Error: {str(e)}"
141
- )
142
-
143
- try:
144
- # find the test class that matches the last part of the test_id
145
- return getattr(module, test_id.split(".")[-1])
146
- except AttributeError as e:
147
- raise LocalTestProviderLoadTestError(
148
- f"Failed to find the test class in the module. Error: {str(e)}"
149
- )
150
-
151
-
152
- def register_test_provider(namespace: str, test_provider: "TestProvider") -> None:
153
- """Register an external test provider
154
-
155
- Args:
156
- namespace (str): The namespace of the test provider
157
- test_provider (TestProvider): The test provider
158
- """
159
- test_provider_store.register_test_provider(namespace, test_provider)
138
+ # Convert test_id to file path
139
+ file_path = os.path.join(self.root_folder, f"{test_id.replace('.', '/')}.py")
140
+ file_path = os.path.abspath(file_path)
141
+
142
+ module_dir = os.path.dirname(file_path)
143
+ module_name = test_id.split(".")[-1]
144
+
145
+ # module specification
146
+ spec = importlib.util.spec_from_file_location(
147
+ name=module_name,
148
+ location=file_path,
149
+ submodule_search_locations=[module_dir],
150
+ )
151
+
152
+ # module instance from specification
153
+ module = importlib.util.module_from_spec(spec)
154
+
155
+ # add module to sys.modules
156
+ sys.modules[module_name] = module
157
+ # execute the module
158
+ spec.loader.exec_module(module)
159
+
160
+ # test function should match the module (file) name exactly
161
+ return getattr(module, module_name)
162
+
163
+
164
+ class ValidMindTestProvider:
165
+ """Test provider for ValidMind tests"""
166
+
167
+ def __init__(self):
168
+ # two subproviders: unit_metrics and normal tests
169
+ self.metrics_provider = LocalTestProvider(
170
+ os.path.join(os.path.dirname(__file__), "..", "unit_metrics")
171
+ )
172
+ self.tests_provider = LocalTestProvider(os.path.dirname(__file__))
173
+
174
+ def list_tests(self) -> List[str]:
175
+ """List all tests in the ValidMind test provider"""
176
+ metric_ids = [
177
+ f"unit_metrics.{test}" for test in self.metrics_provider.list_tests()
178
+ ]
179
+ test_ids = self.tests_provider.list_tests()
180
+
181
+ return metric_ids + test_ids
182
+
183
+ def load_test(self, test_id: str) -> callable:
184
+ """Load a ValidMind test or unit metric"""
185
+ return (
186
+ self.metrics_provider.load_test(test_id.replace("unit_metrics.", ""))
187
+ if test_id.startswith("unit_metrics.")
188
+ else self.tests_provider.load_test(test_id)
189
+ )
validmind/tests/utils.py CHANGED
@@ -6,6 +6,13 @@
6
6
 
7
7
  import inspect
8
8
 
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from validmind.logging import get_logger
13
+
14
+ logger = get_logger(__name__)
15
+
9
16
 
10
17
  def test_description(test_class, truncate=True):
11
18
  description = inspect.getdoc(test_class).strip()
@@ -14,3 +21,87 @@ def test_description(test_class, truncate=True):
14
21
  return description.strip().split("\n")[0] + "..."
15
22
 
16
23
  return description
24
+
25
+
26
+ def remove_nan_pairs(y_true, y_pred, dataset_id=None):
27
+ """
28
+ Remove pairs where either true or predicted values are NaN/None.
29
+ Args:
30
+ y_true: List or array of true values
31
+ y_pred: List or array of predicted values
32
+ dataset_id: Optional identifier for the dataset (for logging)
33
+ Returns:
34
+ tuple: (cleaned_y_true, cleaned_y_pred)
35
+ """
36
+ # Convert to numpy arrays for easier handling
37
+ y_true = np.array(y_true)
38
+ y_pred = np.array(y_pred)
39
+
40
+ # Find indices where either value is NaN/None
41
+ nan_mask = ~(pd.isnull(y_true) | pd.isnull(y_pred))
42
+ nan_count = len(y_true) - np.sum(nan_mask)
43
+
44
+ if nan_count > 0:
45
+ dataset_info = f" from dataset '{dataset_id}'" if dataset_id else ""
46
+ logger.warning(
47
+ f"Found {nan_count} row(s){dataset_info} with NaN/None values. "
48
+ f"Removing these pairs. {len(y_true)} -> {np.sum(nan_mask)} pairs remaining."
49
+ )
50
+ return y_true[nan_mask], y_pred[nan_mask]
51
+
52
+ return y_true, y_pred
53
+
54
+
55
+ def ensure_equal_lengths(y_true, y_pred, dataset_id=None):
56
+ """
57
+ Check if true and predicted values have matching lengths, log warning if they don't,
58
+ and truncate to the shorter length if necessary. Also removes any NaN/None values.
59
+
60
+ Args:
61
+ y_true: List or array of true values
62
+ y_pred: List or array of predicted values
63
+ dataset_id: Optional identifier for the dataset (for logging)
64
+
65
+ Returns:
66
+ tuple: (cleaned_y_true, cleaned_y_pred)
67
+ """
68
+ # First remove any NaN values
69
+ y_true, y_pred = remove_nan_pairs(y_true, y_pred, dataset_id)
70
+
71
+ # Then handle length mismatches
72
+ if len(y_true) != len(y_pred):
73
+ dataset_info = f" from dataset '{dataset_id}'" if dataset_id else ""
74
+ min_length = min(len(y_true), len(y_pred))
75
+ logger.warning(
76
+ f"Length mismatch{dataset_info}: "
77
+ f"true values ({len(y_true)}) != predicted values ({len(y_pred)}). "
78
+ f"Truncating to first {min_length} pairs."
79
+ )
80
+ return y_true[:min_length], y_pred[:min_length]
81
+
82
+ return y_true, y_pred
83
+
84
+
85
+ def validate_prediction(y_true, y_pred, dataset_id=None):
86
+ """
87
+ Comprehensive validation of true and predicted value pairs.
88
+ Handles NaN/None values and length mismatches.
89
+
90
+ Args:
91
+ y_true: List or array of true values
92
+ y_pred: List or array of predicted values
93
+ dataset_id: Optional identifier for the dataset (for logging)
94
+
95
+ Returns:
96
+ tuple: (cleaned_y_true, cleaned_y_pred) with matching lengths and no NaN values
97
+
98
+ Example:
99
+ >>> y_true, y_pred = validate_prediction(dataset.y, model.predict(dataset.X), dataset.input_id)
100
+ """
101
+ # First remove any NaN values
102
+ y_true, y_pred = remove_nan_pairs(y_true, y_pred, dataset_id)
103
+
104
+ # Then handle any length mismatches
105
+ y_true, y_pred = ensure_equal_lengths(y_true, y_pred, dataset_id)
106
+
107
+ return y_true, y_pred
@@ -2,170 +2,31 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- import glob
6
- import hashlib
7
- import json
8
- import os
9
- from importlib import import_module
10
- from textwrap import dedent
5
+ from validmind.tests._store import test_provider_store
6
+ from validmind.tests.load import describe_test
7
+ from validmind.tests.run import run_test
11
8
 
12
- from IPython.display import Markdown, display
13
9
 
14
- from validmind.input_registry import input_registry
15
- from validmind.tests.decorator import _build_result, _inspect_signature
16
- from validmind.utils import test_id_to_name
10
+ def list_metrics(**kwargs):
11
+ """List all metrics"""
12
+ vm_provider = test_provider_store.get_test_provider("validmind")
13
+ vm_metrics_provider = vm_provider.metrics_provider
17
14
 
18
- unit_metric_results_cache = {}
15
+ prefix = "validmind.unit_metrics."
19
16
 
20
-
21
- def _serialize_dataset(dataset, model=None, sample_size=1000):
22
- columns = [*dataset.feature_columns, dataset.target_column]
23
- if model:
24
- columns.append(dataset.prediction_column(model))
25
-
26
- df = dataset._df[columns]
27
-
28
- return hashlib.md5(
29
- df.sample(n=min(sample_size, df.shape[0]), random_state=42)
30
- .to_string(header=True, index=True)
31
- .encode()
32
- ).hexdigest()
33
-
34
-
35
- def _get_metric_cache_key(metric_id, inputs, params):
36
- cache_elements = [
37
- metric_id,
38
- hashlib.md5(json.dumps(params, sort_keys=True).encode()).hexdigest(),
39
- ]
40
-
41
- if "model" in inputs:
42
- cache_elements.append(inputs["model"].input_id)
43
-
44
- if "dataset" in inputs:
45
- cache_elements.append(inputs["dataset"].input_id)
46
- cache_elements.append(
47
- _serialize_dataset(inputs["dataset"], inputs.get("model"))
48
- )
49
-
50
- return hashlib.md5("_".join(cache_elements).encode()).hexdigest()
51
-
52
-
53
- def describe_metric(metric_id, raw=False):
54
- """Describe a metric
55
-
56
- Args:
57
- metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
58
- raw (bool): Whether to return the description as a dictionary
59
-
60
- Returns:
61
- dict: A dictionary containing the metric description
62
- """
63
- metric = load_metric(metric_id)
64
- inputs, params = _inspect_signature(metric)
65
-
66
- if raw:
67
- return {
68
- "id": metric_id,
69
- "description": metric.__doc__,
70
- "inputs": inputs,
71
- "params": params,
72
- }
73
-
74
- inputs = ", ".join(inputs.keys())
75
- params = ", ".join(params.keys())
76
- description_md = f"""
77
- ### {test_id_to_name(metric_id)} (*'{metric_id}'*)
78
-
79
- {metric.__doc__ or ""}
80
-
81
- **Inputs**: {inputs}
82
-
83
- **Parameters**: {params}
84
- """
85
- display(Markdown(dedent(description_md)))
86
-
87
-
88
- def list_metrics():
89
- """List all available metrics
90
-
91
- Returns:
92
- list: A list of metric ids
93
- """
94
- # current directory of this file is the __init__.py file in the validmind/unit_metrics directory
95
- # glob for all metrics in the unit_metrics directory (indicated by capitalized python files)
96
- # recursive since we want to include subdirectories
97
- curr_dir = os.path.dirname(os.path.realpath(__file__))
98
17
  return [
99
- f"{__name__}.{os.path.relpath(metric, curr_dir).replace('/', '.')[:-3]}"
100
- for metric in glob.glob(f"{curr_dir}/**/*.py", recursive=True)
101
- if os.path.isfile(metric) and os.path.basename(metric)[0].isupper()
18
+ f"{prefix}{test_id}" for test_id in vm_metrics_provider.list_tests(**kwargs)
102
19
  ]
103
20
 
104
21
 
105
- def load_metric(metric_id):
106
- """Load a metric class from a string
107
-
108
- Args:
109
- metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
110
-
111
- Returns:
112
- callable: The metric function
113
- """
114
- return getattr(import_module(metric_id), metric_id.split(".")[-1])
115
-
116
-
117
- def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False):
118
- """Run a single metric and cache the results
119
-
120
- Args:
121
- metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.F1')
122
- inputs (dict): A dictionary of the metric inputs
123
- params (dict): A dictionary of the metric parameters
124
- show (bool): Whether to display the results
125
- value_only (bool): Whether to return only the value
126
- """
127
- inputs = {
128
- k: input_registry.get(v) if isinstance(v, str) else v
129
- for k, v in (inputs or {}).items()
130
- }
131
- params = params or {}
132
-
133
- cache_key = _get_metric_cache_key(metric_id, inputs, params)
134
-
135
- if cache_key not in unit_metric_results_cache:
136
- metric = load_metric(metric_id)
137
- _inputs, _params = _inspect_signature(metric)
138
-
139
- result = metric(
140
- **{k: v for k, v in inputs.items() if k in _inputs.keys()},
141
- **{
142
- k: v
143
- for k, v in params.items()
144
- if k in _params.keys() or "kwargs" in _params.keys()
145
- },
146
- )
147
- unit_metric_results_cache[cache_key] = (
148
- result,
149
- # store the input ids that were used to calculate the result
150
- [v.input_id for v in inputs.values()],
151
- # store the params that were used to calculate the result
152
- params,
153
- )
154
-
155
- cached_result = unit_metric_results_cache[cache_key]
22
+ def describe_metric(metric_id: str, **kwargs):
23
+ """Describe a metric"""
24
+ return describe_test(metric_id, **kwargs)
156
25
 
157
- if value_only:
158
- return cached_result[0]
159
26
 
160
- result_wrapper = _build_result(
161
- results=cached_result[0],
162
- test_id=metric_id,
163
- inputs=cached_result[1],
164
- params=cached_result[2],
165
- generate_description=False,
166
- )
27
+ def run_metric(metric_id: str, **kwargs):
28
+ """Run a metric"""
29
+ return run_test(metric_id, **kwargs)
167
30
 
168
- if show:
169
- result_wrapper.show()
170
31
 
171
- return result_wrapper
32
+ __all__ = ["list_metrics", "describe_metric", "run_metric"]
@@ -10,4 +10,5 @@ from validmind import tags, tasks
10
10
  @tasks("classification")
11
11
  @tags("classification")
12
12
  def F1(model, dataset, **kwargs):
13
+ """Calculates the F1 score for a classification model."""
13
14
  return f1_score(dataset.y, dataset.y_pred(model), **kwargs)
@@ -10,4 +10,5 @@ from validmind import tags, tasks
10
10
  @tasks("classification")
11
11
  @tags("classification")
12
12
  def Precision(model, dataset, **kwargs):
13
+ """Calculates the precision for a classification model."""
13
14
  return precision_score(dataset.y, dataset.y_pred(model), **kwargs)
@@ -12,6 +12,7 @@ from validmind import tags, tasks
12
12
  @tasks("classification")
13
13
  @tags("classification")
14
14
  def ROC_AUC(model, dataset, **kwargs):
15
+ """Calculates the ROC AUC for a classification model."""
15
16
  y_true = dataset.y
16
17
 
17
18
  if len(unique(y_true)) > 2:
@@ -10,4 +10,5 @@ from validmind import tags, tasks
10
10
  @tasks("classification")
11
11
  @tags("classification")
12
12
  def Recall(model, dataset, **kwargs):
13
+ """Calculates the recall for a classification model."""
13
14
  return recall_score(dataset.y, dataset.y_pred(model), **kwargs)
@@ -10,6 +10,7 @@ from validmind import tags, tasks
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
12
  def AdjustedRSquaredScore(model, dataset):
13
+ """Calculates the adjusted R-squared score for a regression model."""
13
14
  r2_score = _r2_score(
14
15
  dataset.y,
15
16
  dataset.y_pred(model),
@@ -10,6 +10,7 @@ from validmind import tags, tasks
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
12
  def GiniCoefficient(dataset, model):
13
+ """Calculates the Gini coefficient for a regression model."""
13
14
  y_true = dataset.y
14
15
  y_pred = dataset.y_pred(model)
15
16
 
@@ -10,6 +10,7 @@ from validmind import tags, tasks
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
12
  def HuberLoss(model, dataset):
13
+ """Calculates the Huber loss for a regression model."""
13
14
  y_true = dataset.y
14
15
  y_pred = dataset.y_pred(model)
15
16
 
@@ -10,6 +10,7 @@ from validmind import tags, tasks
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
12
  def KolmogorovSmirnovStatistic(dataset, model):
13
+ """Calculates the Kolmogorov-Smirnov statistic for a regression model."""
13
14
  y_true = dataset.y.flatten()
14
15
  y_pred = dataset.y_pred(model)
15
16
 
@@ -10,4 +10,5 @@ from validmind import tags, tasks
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
12
  def MeanAbsoluteError(model, dataset, **kwargs):
13
+ """Calculates the mean absolute error for a regression model."""
13
14
  return _mean_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
@@ -10,6 +10,7 @@ from validmind import tags, tasks
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
12
  def MeanAbsolutePercentageError(model, dataset):
13
+ """Calculates the mean absolute percentage error for a regression model."""
13
14
  y_true = dataset.y
14
15
  y_pred = dataset.y_pred(model)
15
16
 
@@ -10,4 +10,5 @@ from validmind import tags, tasks
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
12
  def MeanBiasDeviation(model, dataset):
13
+ """Calculates the mean bias deviation for a regression model."""
13
14
  return np.mean(dataset.y - dataset.y_pred(model))
@@ -10,4 +10,5 @@ from validmind import tags, tasks
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
12
  def MeanSquaredError(model, dataset, **kwargs):
13
+ """Calculates the mean squared error for a regression model."""
13
14
  return mean_squared_error(dataset.y, dataset.y_pred(model), **kwargs)
@@ -10,6 +10,7 @@ from validmind import tags, tasks
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
12
  def QuantileLoss(model, dataset, quantile=0.5):
13
+ """Calculates the quantile loss for a regression model."""
13
14
  error = dataset.y - dataset.y_pred(model)
14
15
 
15
16
  return np.mean(np.maximum(quantile * error, (quantile - 1) * error))
@@ -9,5 +9,6 @@ from validmind import tags, tasks
9
9
 
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
- def RSquaredError(model, dataset):
12
+ def RSquaredScore(model, dataset):
13
+ """Calculates the R-squared score for a regression model."""
13
14
  return r2_score(dataset.y, dataset.y_pred(model))
@@ -11,6 +11,7 @@ from validmind import tags, tasks
11
11
  @tags("regression")
12
12
  @tasks("regression")
13
13
  def RootMeanSquaredError(model, dataset, **kwargs):
14
+ """Calculates the root mean squared error for a regression model."""
14
15
  return np.sqrt(
15
16
  mean_squared_error(
16
17
  dataset.y,