validmind 2.5.24__py3-none-any.whl → 2.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198):
  1. validmind/__init__.py +8 -17
  2. validmind/__version__.py +1 -1
  3. validmind/ai/test_descriptions.py +66 -85
  4. validmind/ai/test_result_description/context.py +2 -2
  5. validmind/ai/utils.py +26 -1
  6. validmind/api_client.py +43 -79
  7. validmind/client.py +5 -7
  8. validmind/client_config.py +1 -1
  9. validmind/datasets/__init__.py +1 -1
  10. validmind/datasets/classification/customer_churn.py +7 -5
  11. validmind/datasets/nlp/__init__.py +2 -2
  12. validmind/errors.py +6 -10
  13. validmind/html_templates/content_blocks.py +18 -16
  14. validmind/logging.py +21 -16
  15. validmind/tests/__init__.py +28 -5
  16. validmind/tests/__types__.py +186 -170
  17. validmind/tests/_store.py +7 -21
  18. validmind/tests/comparison.py +362 -0
  19. validmind/tests/data_validation/ACFandPACFPlot.py +44 -73
  20. validmind/tests/data_validation/ADF.py +49 -83
  21. validmind/tests/data_validation/AutoAR.py +59 -96
  22. validmind/tests/data_validation/AutoMA.py +59 -96
  23. validmind/tests/data_validation/AutoStationarity.py +66 -114
  24. validmind/tests/data_validation/ClassImbalance.py +48 -117
  25. validmind/tests/data_validation/DatasetDescription.py +180 -209
  26. validmind/tests/data_validation/DatasetSplit.py +50 -75
  27. validmind/tests/data_validation/DescriptiveStatistics.py +59 -85
  28. validmind/tests/data_validation/{DFGLSArch.py → DickeyFullerGLS.py} +44 -76
  29. validmind/tests/data_validation/Duplicates.py +21 -90
  30. validmind/tests/data_validation/EngleGrangerCoint.py +53 -75
  31. validmind/tests/data_validation/HighCardinality.py +32 -80
  32. validmind/tests/data_validation/HighPearsonCorrelation.py +29 -97
  33. validmind/tests/data_validation/IQROutliersBarPlot.py +63 -94
  34. validmind/tests/data_validation/IQROutliersTable.py +40 -80
  35. validmind/tests/data_validation/IsolationForestOutliers.py +41 -63
  36. validmind/tests/data_validation/KPSS.py +33 -81
  37. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +47 -95
  38. validmind/tests/data_validation/MissingValues.py +17 -58
  39. validmind/tests/data_validation/MissingValuesBarPlot.py +61 -87
  40. validmind/tests/data_validation/PhillipsPerronArch.py +56 -79
  41. validmind/tests/data_validation/RollingStatsPlot.py +50 -81
  42. validmind/tests/data_validation/SeasonalDecompose.py +102 -184
  43. validmind/tests/data_validation/Skewness.py +27 -64
  44. validmind/tests/data_validation/SpreadPlot.py +34 -57
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +46 -65
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +23 -45
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +27 -46
  48. validmind/tests/data_validation/TargetRateBarPlots.py +54 -93
  49. validmind/tests/data_validation/TimeSeriesFrequency.py +48 -133
  50. validmind/tests/data_validation/TimeSeriesHistogram.py +24 -3
  51. validmind/tests/data_validation/TimeSeriesLinePlot.py +29 -47
  52. validmind/tests/data_validation/TimeSeriesMissingValues.py +59 -135
  53. validmind/tests/data_validation/TimeSeriesOutliers.py +54 -171
  54. validmind/tests/data_validation/TooManyZeroValues.py +21 -70
  55. validmind/tests/data_validation/UniqueRows.py +23 -62
  56. validmind/tests/data_validation/WOEBinPlots.py +83 -109
  57. validmind/tests/data_validation/WOEBinTable.py +28 -69
  58. validmind/tests/data_validation/ZivotAndrewsArch.py +33 -75
  59. validmind/tests/data_validation/nlp/CommonWords.py +49 -57
  60. validmind/tests/data_validation/nlp/Hashtags.py +27 -49
  61. validmind/tests/data_validation/nlp/LanguageDetection.py +7 -13
  62. validmind/tests/data_validation/nlp/Mentions.py +32 -63
  63. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +89 -14
  64. validmind/tests/data_validation/nlp/Punctuations.py +63 -47
  65. validmind/tests/data_validation/nlp/Sentiment.py +4 -0
  66. validmind/tests/data_validation/nlp/StopWords.py +62 -91
  67. validmind/tests/data_validation/nlp/TextDescription.py +116 -159
  68. validmind/tests/data_validation/nlp/Toxicity.py +12 -4
  69. validmind/tests/decorator.py +33 -242
  70. validmind/tests/load.py +212 -153
  71. validmind/tests/model_validation/BertScore.py +13 -7
  72. validmind/tests/model_validation/BleuScore.py +4 -0
  73. validmind/tests/model_validation/ClusterSizeDistribution.py +24 -47
  74. validmind/tests/model_validation/ContextualRecall.py +3 -0
  75. validmind/tests/model_validation/FeaturesAUC.py +43 -74
  76. validmind/tests/model_validation/MeteorScore.py +3 -0
  77. validmind/tests/model_validation/RegardScore.py +5 -1
  78. validmind/tests/model_validation/RegressionResidualsPlot.py +54 -75
  79. validmind/tests/model_validation/embeddings/ClusterDistribution.py +10 -33
  80. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +11 -29
  81. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +19 -31
  82. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +40 -49
  83. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +29 -15
  84. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +25 -11
  85. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +28 -13
  86. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +67 -38
  87. validmind/tests/model_validation/embeddings/utils.py +53 -0
  88. validmind/tests/model_validation/ragas/AnswerCorrectness.py +37 -32
  89. validmind/tests/model_validation/ragas/{AspectCritique.py → AspectCritic.py} +33 -27
  90. validmind/tests/model_validation/ragas/ContextEntityRecall.py +44 -41
  91. validmind/tests/model_validation/ragas/ContextPrecision.py +40 -35
  92. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +133 -0
  93. validmind/tests/model_validation/ragas/ContextRecall.py +40 -35
  94. validmind/tests/model_validation/ragas/Faithfulness.py +42 -30
  95. validmind/tests/model_validation/ragas/NoiseSensitivity.py +59 -35
  96. validmind/tests/model_validation/ragas/{AnswerRelevance.py → ResponseRelevancy.py} +52 -41
  97. validmind/tests/model_validation/ragas/{AnswerSimilarity.py → SemanticSimilarity.py} +39 -34
  98. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +13 -16
  99. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +13 -16
  100. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +51 -89
  101. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +31 -61
  102. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +118 -83
  103. validmind/tests/model_validation/sklearn/CompletenessScore.py +13 -16
  104. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +62 -94
  105. validmind/tests/model_validation/sklearn/FeatureImportance.py +7 -8
  106. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -15
  107. validmind/tests/model_validation/sklearn/HomogeneityScore.py +12 -15
  108. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +23 -53
  109. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +60 -74
  110. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +16 -84
  111. validmind/tests/model_validation/sklearn/MinimumF1Score.py +22 -72
  112. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +29 -78
  113. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +52 -82
  114. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +51 -145
  115. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +60 -78
  116. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +130 -172
  117. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +26 -55
  118. validmind/tests/model_validation/sklearn/ROCCurve.py +43 -77
  119. validmind/tests/model_validation/sklearn/RegressionPerformance.py +41 -94
  120. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +47 -136
  121. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +164 -208
  122. validmind/tests/model_validation/sklearn/SilhouettePlot.py +54 -99
  123. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +50 -124
  124. validmind/tests/model_validation/sklearn/VMeasure.py +12 -15
  125. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +225 -281
  126. validmind/tests/model_validation/statsmodels/AutoARIMA.py +40 -45
  127. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +22 -47
  128. validmind/tests/model_validation/statsmodels/Lilliefors.py +17 -28
  129. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +37 -81
  130. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +37 -105
  131. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +62 -166
  132. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +57 -119
  133. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +20 -57
  134. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +47 -80
  135. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +2 -0
  136. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -2
  137. validmind/tests/output.py +120 -0
  138. validmind/tests/prompt_validation/Bias.py +55 -98
  139. validmind/tests/prompt_validation/Clarity.py +56 -99
  140. validmind/tests/prompt_validation/Conciseness.py +63 -101
  141. validmind/tests/prompt_validation/Delimitation.py +48 -89
  142. validmind/tests/prompt_validation/NegativeInstruction.py +62 -96
  143. validmind/tests/prompt_validation/Robustness.py +80 -121
  144. validmind/tests/prompt_validation/Specificity.py +61 -95
  145. validmind/tests/prompt_validation/ai_powered_test.py +2 -2
  146. validmind/tests/run.py +314 -496
  147. validmind/tests/test_providers.py +109 -79
  148. validmind/tests/utils.py +91 -0
  149. validmind/unit_metrics/__init__.py +16 -155
  150. validmind/unit_metrics/classification/F1.py +1 -0
  151. validmind/unit_metrics/classification/Precision.py +1 -0
  152. validmind/unit_metrics/classification/ROC_AUC.py +1 -0
  153. validmind/unit_metrics/classification/Recall.py +1 -0
  154. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +1 -0
  155. validmind/unit_metrics/regression/GiniCoefficient.py +1 -0
  156. validmind/unit_metrics/regression/HuberLoss.py +1 -0
  157. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -0
  158. validmind/unit_metrics/regression/MeanAbsoluteError.py +1 -0
  159. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -0
  160. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -0
  161. validmind/unit_metrics/regression/MeanSquaredError.py +1 -0
  162. validmind/unit_metrics/regression/QuantileLoss.py +1 -0
  163. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  164. validmind/unit_metrics/regression/RootMeanSquaredError.py +1 -0
  165. validmind/utils.py +66 -17
  166. validmind/vm_models/__init__.py +2 -17
  167. validmind/vm_models/dataset/dataset.py +31 -4
  168. validmind/vm_models/figure.py +7 -37
  169. validmind/vm_models/model.py +3 -0
  170. validmind/vm_models/result/__init__.py +7 -0
  171. validmind/vm_models/result/result.jinja +21 -0
  172. validmind/vm_models/result/result.py +337 -0
  173. validmind/vm_models/result/utils.py +160 -0
  174. validmind/vm_models/test_suite/runner.py +16 -54
  175. validmind/vm_models/test_suite/summary.py +3 -3
  176. validmind/vm_models/test_suite/test.py +43 -77
  177. validmind/vm_models/test_suite/test_suite.py +8 -40
  178. validmind-2.6.7.dist-info/METADATA +137 -0
  179. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/RECORD +182 -189
  180. validmind/tests/data_validation/AutoSeasonality.py +0 -190
  181. validmind/tests/metadata.py +0 -59
  182. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -176
  183. validmind/tests/model_validation/ragas/ContextUtilization.py +0 -161
  184. validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -80
  185. validmind/unit_metrics/composite.py +0 -238
  186. validmind/vm_models/test/metric.py +0 -98
  187. validmind/vm_models/test/metric_result.py +0 -61
  188. validmind/vm_models/test/output_template.py +0 -55
  189. validmind/vm_models/test/result_summary.py +0 -76
  190. validmind/vm_models/test/result_wrapper.py +0 -488
  191. validmind/vm_models/test/test.py +0 -103
  192. validmind/vm_models/test/threshold_test.py +0 -106
  193. validmind/vm_models/test/threshold_test_result.py +0 -75
  194. validmind/vm_models/test_context.py +0 -259
  195. validmind-2.5.24.dist-info/METADATA +0 -118
  196. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/LICENSE +0 -0
  197. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/WHEEL +0 -0
  198. {validmind-2.5.24.dist-info → validmind-2.6.7.dist-info}/entry_points.txt +0 -0
@@ -2,216 +2,28 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- """Decorators for creating and registering metrics with the ValidMind framework."""
6
-
7
- # TODO: as we move entirely to a functional approach a lot of this logic
8
- # should be moved into the __init__ to replace the old class-based stuff
5
+ """Decorators for creating and registering tests with the ValidMind Library."""
9
6
 
10
7
  import inspect
11
8
  import os
12
- from typing import Any, Dict, List, Tuple, Union
13
- from uuid import uuid4
14
-
15
- import pandas as pd
9
+ from functools import wraps
16
10
 
17
- from validmind.ai.test_descriptions import get_description_metadata
18
- from validmind.errors import MissingRequiredTestInputError
19
11
  from validmind.logging import get_logger
20
- from validmind.vm_models import (
21
- Metric,
22
- MetricResult,
23
- ResultSummary,
24
- ResultTable,
25
- ResultTableMetadata,
26
- VMDataset,
27
- VMModel,
28
- )
29
- from validmind.vm_models.figure import (
30
- Figure,
31
- is_matplotlib_figure,
32
- is_plotly_figure,
33
- is_png_image,
34
- )
35
- from validmind.vm_models.test.result_wrapper import MetricResultWrapper
36
12
 
37
13
  from ._store import test_store
14
+ from .load import load_test
38
15
 
39
16
  logger = get_logger(__name__)
40
17
 
41
18
 
42
- _input_type_map = {
43
- "dataset": VMDataset,
44
- "datasets": List[VMDataset],
45
- "model": VMModel,
46
- "models": List[VMModel],
47
- }
48
-
49
-
50
- def _inspect_signature(test_func: callable):
51
- inputs = {}
52
- params = {}
53
-
54
- for name, arg in inspect.signature(test_func).parameters.items():
55
- if name in _input_type_map:
56
- inputs[name] = {
57
- "type": _input_type_map[name],
58
- }
59
- else:
60
- params[name] = {
61
- "type": arg.annotation,
62
- "default": (
63
- arg.default if arg.default is not inspect.Parameter.empty else None
64
- ),
65
- }
66
-
67
- return inputs, params
68
-
69
-
70
- def _build_result( # noqa: C901
71
- results: Union[Any, Tuple[Any, ...]],
72
- test_id: str,
73
- inputs: List[str],
74
- params: Dict[str, Any],
75
- description: str = None,
76
- output_template: str = None,
77
- generate_description: bool = True,
78
- ):
79
- ref_id = str(uuid4())
80
- figure_metadata = {
81
- "_type": "metric",
82
- "_name": test_id,
83
- "_ref_id": ref_id,
84
- }
85
-
86
- tables = []
87
- figures = []
88
- scalars = []
89
-
90
- def process_result_item(item):
91
- # TOOD: build out a more robust/extensible system for this
92
- # TODO: custom type handlers would be really cool
93
-
94
- # unit metrics (scalar values) - for now only one per test
95
- if isinstance(item, int) or isinstance(item, float):
96
- if scalars:
97
- raise ValueError("Only one unit metric may be returned per test.")
98
- scalars.append(item)
99
-
100
- # plots
101
- elif isinstance(item, Figure):
102
- figures.append(item)
103
- elif is_matplotlib_figure(item) or is_plotly_figure(item) or is_png_image(item):
104
- figures.append(
105
- Figure(
106
- key=f"{test_id}:{len(figures) + 1}",
107
- figure=item,
108
- metadata=figure_metadata,
109
- )
110
- )
111
-
112
- # tables
113
- elif isinstance(item, list) or isinstance(item, pd.DataFrame):
114
- tables.append(ResultTable(data=item))
115
- elif isinstance(item, dict):
116
- for table_name, table in item.items():
117
- if not isinstance(table, list) and not isinstance(table, pd.DataFrame):
118
- raise ValueError(
119
- f"Invalid table format: {table_name} must be a list or DataFrame"
120
- )
121
-
122
- tables.append(
123
- ResultTable(
124
- data=table,
125
- metadata=ResultTableMetadata(title=table_name),
126
- )
127
- )
128
-
129
- else:
130
- raise ValueError(f"Invalid return type: {type(item)}")
131
-
132
- # if the results are a tuple, process each item as a separate result
133
- if isinstance(results, tuple):
134
- for item in results:
135
- process_result_item(item)
136
- else:
137
- process_result_item(results)
138
-
139
- metric_inputs = [
140
- sub_i.input_id if hasattr(sub_i, "input_id") else sub_i
141
- for i in inputs
142
- for sub_i in (i if isinstance(i, list) else [i])
143
- ]
144
-
145
- return MetricResultWrapper(
146
- result_id=test_id,
147
- scalar=scalars[0] if scalars else None,
148
- metric=(
149
- MetricResult(
150
- key=test_id,
151
- ref_id=ref_id,
152
- value="Empty",
153
- summary=ResultSummary(results=tables),
154
- )
155
- if tables or figures # if tables or figures than its a traditional metric
156
- else None
157
- ),
158
- figures=figures,
159
- result_metadata=(
160
- [
161
- get_description_metadata(
162
- test_id=test_id,
163
- default_description=description,
164
- summary=ResultSummary(results=tables).serialize(),
165
- figures=figures,
166
- should_generate=generate_description,
167
- )
168
- ]
169
- if tables or figures
170
- else None
171
- ),
172
- inputs=metric_inputs,
173
- params=params,
174
- output_template=output_template,
175
- )
176
-
177
-
178
- def _get_run_method(func, func_inputs, func_params):
179
- def run(self: Metric):
180
- input_kwargs = {} # map function inputs (`dataset` etc) to actual objects
181
- input_ids = [] # store input_ids used so they can be logged
182
- for key in func_inputs.keys():
183
- try:
184
- input_kwargs[key] = getattr(self.inputs, key)
185
- if isinstance(input_kwargs[key], list):
186
- input_ids.extend([i.input_id for i in input_kwargs[key]])
187
- else:
188
- input_ids.append(input_kwargs[key].input_id)
189
- except AttributeError:
190
- raise MissingRequiredTestInputError(f"Missing required input: {key}.")
191
-
192
- param_kwargs = {
193
- key: self.params.get(key, func_params[key]["default"])
194
- for key in func_params.keys()
195
- }
196
-
197
- raw_results = func(**input_kwargs, **param_kwargs)
198
-
199
- self.result = _build_result(
200
- results=raw_results,
201
- test_id=self.test_id,
202
- description=inspect.getdoc(self),
203
- inputs=input_ids,
204
- params=param_kwargs,
205
- output_template=self.output_template,
206
- generate_description=self.generate_description,
207
- )
208
-
209
- return self.result
210
-
211
- return run
19
+ def _get_save_func(func, test_id):
20
+ """Helper function to save a decorated function to a file
212
21
 
22
+ Useful when a custom test function has been created inline in a notebook or
23
+ interactive session and needs to be saved to a file so it can be added to a
24
+ test library.
25
+ """
213
26
 
214
- def _get_save_func(func, test_id):
215
27
  def save(root_folder=".", imports=None):
216
28
  parts = test_id.split(".")
217
29
 
@@ -270,34 +82,26 @@ def _get_save_func(func, test_id):
270
82
  return save
271
83
 
272
84
 
273
- def metric(func_or_id):
274
- """
275
- DEPRECATED, use @vm.test instead
276
- """
277
- # print a deprecation notice and call the test() function instead
278
- logger.warning(
279
- "The @vm.metric decorator is deprecated and will be removed in a future release. "
280
- "Please use @vm.test instead."
281
- )
282
- return test(func_or_id)
85
+ def test(func_or_id):
86
+ """Decorator for creating and registering custom tests
283
87
 
88
+ This decorator registers the function it wraps as a test function within ValidMind
89
+ under the provided ID. Once decorated, the function can be run using the
90
+ `validmind.tests.run_test` function.
284
91
 
285
- def test(func_or_id):
286
- """Decorator for creating and registering metrics with the ValidMind framework.
92
+ The function can take two different types of arguments:
287
93
 
288
- Creates a metric object and registers it with ValidMind under the provided ID. If
289
- no ID is provided, the function name will be used as to build one. So if the
290
- function name is `my_metric`, the metric will be registered under the ID
291
- `validmind.custom_metrics.my_metric`.
94
+ - Inputs: ValidMind model or dataset (or list of models/datasets). These arguments
95
+ must use the following names: `model`, `models`, `dataset`, `datasets`.
96
+ - Parameters: Any additional keyword arguments of any type (must have a default
97
+ value) that can have any name.
292
98
 
293
- This decorator works by creating a new `Metric` class will be created whose `run`
294
- method calls the decorated function. This function should take as arguments the
295
- inputs it requires (`dataset`, `datasets`, `model`, `models`) followed by any
296
- parameters. It can return any number of the following types:
99
+ The function should return one of the following types:
297
100
 
298
101
  - Table: Either a list of dictionaries or a pandas DataFrame
299
102
  - Plot: Either a matplotlib figure or a plotly figure
300
- - Scalar: A single number or string
103
+ - Scalar: A single number (int or float)
104
+ - Boolean: A single boolean value indicating whether the test passed or failed
301
105
 
302
106
  The function may also include a docstring. This docstring will be used and logged
303
107
  as the metric's description.
@@ -312,30 +116,17 @@ def test(func_or_id):
312
116
 
313
117
  def decorator(func):
314
118
  test_id = func_or_id or f"validmind.custom_metrics.{func.__name__}"
119
+ test_func = load_test(test_id, func, reload=True)
120
+ test_store.register_test(test_id, test_func)
315
121
 
316
- inputs, params = _inspect_signature(func)
317
- description = inspect.getdoc(func)
318
- tasks = getattr(func, "__tasks__", [])
319
- tags = getattr(func, "__tags__", [])
320
-
321
- metric_class = type(
322
- func.__name__,
323
- (Metric,),
324
- {
325
- "run": _get_run_method(func, inputs, params),
326
- "required_inputs": list(inputs.keys()),
327
- "default_params": {k: v["default"] for k, v in params.items()},
328
- "__doc__": description,
329
- "tasks": tasks,
330
- "tags": tags,
331
- },
332
- )
333
- test_store.register_custom_test(test_id, metric_class)
122
+ @wraps(test_func)
123
+ def wrapper(*args, **kwargs):
124
+ return test_func(*args, **kwargs)
334
125
 
335
126
  # special function to allow the function to be saved to a file
336
- func.save = _get_save_func(func, test_id)
127
+ wrapper.save = _get_save_func(test_func, test_id)
337
128
 
338
- return func
129
+ return wrapper
339
130
 
340
131
  if callable(func_or_id):
341
132
  return decorator(func_or_id)
@@ -344,10 +135,10 @@ def test(func_or_id):
344
135
 
345
136
 
346
137
  def tasks(*tasks):
347
- """Decorator for specifying the task types that a metric is designed for.
138
+ """Decorator for specifying the task types that a test is designed for.
348
139
 
349
140
  Args:
350
- *tasks: The task types that the metric is designed for.
141
+ *tasks: The task types that the test is designed for.
351
142
  """
352
143
 
353
144
  def decorator(func):
@@ -358,10 +149,10 @@ def tasks(*tasks):
358
149
 
359
150
 
360
151
  def tags(*tags):
361
- """Decorator for specifying tags for a metric.
152
+ """Decorator for specifying tags for a test.
362
153
 
363
154
  Args:
364
- *tags: The tags to apply to the metric.
155
+ *tags: The tags to apply to the test.
365
156
  """
366
157
 
367
158
  def decorator(func):