validmind 2.0.7__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. validmind/__init__.py +3 -3
  2. validmind/__version__.py +1 -1
  3. validmind/ai.py +7 -11
  4. validmind/api_client.py +29 -27
  5. validmind/client.py +10 -3
  6. validmind/datasets/credit_risk/__init__.py +11 -0
  7. validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  8. validmind/datasets/credit_risk/lending_club.py +394 -0
  9. validmind/logging.py +9 -2
  10. validmind/template.py +2 -2
  11. validmind/test_suites/__init__.py +4 -2
  12. validmind/tests/__init__.py +97 -50
  13. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  14. validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
  15. validmind/tests/data_validation/ScatterPlot.py +8 -2
  16. validmind/tests/decorator.py +138 -14
  17. validmind/tests/model_validation/BertScore.py +1 -1
  18. validmind/tests/model_validation/BertScoreAggregate.py +1 -1
  19. validmind/tests/model_validation/BleuScore.py +1 -1
  20. validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
  21. validmind/tests/model_validation/ContextualRecall.py +1 -1
  22. validmind/tests/model_validation/FeaturesAUC.py +110 -0
  23. validmind/tests/model_validation/MeteorScore.py +1 -1
  24. validmind/tests/model_validation/RegardHistogram.py +1 -1
  25. validmind/tests/model_validation/RegardScore.py +1 -1
  26. validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
  27. validmind/tests/model_validation/RougeMetrics.py +1 -1
  28. validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
  29. validmind/tests/model_validation/SelfCheckNLIScore.py +1 -1
  30. validmind/tests/model_validation/TokenDisparity.py +1 -1
  31. validmind/tests/model_validation/ToxicityHistogram.py +1 -1
  32. validmind/tests/model_validation/ToxicityScore.py +1 -1
  33. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  34. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
  35. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +1 -1
  36. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  37. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +15 -18
  38. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
  39. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  40. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
  41. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
  42. validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
  43. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
  44. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
  45. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
  46. validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
  47. validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
  48. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
  49. validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
  50. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +27 -3
  51. validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
  52. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
  53. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
  54. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
  55. validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
  56. validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
  57. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
  58. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
  59. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  60. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  61. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  62. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
  63. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
  64. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
  65. validmind/tests/test_providers.py +14 -124
  66. validmind/unit_metrics/__init__.py +76 -69
  67. validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
  68. validmind/unit_metrics/classification/sklearn/F1.py +13 -0
  69. validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
  70. validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
  71. validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
  72. validmind/unit_metrics/composite.py +24 -71
  73. validmind/unit_metrics/regression/GiniCoefficient.py +20 -26
  74. validmind/unit_metrics/regression/HuberLoss.py +12 -16
  75. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +18 -24
  76. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +7 -13
  77. validmind/unit_metrics/regression/MeanBiasDeviation.py +5 -14
  78. validmind/unit_metrics/regression/QuantileLoss.py +6 -16
  79. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +12 -18
  80. validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +6 -15
  81. validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +5 -14
  82. validmind/unit_metrics/regression/sklearn/RSquaredScore.py +6 -15
  83. validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +11 -14
  84. validmind/utils.py +18 -45
  85. validmind/vm_models/__init__.py +0 -2
  86. validmind/vm_models/dataset.py +255 -16
  87. validmind/vm_models/test/metric.py +1 -2
  88. validmind/vm_models/test/result_wrapper.py +12 -13
  89. validmind/vm_models/test/test.py +2 -1
  90. validmind/vm_models/test/threshold_test.py +1 -2
  91. validmind/vm_models/test_suite/summary.py +3 -3
  92. validmind/vm_models/test_suite/test_suite.py +2 -1
  93. {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/METADATA +10 -6
  94. {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/RECORD +97 -96
  95. validmind/tests/__types__.py +0 -62
  96. validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
  97. validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
  98. validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
  99. validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
  100. validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -22
  101. validmind/unit_metrics/sklearn/classification/F1.py +0 -24
  102. validmind/unit_metrics/sklearn/classification/Precision.py +0 -24
  103. validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -22
  104. validmind/unit_metrics/sklearn/classification/Recall.py +0 -22
  105. validmind/vm_models/test/unit_metric.py +0 -88
  106. {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
  107. {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
  108. {validmind-2.0.7.dist-info → validmind-2.1.0.dist-info}/entry_points.txt +0 -0
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

from sklearn.metrics import f1_score

from validmind import tags, tasks


@tags("classification", "sklearn", "unit_metric")
@tasks("classification")
def F1(model, dataset, **kwargs):
    """Unit metric: F1 score of the model's predictions on the dataset.

    Extra keyword arguments are forwarded unchanged to
    `sklearn.metrics.f1_score` (e.g. `average`, `pos_label`).
    """
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    return f1_score(y_true, y_pred, **kwargs)
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

from sklearn.metrics import precision_score

from validmind import tags, tasks


@tags("classification", "sklearn", "unit_metric")
@tasks("classification")
def Precision(model, dataset, **kwargs):
    """Unit metric: precision of the model's predictions on the dataset.

    Extra keyword arguments are forwarded unchanged to
    `sklearn.metrics.precision_score` (e.g. `average`, `pos_label`).
    """
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    return precision_score(y_true, y_pred, **kwargs)
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

from sklearn.metrics import roc_auc_score

from validmind import tags, tasks


@tags("classification", "sklearn", "unit_metric")
@tasks("classification")
def ROC_AUC(model, dataset, **kwargs):
    """Unit metric: ROC AUC of the model's predictions on the dataset.

    Extra keyword arguments are forwarded unchanged to
    `sklearn.metrics.roc_auc_score`.

    NOTE(review): this feeds hard class predictions (`dataset.y_pred`) to
    `roc_auc_score`, which normally expects probability scores or decision
    values — confirm whether probability output (e.g. `dataset.y_prob`)
    should be used here instead.
    """
    y_true = dataset.y
    y_score = dataset.y_pred(model)

    return roc_auc_score(y_true, y_score, **kwargs)
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

from sklearn.metrics import recall_score

from validmind import tags, tasks


@tags("classification", "sklearn", "unit_metric")
@tasks("classification")
def Recall(model, dataset, **kwargs):
    """Unit metric: recall of the model's predictions on the dataset.

    Extra keyword arguments are forwarded unchanged to
    `sklearn.metrics.recall_score` (e.g. `average`, `pos_label`).
    """
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    return recall_score(y_true, y_pred, **kwargs)
@@ -2,75 +2,22 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- import ast
6
- import inspect
7
5
  from dataclasses import dataclass
8
- from typing import List
6
+ from typing import List, Tuple, Union
9
7
  from uuid import uuid4
10
8
 
11
- from ..errors import LoadTestError
12
9
  from ..logging import get_logger
13
- from ..utils import clean_docstring, run_async, test_id_to_name
10
+ from ..tests.decorator import _inspect_signature
11
+ from ..utils import run_async, test_id_to_name
14
12
  from ..vm_models.test.metric import Metric
15
13
  from ..vm_models.test.metric_result import MetricResult
16
14
  from ..vm_models.test.result_summary import ResultSummary, ResultTable
17
15
  from ..vm_models.test.result_wrapper import MetricResultWrapper
18
- from . import _get_metric_class, run_metric
16
+ from . import load_metric, run_metric
19
17
 
20
18
  logger = get_logger(__name__)
21
19
 
22
20
 
23
- def _extract_class_methods(cls):
24
- source = inspect.getsource(cls)
25
- tree = ast.parse(source)
26
-
27
- class MethodVisitor(ast.NodeVisitor):
28
- def __init__(self):
29
- self.methods = {}
30
-
31
- def visit_FunctionDef(self, node):
32
- self.methods[node.name] = node
33
- self.generic_visit(node)
34
-
35
- visitor = MethodVisitor()
36
- visitor.visit(tree)
37
-
38
- return visitor.methods
39
-
40
-
41
- def _extract_required_inputs(cls):
42
- methods = _extract_class_methods(cls)
43
-
44
- class Visitor(ast.NodeVisitor):
45
- def __init__(self):
46
- self.properties = set()
47
- self.visited_methods = set()
48
-
49
- def visit_Attribute(self, node):
50
- if isinstance(node.value, ast.Attribute) and node.value.attr == "inputs":
51
- self.properties.add(node.attr)
52
-
53
- self.generic_visit(node)
54
-
55
- def visit_Call(self, node):
56
- if isinstance(node.func, ast.Attribute) and isinstance(
57
- node.func.value, ast.Name
58
- ):
59
- if node.func.value.id == "self" and node.func.attr in methods:
60
- method_name = node.func.attr
61
-
62
- if method_name not in self.visited_methods:
63
- self.visited_methods.add(method_name)
64
- self.visit(methods[method_name])
65
-
66
- self.generic_visit(node)
67
-
68
- visitor = Visitor()
69
- visitor.visit(methods["run"])
70
-
71
- return visitor.properties
72
-
73
-
74
21
  @dataclass
75
22
  class CompositeMetric(Metric):
76
23
  unit_metrics: List[str] = None
@@ -106,7 +53,7 @@ def load_composite_metric(
106
53
  metric_name: str = None,
107
54
  unit_metrics: List[str] = None,
108
55
  output_template: str = None,
109
- ) -> CompositeMetric:
56
+ ) -> Tuple[Union[None, str], Union[CompositeMetric, None]]:
110
57
  # this function can either create a composite metric from a list of unit metrics or
111
58
  # load a stored composite metric based on the test id
112
59
 
@@ -123,8 +70,7 @@ def load_composite_metric(
123
70
  get_metadata, f"composite_metric_def:{test_id}:output_template"
124
71
  )["json"]["output_template"]
125
72
  except Exception:
126
- logger.error(f"Could not load composite metric {test_id}")
127
- raise LoadTestError(f"Could not load composite metric {test_id}")
73
+ return f"Could not load composite metric {test_id}", None
128
74
 
129
75
  description = f"""
130
76
  Composite metric built from the following unit metrics:
@@ -143,13 +89,12 @@ def load_composite_metric(
143
89
 
144
90
  required_inputs = set()
145
91
  for metric_id in unit_metrics:
146
- metric_cls = _get_metric_class(metric_id)
147
- # required_inputs.update(_extract_required_inputs(metric_cls))
148
- required_inputs.update(metric_cls.required_inputs or [])
92
+ inputs, _ = _inspect_signature(load_metric(metric_id))
93
+ required_inputs.update(inputs.keys())
149
94
 
150
95
  class_def.required_inputs = list(required_inputs)
151
96
 
152
- return class_def
97
+ return None, class_def
153
98
 
154
99
 
155
100
  def run_metrics(
@@ -209,22 +154,24 @@ def run_metrics(
209
154
  results = {}
210
155
 
211
156
  for metric_id in metric_ids:
212
- result = run_metric(
157
+ metric_name = test_id_to_name(metric_id)
158
+ results[metric_name] = run_metric(
213
159
  metric_id=metric_id,
214
160
  inputs=inputs,
215
161
  params=params,
162
+ show=False,
163
+ value_only=True,
216
164
  )
217
- results[list(result.summary.keys())[0]] = result.value
218
165
 
219
166
  test_id = f"validmind.composite_metric.{name}" if not test_id else test_id
220
167
 
221
168
  if not output_template:
222
169
 
223
- def row(key):
170
+ def row(name):
224
171
  return f"""
225
172
  <tr>
226
- <td><strong>{key.upper()}</strong></td>
227
- <td>{{{{ value['{key}'] | number }}}}</td>
173
+ <td><strong>{name}</strong></td>
174
+ <td>{{{{ value['{name}'] | number }}}}</td>
228
175
  </tr>
229
176
  """
230
177
 
@@ -238,9 +185,15 @@ def run_metrics(
238
185
  </tr>
239
186
  </thead>
240
187
  <tbody>
241
- {"".join([row(key) for key in results.keys()])}
188
+ {"".join([row(name) for name in results.keys()])}
242
189
  </tbody>
243
190
  </table>
191
+ <style>
192
+ th, td {{
193
+ padding: 5px;
194
+ text-align: left;
195
+ }}
196
+ </style>
244
197
  """
245
198
 
246
199
  result_wrapper = MetricResultWrapper(
@@ -248,7 +201,7 @@ def run_metrics(
248
201
  result_metadata=[
249
202
  {
250
203
  "content_id": f"metric_description:{test_id}",
251
- "text": clean_docstring(description),
204
+ "text": description,
252
205
  },
253
206
  {
254
207
  "content_id": f"composite_metric_def:{test_id}:unit_metrics",
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import numpy as np

from validmind import tags, tasks


@tags("regression", "unit_metric")
@tasks("regression")
def GiniCoefficient(dataset, model):
    """Unit metric: Gini coefficient of predictions vs. true values.

    Both series are ordered by the true values, their cumulative totals are
    normalized to end at 1, and the Gini coefficient is derived from the area
    under the resulting Lorenz-style curve.
    """
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    # Order both series by the true values.
    order = np.argsort(y_true)
    true_sorted = y_true[order]
    pred_sorted = y_pred[order]

    # Cumulative totals, normalized so each curve ends at 1.
    cum_true = np.cumsum(true_sorted)
    cum_pred = np.cumsum(pred_sorted)
    cum_true_norm = cum_true / np.max(cum_true)
    cum_pred_norm = cum_pred / np.max(cum_pred)

    # Area under the Lorenz curve via the trapezoidal rule.
    lorenz_area = np.trapz(cum_pred_norm, x=cum_true_norm)

    return 1 - 2 * lorenz_area
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import numpy as np

from validmind import tags, tasks


@tags("regression", "unit_metric")
@tasks("regression")
def HuberLoss(model, dataset, delta=1.0):
    """Unit metric: mean Huber loss of the model's predictions.

    Errors smaller than `delta` contribute quadratically; larger errors
    contribute linearly, making the loss robust to outliers.

    Args:
        model: model whose predictions are read from the dataset.
        dataset: dataset providing `y` and `y_pred(model)`.
        delta: threshold at which the loss switches from quadratic to
            linear. Defaults to 1.0, preserving the previous hard-coded
            behavior; now configurable for consistency with the
            `quantile` parameter of `QuantileLoss`.
    """
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    error = y_true - y_pred

    quadratic_part = np.minimum(np.abs(error), delta)
    linear_part = np.abs(error) - quadratic_part

    return np.mean(0.5 * quadratic_part**2 + delta * linear_part)
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import numpy as np

from validmind import tags, tasks


@tags("regression", "unit_metric")
@tasks("regression")
def KolmogorovSmirnovStatistic(dataset, model):
    """Unit metric: two-sample Kolmogorov–Smirnov statistic between the
    distributions of true and predicted values.

    Fix: the previous implementation built `cdf_true` and `cdf_pred` as the
    same array (`np.arange(1, n + 1) / n` for both samples) and never used
    the sorted values, so the statistic was always 0. This version evaluates
    both empirical CDFs on the pooled sample and returns the maximum
    absolute difference, matching the standard two-sample KS definition.
    """
    y_true = np.asarray(dataset.y).flatten()
    y_pred = np.asarray(dataset.y_pred(model)).flatten()

    y_true_sorted = np.sort(y_true)
    y_pred_sorted = np.sort(y_pred)

    # Evaluate both empirical CDFs at every observed value.
    pooled = np.concatenate([y_true_sorted, y_pred_sorted])
    cdf_true = np.searchsorted(y_true_sorted, pooled, side="right") / len(y_true_sorted)
    cdf_pred = np.searchsorted(y_pred_sorted, pooled, side="right") / len(y_pred_sorted)

    # KS statistic: maximum absolute difference between the two ECDFs.
    return np.max(np.abs(cdf_true - cdf_pred))
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import numpy as np

from validmind import tags, tasks


@tags("regression", "unit_metric")
@tasks("regression")
def MeanAbsolutePercentageError(model, dataset):
    """Unit metric: mean absolute percentage error (MAPE), in percent.

    NOTE(review): divides by `dataset.y`, so zero targets produce inf/nan —
    confirm targets are guaranteed non-zero for callers of this metric.
    """
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    relative_errors = np.abs((y_true - y_pred) / y_true)

    return np.mean(relative_errors) * 100
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import numpy as np

from validmind import tags, tasks


@tags("regression", "unit_metric")
@tasks("regression")
def MeanBiasDeviation(model, dataset):
    """Unit metric: mean bias deviation, `mean(y_pred - y_true)`.

    Positive values indicate the model over-predicts on average; negative
    values indicate under-prediction.

    Fix: the 2.1.0 rewrite inverted the sign to `mean(y - y_pred)`; the
    original class-based implementation computed `mean(y_pred - y_true)`,
    the standard forecast-bias convention, which is restored here.
    """
    return np.mean(dataset.y_pred(model) - dataset.y)
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import numpy as np

from validmind import tags, tasks


@tags("regression", "unit_metric")
@tasks("regression")
def QuantileLoss(model, dataset, quantile=0.5):
    """Unit metric: mean quantile (pinball) loss at the given quantile.

    With the default `quantile=0.5` this is half the mean absolute error.
    """
    residual = dataset.y - dataset.y_pred(model)

    # Pinball loss: under-predictions weighted by q, over-predictions by (1 - q).
    over = quantile * residual
    under = (quantile - 1) * residual

    return np.mean(np.maximum(over, under))
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

from sklearn.metrics import r2_score as _r2_score

from validmind import tags, tasks


@tags("regression", "sklearn", "unit_metric")
@tasks("regression")
def AdjustedRSquaredScore(model, dataset):
    """Unit metric: adjusted R² of the model's predictions.

    Fix: uses the standard adjusted R² formula
    `1 - (1 - R²) * (n - 1) / (n - p - 1)`; the previous denominator
    omitted the `- 1` (intercept degree of freedom), overstating the score.
    """
    r2_score = _r2_score(
        dataset.y,
        dataset.y_pred(model),
    )

    row_count = len(dataset.y)  # n: number of observations
    feature_count = len(dataset.get_features_columns())  # p: number of predictors

    return 1 - (1 - r2_score) * (row_count - 1) / (row_count - feature_count - 1)
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

from sklearn.metrics import mean_absolute_error as _mean_absolute_error

from validmind import tags, tasks


@tags("regression", "sklearn", "unit_metric")
@tasks("regression")
def MeanAbsoluteError(model, dataset, **kwargs):
    """Unit metric: mean absolute error of the model's predictions.

    Extra keyword arguments are forwarded unchanged to
    `sklearn.metrics.mean_absolute_error`.
    """
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    return _mean_absolute_error(y_true, y_pred, **kwargs)
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

from sklearn.metrics import mean_squared_error

from validmind import tags, tasks


@tags("regression", "sklearn", "unit_metric")
@tasks("regression")
def MeanSquaredError(model, dataset, **kwargs):
    """Unit metric: mean squared error of the model's predictions.

    Extra keyword arguments are forwarded unchanged to
    `sklearn.metrics.mean_squared_error`.
    """
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    return mean_squared_error(y_true, y_pred, **kwargs)
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

from sklearn.metrics import r2_score

from validmind import tags, tasks


@tags("regression", "sklearn", "unit_metric")
@tasks("regression")
def RSquaredScore(model, dataset):
    """Unit metric: R² (coefficient of determination) of the predictions.

    Fix: the 2.1.0 rewrite named this function `RSquaredError`, although it
    lives in RSquaredScore.py and computes a score, not an error. The
    correctly-named function is defined here, with the misspelled name kept
    as a backward-compatible alias.
    """
    return r2_score(dataset.y, dataset.y_pred(model))


# Backward-compatible alias for the accidental 2.1.0 name.
RSquaredError = RSquaredScore
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import numpy as np
from sklearn.metrics import mean_squared_error

from validmind import tags, tasks


@tags("regression", "sklearn", "unit_metric")
@tasks("regression")
def RootMeanSquaredError(model, dataset, **kwargs):
    """Unit metric: root mean squared error of the model's predictions.

    Extra keyword arguments are forwarded unchanged to
    `sklearn.metrics.mean_squared_error` before the square root is taken.
    """
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    mse = mean_squared_error(y_true, y_pred, **kwargs)

    return np.sqrt(mse)