validmind 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. validmind/__init__.py +6 -3
  2. validmind/__version__.py +1 -1
  3. validmind/ai.py +193 -0
  4. validmind/api_client.py +45 -31
  5. validmind/client.py +33 -6
  6. validmind/datasets/classification/customer_churn.py +2 -2
  7. validmind/datasets/credit_risk/__init__.py +11 -0
  8. validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  9. validmind/datasets/credit_risk/lending_club.py +394 -0
  10. validmind/datasets/nlp/__init__.py +5 -0
  11. validmind/datasets/nlp/cnn_dailymail.py +98 -0
  12. validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
  13. validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
  14. validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
  15. validmind/errors.py +11 -1
  16. validmind/logging.py +9 -2
  17. validmind/models/huggingface.py +2 -2
  18. validmind/models/pytorch.py +3 -3
  19. validmind/models/sklearn.py +4 -4
  20. validmind/template.py +2 -2
  21. validmind/test_suites/__init__.py +4 -2
  22. validmind/tests/__init__.py +130 -45
  23. validmind/tests/data_validation/DatasetDescription.py +0 -1
  24. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  25. validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
  26. validmind/tests/data_validation/ScatterPlot.py +8 -2
  27. validmind/tests/data_validation/nlp/StopWords.py +1 -6
  28. validmind/tests/data_validation/nlp/TextDescription.py +20 -9
  29. validmind/tests/decorator.py +313 -0
  30. validmind/tests/model_validation/BertScore.py +1 -1
  31. validmind/tests/model_validation/BertScoreAggregate.py +1 -1
  32. validmind/tests/model_validation/BleuScore.py +1 -1
  33. validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
  34. validmind/tests/model_validation/ContextualRecall.py +1 -1
  35. validmind/tests/model_validation/FeaturesAUC.py +110 -0
  36. validmind/tests/model_validation/MeteorScore.py +92 -0
  37. validmind/tests/model_validation/RegardHistogram.py +6 -7
  38. validmind/tests/model_validation/RegardScore.py +4 -6
  39. validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
  40. validmind/tests/model_validation/RougeMetrics.py +7 -5
  41. validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
  42. validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
  43. validmind/tests/model_validation/TokenDisparity.py +1 -1
  44. validmind/tests/model_validation/ToxicityHistogram.py +1 -1
  45. validmind/tests/model_validation/ToxicityScore.py +1 -1
  46. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  47. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
  48. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
  49. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  50. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +16 -17
  51. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
  52. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  53. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
  54. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
  55. validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
  56. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
  57. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
  58. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
  59. validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
  60. validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
  61. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
  62. validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
  63. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +55 -5
  64. validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
  65. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +11 -5
  66. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
  67. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
  68. validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
  69. validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
  70. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
  71. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
  72. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  73. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  74. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  75. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +2 -2
  76. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
  77. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
  78. validmind/tests/prompt_validation/ai_powered_test.py +2 -0
  79. validmind/tests/test_providers.py +14 -124
  80. validmind/unit_metrics/__init__.py +75 -70
  81. validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
  82. validmind/unit_metrics/classification/sklearn/F1.py +13 -0
  83. validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
  84. validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
  85. validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
  86. validmind/unit_metrics/composite.py +228 -0
  87. validmind/unit_metrics/regression/GiniCoefficient.py +33 -0
  88. validmind/unit_metrics/regression/HuberLoss.py +23 -0
  89. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +30 -0
  90. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +16 -0
  91. validmind/unit_metrics/regression/MeanBiasDeviation.py +13 -0
  92. validmind/unit_metrics/regression/QuantileLoss.py +15 -0
  93. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +21 -0
  94. validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +13 -0
  95. validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +13 -0
  96. validmind/unit_metrics/regression/sklearn/RSquaredScore.py +13 -0
  97. validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +20 -0
  98. validmind/utils.py +20 -31
  99. validmind/vm_models/__init__.py +0 -2
  100. validmind/vm_models/dataset.py +623 -29
  101. validmind/vm_models/figure.py +52 -17
  102. validmind/vm_models/test/metric.py +33 -31
  103. validmind/vm_models/test/output_template.py +0 -27
  104. validmind/vm_models/test/result_wrapper.py +68 -36
  105. validmind/vm_models/test/test.py +4 -2
  106. validmind/vm_models/test/threshold_test.py +24 -14
  107. validmind/vm_models/test_context.py +7 -0
  108. validmind/vm_models/test_suite/runner.py +1 -1
  109. validmind/vm_models/test_suite/summary.py +3 -3
  110. validmind/vm_models/test_suite/test.py +1 -1
  111. validmind/vm_models/test_suite/test_suite.py +2 -1
  112. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/METADATA +18 -18
  113. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/RECORD +116 -94
  114. validmind-2.1.0.dist-info/entry_points.txt +3 -0
  115. validmind/tests/__types__.py +0 -62
  116. validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
  117. validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
  118. validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
  119. validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
  120. validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -20
  121. validmind/unit_metrics/sklearn/classification/F1.py +0 -22
  122. validmind/unit_metrics/sklearn/classification/Precision.py +0 -22
  123. validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -20
  124. validmind/unit_metrics/sklearn/classification/Recall.py +0 -20
  125. validmind/vm_models/test/unit_metric.py +0 -88
  126. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
  127. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
validmind/unit_metrics/__init__.py
@@ -3,14 +3,13 @@
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

  import hashlib
- import importlib
  import json
+ from importlib import import_module

  import numpy as np

- from validmind.vm_models import TestInput
-
- from ..utils import get_model_info
+ from ..tests.decorator import _build_result, _inspect_signature
+ from ..utils import get_model_info, test_id_to_name

  unit_metric_results_cache = {}

@@ -134,52 +133,6 @@ def _fast_hash(df, sample_size=1000, model_and_prediction_info=None):
      return hash_obj.hexdigest()


- def _get_metric_class(metric_id):
-     """Get the metric class by metric_id
-
-     This function will load the metric class by metric_id.
-
-     Args:
-         metric_id (str): The full metric id (e.g. 'validmind.vm_models.test.v2.model_validation.sklearn.F1')
-
-     Returns:
-         Metric: The metric class
-     """
-
-     metric_module = importlib.import_module(f"{metric_id}")
-
-     class_name = metric_id.split(".")[-1]
-
-     # Access the class within the F1 module
-     metric_class = getattr(metric_module, class_name)
-
-     return metric_class
-
-
- def get_input_type(input_obj):
-     """
-     Determines whether the input object is a 'dataset' or 'model' based on its class module path.
-
-     Args:
-         input_obj: The object to type check.
-
-     Returns:
-         str: 'dataset' or 'model' depending on the object's module, or raises ValueError.
-     """
-     # Obtain the class object of input_obj (for clarity and debugging)
-     class_obj = input_obj.__class__
-
-     # Obtain the module name as a string from the class object
-     class_module = class_obj.__module__
-
-     if "validmind.vm_models.dataset" in class_module:
-         return "dataset"
-     elif "validmind.models" in class_module:
-         return "model"
-     else:
-         raise ValueError("Input must be of type validmind Dataset or Model")
-
-
  def get_metric_cache_key(metric_id, params, inputs):
      cache_elements = [metric_id]

@@ -209,36 +162,88 @@ def get_metric_cache_key(metric_id, params, inputs):
      return key


- def run_metric(metric_id=None, inputs=None, params=None):
-     """Run a single metric
-
-     This function provides a high level interface for running a single metric. A metric
-     is a single test that calculates a value based on the input data.
+ def load_metric(metric_id):
+     """Load a metric class from a string

      Args:
-         metric_id (str): The metric name (e.g. 'F1')
-         params (dict): A dictionary of the metric parameters
+         metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.sklearn.F1')

      Returns:
-         MetricResult: The metric result object
+         callable: The metric function
      """
-     cache_key = get_metric_cache_key(metric_id, params, inputs)
+     return getattr(import_module(metric_id), metric_id.split(".")[-1])

-     # Check if the metric value already exists in the global variable
-     if cache_key in unit_metric_results_cache:
-         return unit_metric_results_cache[cache_key]

-     # Load the metric class by metric_id
-     metric_class = _get_metric_class(metric_id)
+ def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False):
+     """Run a single metric and cache the results

-     # Initialize the metric
-     metric = metric_class(test_id=metric_id, inputs=TestInput(inputs), params=params)
-
-     # Run the metric
-     result = metric.run()
+     Args:
+         metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.sklearn.F1')
+         inputs (dict): A dictionary of the metric inputs
+         params (dict): A dictionary of the metric parameters
+         show (bool): Whether to display the results
+         value_only (bool): Whether to return only the value
+     """
+     inputs = inputs or {}
+     params = params or {}

      cache_key = get_metric_cache_key(metric_id, params, inputs)

-     unit_metric_results_cache[cache_key] = result
+     if cache_key not in unit_metric_results_cache:
+         metric = load_metric(metric_id)
+         _inputs, _params = _inspect_signature(metric)
+
+         result = metric(
+             **{k: v for k, v in inputs.items() if k in _inputs.keys()},
+             **{k: v for k, v in params.items() if k in _params.keys()},
+         )
+         unit_metric_results_cache[cache_key] = (result, list(_inputs.keys()))
+
+     value = unit_metric_results_cache[cache_key][0]
+
+     if value_only:
+         return value
+
+     output_template = f"""
+     <table>
+         <thead>
+             <tr>
+                 <th>Metric</th>
+                 <th>Value</th>
+             </tr>
+         </thead>
+         <tbody>
+             <tr>
+                 <td><strong>{test_id_to_name(metric_id)}</strong></td>
+                 <td>{value:.4f}</td>
+             </tr>
+         </tbody>
+     </table>
+     <style>
+         th, td {{
+             padding: 5px;
+             text-align: left;
+         }}
+     </style>
+     """
+     result = _build_result(
+         results=value,
+         test_id=metric_id,
+         description="",
+         output_template=output_template,
+         inputs=unit_metric_results_cache[cache_key][1],
+     )
+
+     # in case the user tries to log the result object
+     def log(self):
+         raise Exception(
+             "Cannot log unit metrics directly..."
+             "You can run this unit metric as part of a composite metric and log that"
+         )
+
+     result.log = log
+
+     if show:
+         result.show()

      return result
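
The reworked run_metric API takes a fully qualified metric ID plus inputs and params dictionaries, caches the computed value, and either renders an HTML result table or returns the raw value. A minimal usage sketch, assuming the usual ValidMind workflow of wrapping a model and dataset with vm.init_model / vm.init_dataset and assigning predictions; the scikit-learn model, input IDs, and parameter values below are illustrative and not part of this release:

import validmind as vm
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from validmind.unit_metrics import run_metric

# Hypothetical setup: any fitted classifier and a DataFrame with a target column
data = load_breast_cancer(as_frame=True)
model = LogisticRegression(max_iter=1000).fit(data.data, data.target)

vm_model = vm.init_model(model, input_id="log_reg")
vm_dataset = vm.init_dataset(
    dataset=data.frame, input_id="test_ds", target_column="target"
)
vm_dataset.assign_predictions(model=vm_model)

# params are forwarded to sklearn.metrics.f1_score via **kwargs;
# value_only=True skips the HTML table and returns just the float
f1 = run_metric(
    "validmind.unit_metrics.classification.sklearn.F1",
    inputs={"model": vm_model, "dataset": vm_dataset},
    params={"average": "macro"},
    show=False,
    value_only=True,
)
print(f1)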
validmind/unit_metrics/classification/sklearn/Accuracy.py
@@ -0,0 +1,14 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from sklearn.metrics import accuracy_score
+
+ from validmind import tags, tasks
+
+
+ @tags("classification", "sklearn", "unit_metric")
+ @tasks("classification")
+ def Accuracy(dataset, model):
+     """Calculates the accuracy of a model"""
+     return accuracy_score(dataset.y, dataset.y_pred(model))
validmind/unit_metrics/classification/sklearn/F1.py
@@ -0,0 +1,13 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from sklearn.metrics import f1_score
+
+ from validmind import tags, tasks
+
+
+ @tags("classification", "sklearn", "unit_metric")
+ @tasks("classification")
+ def F1(model, dataset, **kwargs):
+     return f1_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/classification/sklearn/Precision.py
@@ -0,0 +1,13 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from sklearn.metrics import precision_score
+
+ from validmind import tags, tasks
+
+
+ @tags("classification", "sklearn", "unit_metric")
+ @tasks("classification")
+ def Precision(model, dataset, **kwargs):
+     return precision_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/classification/sklearn/ROC_AUC.py
@@ -0,0 +1,13 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from sklearn.metrics import roc_auc_score
+
+ from validmind import tags, tasks
+
+
+ @tags("classification", "sklearn", "unit_metric")
+ @tasks("classification")
+ def ROC_AUC(model, dataset, **kwargs):
+     return roc_auc_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/classification/sklearn/Recall.py
@@ -0,0 +1,13 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from sklearn.metrics import recall_score
+
+ from validmind import tags, tasks
+
+
+ @tags("classification", "sklearn", "unit_metric")
+ @tasks("classification")
+ def Recall(model, dataset, **kwargs):
+     return recall_score(dataset.y, dataset.y_pred(model), **kwargs)
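
Because each unit metric is now just a decorated function, it can also be imported and called directly, bypassing run_metric's caching and result rendering. A sketch reusing the illustrative vm_model / vm_dataset objects from the earlier example:

from validmind.unit_metrics.classification.sklearn.Accuracy import Accuracy
from validmind.unit_metrics.classification.sklearn.Recall import Recall

# Direct calls return plain floats; extra keyword arguments flow through **kwargs
# for the metrics that accept them (e.g. Recall, F1, Precision, ROC_AUC)
acc = Accuracy(dataset=vm_dataset, model=vm_model)
rec = Recall(model=vm_model, dataset=vm_dataset, average="weighted")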
validmind/unit_metrics/composite.py
@@ -0,0 +1,228 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from dataclasses import dataclass
+ from typing import List, Tuple, Union
+ from uuid import uuid4
+
+ from ..logging import get_logger
+ from ..tests.decorator import _inspect_signature
+ from ..utils import run_async, test_id_to_name
+ from ..vm_models.test.metric import Metric
+ from ..vm_models.test.metric_result import MetricResult
+ from ..vm_models.test.result_summary import ResultSummary, ResultTable
+ from ..vm_models.test.result_wrapper import MetricResultWrapper
+ from . import load_metric, run_metric
+
+ logger = get_logger(__name__)
+
+
+ @dataclass
+ class CompositeMetric(Metric):
+     unit_metrics: List[str] = None
+
+     def __post_init__(self):
+         if self._unit_metrics:
+             self.unit_metrics = self._unit_metrics
+         elif self.unit_metrics is None:
+             raise ValueError("unit_metrics must be provided")
+
+         if hasattr(self, "_output_template") and self._output_template:
+             self.output_template = self._output_template
+
+     def run(self):
+         self.result = run_metrics(
+             test_id=self.test_id,
+             metric_ids=self.unit_metrics,
+             description=self.description(),
+             inputs=self._get_input_dict(),
+             params=self.params,
+             output_template=self.output_template,
+             show=False,
+         )
+
+         return self.result
+
+     def summary(self, result: dict):
+         return ResultSummary(results=[ResultTable(data=[result])])
+
+
+ def load_composite_metric(
+     test_id: str = None,
+     metric_name: str = None,
+     unit_metrics: List[str] = None,
+     output_template: str = None,
+ ) -> Tuple[Union[None, str], Union[CompositeMetric, None]]:
+     # this function can either create a composite metric from a list of unit metrics or
+     # load a stored composite metric based on the test id
+
+     # TODO: figure out this circular import thing:
+     from ..api_client import get_metadata
+
+     if test_id:
+         # get the unit metric ids and output template (if any) from the metadata
+         try:
+             unit_metrics = run_async(
+                 get_metadata, f"composite_metric_def:{test_id}:unit_metrics"
+             )["json"]
+             output_template = run_async(
+                 get_metadata, f"composite_metric_def:{test_id}:output_template"
+             )["json"]["output_template"]
+         except Exception:
+             return f"Could not load composite metric {test_id}", None
+
+     description = f"""
+     Composite metric built from the following unit metrics:
+     {', '.join([metric_id.split('.')[-1] for metric_id in unit_metrics])}
+     """
+
+     class_def = type(
+         test_id.split(".")[-1] if test_id else metric_name,
+         (CompositeMetric,),
+         {
+             "__doc__": description,
+             "_unit_metrics": unit_metrics,
+             "_output_template": output_template,
+         },
+     )
+
+     required_inputs = set()
+     for metric_id in unit_metrics:
+         inputs, _ = _inspect_signature(load_metric(metric_id))
+         required_inputs.update(inputs.keys())
+
+     class_def.required_inputs = list(required_inputs)
+
+     return None, class_def
+
+
+ def run_metrics(
+     name: str = None,
+     metric_ids: List[str] = None,
+     description: str = None,
+     output_template: str = None,
+     inputs: dict = None,
+     params: dict = None,
+     test_id: str = None,
+     show: bool = True,
+ ) -> MetricResultWrapper:
+     """Run a composite metric
+
+     Composite metrics are metrics that are composed of multiple unit metrics. This
+     works by running individual unit metrics and then combining the results into a
+     single "MetricResult" object that can be logged and displayed just like any other
+     metric result. The special thing about composite metrics is that when they are
+     logged to the platform, metadata describing the unit metrics and output template
+     used to generate the composite metric is also logged. This means that by grabbing
+     the metadata for a composite metric (identified by the test ID
+     `validmind.composite_metric.<name>`) the framework can rebuild and rerun it at
+     any time.
+
+     Args:
+         name (str, optional): Name of the composite metric. Required if test_id is not
+             provided. Defaults to None.
+         metric_ids (list[str]): List of unit metric IDs to run. Required.
+         description (str, optional): Description of the composite metric. Defaults to
+             None.
+         output_template (str, optional): Output template to customize the result
+             table. Defaults to None.
+         inputs (dict, optional): Inputs to pass to the unit metrics. Defaults to None.
+         params (dict, optional): Parameters to pass to the unit metrics. Defaults to
+             None.
+         test_id (str, optional): Test ID of the composite metric. Required if name is
+             not provided. Defaults to None.
+         show (bool, optional): Whether to show the result immediately. Defaults to True.
+
+     Raises:
+         ValueError: If metric_ids is not provided
+         ValueError: If name or key is not provided
+
+     Returns:
+         MetricResultWrapper: The result wrapper object
+     """
+     if not metric_ids:
+         raise ValueError("metric_ids must be provided")
+
+     if not name and not test_id:
+         raise ValueError("name or key must be provided")
+
+     # if name is provided, make sure to squash it into a camel case string
+     if name:
+         name = "".join(word[0].upper() + word[1:] for word in name.split())
+
+     results = {}
+
+     for metric_id in metric_ids:
+         metric_name = test_id_to_name(metric_id)
+         results[metric_name] = run_metric(
+             metric_id=metric_id,
+             inputs=inputs,
+             params=params,
+             show=False,
+             value_only=True,
+         )
+
+     test_id = f"validmind.composite_metric.{name}" if not test_id else test_id
+
+     if not output_template:
+
+         def row(name):
+             return f"""
+             <tr>
+                 <td><strong>{name}</strong></td>
+                 <td>{{{{ value['{name}'] | number }}}}</td>
+             </tr>
+             """
+
+         output_template = f"""
+         <h1>{test_id_to_name(test_id)}</h1>
+         <table>
+             <thead>
+                 <tr>
+                     <th>Metric</th>
+                     <th>Value</th>
+                 </tr>
+             </thead>
+             <tbody>
+                 {"".join([row(name) for name in results.keys()])}
+             </tbody>
+         </table>
+         <style>
+             th, td {{
+                 padding: 5px;
+                 text-align: left;
+             }}
+         </style>
+         """
+
+     result_wrapper = MetricResultWrapper(
+         result_id=test_id,
+         result_metadata=[
+             {
+                 "content_id": f"metric_description:{test_id}",
+                 "text": description,
+             },
+             {
+                 "content_id": f"composite_metric_def:{test_id}:unit_metrics",
+                 "json": metric_ids,
+             },
+             {
+                 "content_id": f"composite_metric_def:{test_id}:output_template",
+                 "json": {"output_template": output_template},
+             },
+         ],
+         inputs=list(inputs.keys()),
+         output_template=output_template,
+         metric=MetricResult(
+             key=test_id,
+             ref_id=str(uuid4()),
+             value=results,
+             summary=ResultSummary(results=[ResultTable(data=[results])]),
+         ),
+     )
+
+     if show:
+         result_wrapper.show()
+
+     return result_wrapper
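
A sketch of how run_metrics could assemble the classification unit metrics above into a single loggable composite metric; the metric name and inputs are illustrative, and logging assumes an active connection to the ValidMind platform (vm.init):

from validmind.unit_metrics.composite import run_metrics

result = run_metrics(
    name="Classification Performance",  # squashed to "ClassificationPerformance"
    metric_ids=[
        "validmind.unit_metrics.classification.sklearn.Accuracy",
        "validmind.unit_metrics.classification.sklearn.Precision",
        "validmind.unit_metrics.classification.sklearn.Recall",
        "validmind.unit_metrics.classification.sklearn.F1",
    ],
    inputs={"model": vm_model, "dataset": vm_dataset},
)

# Logging stores the unit-metric list and output template as metadata, so the test ID
# validmind.composite_metric.ClassificationPerformance can later be rebuilt and rerun
# via load_composite_metric(test_id=...)
result.log()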
validmind/unit_metrics/regression/GiniCoefficient.py
@@ -0,0 +1,33 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import numpy as np
+
+ from validmind import tags, tasks
+
+
+ @tags("regression", "unit_metric")
+ @tasks("regression")
+ def GiniCoefficient(dataset, model):
+     y_true = dataset.y
+     y_pred = dataset.y_pred(model)
+
+     # Sort true values and corresponding predicted values
+     idx = np.argsort(y_true)
+     y_true_sorted = y_true[idx]
+     y_pred_sorted = y_pred[idx]
+
+     # Compute cumulative sums
+     cumsum_true = np.cumsum(y_true_sorted)
+     cumsum_pred = np.cumsum(y_pred_sorted)
+
+     # Normalize cumulative sums
+     cumsum_true_norm = cumsum_true / np.max(cumsum_true)
+     cumsum_pred_norm = cumsum_pred / np.max(cumsum_pred)
+
+     # Compute area under the Lorenz curve
+     area_lorenz = np.trapz(cumsum_pred_norm, x=cumsum_true_norm)
+
+     # Compute Gini coefficient
+     return 1 - 2 * area_lorenz
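
In the implementation above, predictions are re-ordered by the true values, both cumulative sums are normalised to end at 1, and the area under the resulting Lorenz-style curve is approximated with the trapezoidal rule, so the returned value corresponds to

G = 1 - 2 \int_0^1 L(u) \, du

where L is the normalised cumulative predicted total plotted against the normalised cumulative true total.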
validmind/unit_metrics/regression/HuberLoss.py
@@ -0,0 +1,23 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import numpy as np
+
+ from validmind import tags, tasks
+
+
+ @tags("regression", "unit_metric")
+ @tasks("regression")
+ def HuberLoss(model, dataset):
+     y_true = dataset.y
+     y_pred = dataset.y_pred(model)
+
+     # delta - Threshold for the squared error to be linear or quadratic.
+     delta = 1.0
+     error = y_true - y_pred
+
+     quadratic_part = np.minimum(np.abs(error), delta)
+     linear_part = np.abs(error) - quadratic_part
+
+     return np.mean(0.5 * quadratic_part**2 + delta * linear_part)
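
With the fixed threshold delta = 1.0 used above, the averaged per-observation loss is the standard Huber loss: for e = y - \hat{y},

L_\delta(e) = \begin{cases} \tfrac{1}{2} e^2 & \text{if } |e| \le \delta \\ \delta \left( |e| - \tfrac{1}{2} \delta \right) & \text{otherwise} \end{cases}

which is exactly what the quadratic_part / linear_part decomposition computes.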
validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py
@@ -0,0 +1,30 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import numpy as np
+
+ from validmind import tags, tasks
+
+
+ @tags("regression", "unit_metric")
+ @tasks("regression")
+ def KolmogorovSmirnovStatistic(dataset, model):
+     y_true = dataset.y.flatten()
+     y_pred = dataset.y_pred(model)
+
+     # Sort true values and corresponding predicted values
+     idx_true = np.argsort(y_true)
+     idx_pred = np.argsort(y_pred)
+     y_true_sorted = y_true[idx_true]
+     y_pred_sorted = y_pred[idx_pred]
+
+     # Compute cumulative distribution functions (CDFs)
+     cdf_true = np.arange(1, len(y_true_sorted) + 1) / len(y_true_sorted)
+     cdf_pred = np.arange(1, len(y_pred_sorted) + 1) / len(y_pred_sorted)
+
+     # Compute absolute differences between CDFs
+     diff_cdf = np.abs(cdf_true - cdf_pred)
+
+     # Find maximum absolute difference
+     return np.max(diff_cdf)
validmind/unit_metrics/regression/MeanAbsolutePercentageError.py
@@ -0,0 +1,16 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import numpy as np
+
+ from validmind import tags, tasks
+
+
+ @tags("regression", "unit_metric")
+ @tasks("regression")
+ def MeanAbsolutePercentageError(model, dataset):
+     y_true = dataset.y
+     y_pred = dataset.y_pred(model)
+
+     return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
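
Written out, the returned value is

\mathrm{MAPE} = \frac{100}{n} \sum_{i=1}^{n} \left| \frac{y_i - \hat{y}_i}{y_i} \right|

which is undefined for observations with y_i = 0.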
validmind/unit_metrics/regression/MeanBiasDeviation.py
@@ -0,0 +1,13 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import numpy as np
+
+ from validmind import tags, tasks
+
+
+ @tags("regression", "unit_metric")
+ @tasks("regression")
+ def MeanBiasDeviation(model, dataset):
+     return np.mean(dataset.y - dataset.y_pred(model))
validmind/unit_metrics/regression/QuantileLoss.py
@@ -0,0 +1,15 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ import numpy as np
+
+ from validmind import tags, tasks
+
+
+ @tags("regression", "unit_metric")
+ @tasks("regression")
+ def QuantileLoss(model, dataset, quantile=0.5):
+     error = dataset.y - dataset.y_pred(model)
+
+     return np.mean(np.maximum(quantile * error, (quantile - 1) * error))
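
The returned value is the mean pinball (quantile) loss at the requested quantile q (default 0.5):

L_q = \frac{1}{n} \sum_{i=1}^{n} \max\bigl( q \, (y_i - \hat{y}_i), \; (q - 1)(y_i - \hat{y}_i) \bigr)

For q = 0.5 this reduces to half the mean absolute error.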
validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py
@@ -0,0 +1,21 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from sklearn.metrics import r2_score as _r2_score
+
+ from validmind import tags, tasks
+
+
+ @tags("regression", "sklearn", "unit_metric")
+ @tasks("regression")
+ def AdjustedRSquaredScore(model, dataset):
+     r2_score = _r2_score(
+         dataset.y,
+         dataset.y_pred(model),
+     )
+
+     row_count = len(dataset.y)
+     feature_count = len(dataset.get_features_columns())
+
+     return 1 - (1 - r2_score) * (row_count - 1) / (row_count - feature_count)
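
As implemented above, the adjustment applied to the r2_score value is

\bar{R}^2 = 1 - (1 - R^2) \, \frac{n - 1}{n - p}

where n is the number of rows and p is the number of feature columns reported by dataset.get_features_columns().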
validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py
@@ -0,0 +1,13 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from sklearn.metrics import mean_absolute_error as _mean_absolute_error
+
+ from validmind import tags, tasks
+
+
+ @tags("regression", "sklearn", "unit_metric")
+ @tasks("regression")
+ def MeanAbsoluteError(model, dataset, **kwargs):
+     return _mean_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/regression/sklearn/MeanSquaredError.py
@@ -0,0 +1,13 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from sklearn.metrics import mean_squared_error
+
+ from validmind import tags, tasks
+
+
+ @tags("regression", "sklearn", "unit_metric")
+ @tasks("regression")
+ def MeanSquaredError(model, dataset, **kwargs):
+     return mean_squared_error(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/regression/sklearn/RSquaredScore.py
@@ -0,0 +1,13 @@
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+ # See the LICENSE file in the root of this repository for details.
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+ from sklearn.metrics import r2_score
+
+ from validmind import tags, tasks
+
+
+ @tags("regression", "sklearn", "unit_metric")
+ @tasks("regression")
+ def RSquaredError(model, dataset):
+     return r2_score(dataset.y, dataset.y_pred(model))