validmind 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +6 -3
- validmind/__version__.py +1 -1
- validmind/ai.py +193 -0
- validmind/api_client.py +45 -31
- validmind/client.py +33 -6
- validmind/datasets/classification/customer_churn.py +2 -2
- validmind/datasets/credit_risk/__init__.py +11 -0
- validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club.py +394 -0
- validmind/datasets/nlp/__init__.py +5 -0
- validmind/datasets/nlp/cnn_dailymail.py +98 -0
- validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
- validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
- validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
- validmind/errors.py +11 -1
- validmind/logging.py +9 -2
- validmind/models/huggingface.py +2 -2
- validmind/models/pytorch.py +3 -3
- validmind/models/sklearn.py +4 -4
- validmind/template.py +2 -2
- validmind/test_suites/__init__.py +4 -2
- validmind/tests/__init__.py +130 -45
- validmind/tests/data_validation/DatasetDescription.py +0 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +8 -2
- validmind/tests/data_validation/nlp/StopWords.py +1 -6
- validmind/tests/data_validation/nlp/TextDescription.py +20 -9
- validmind/tests/decorator.py +313 -0
- validmind/tests/model_validation/BertScore.py +1 -1
- validmind/tests/model_validation/BertScoreAggregate.py +1 -1
- validmind/tests/model_validation/BleuScore.py +1 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
- validmind/tests/model_validation/ContextualRecall.py +1 -1
- validmind/tests/model_validation/FeaturesAUC.py +110 -0
- validmind/tests/model_validation/MeteorScore.py +92 -0
- validmind/tests/model_validation/RegardHistogram.py +6 -7
- validmind/tests/model_validation/RegardScore.py +4 -6
- validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
- validmind/tests/model_validation/RougeMetrics.py +7 -5
- validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
- validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
- validmind/tests/model_validation/TokenDisparity.py +1 -1
- validmind/tests/model_validation/ToxicityHistogram.py +1 -1
- validmind/tests/model_validation/ToxicityScore.py +1 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +16 -17
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
- validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +55 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +11 -5
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
- validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
- validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
- validmind/tests/prompt_validation/ai_powered_test.py +2 -0
- validmind/tests/test_providers.py +14 -124
- validmind/unit_metrics/__init__.py +75 -70
- validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
- validmind/unit_metrics/classification/sklearn/F1.py +13 -0
- validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
- validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
- validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
- validmind/unit_metrics/composite.py +228 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +33 -0
- validmind/unit_metrics/regression/HuberLoss.py +23 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +30 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +16 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +13 -0
- validmind/unit_metrics/regression/QuantileLoss.py +15 -0
- validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +21 -0
- validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +13 -0
- validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +13 -0
- validmind/unit_metrics/regression/sklearn/RSquaredScore.py +13 -0
- validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +20 -0
- validmind/utils.py +20 -31
- validmind/vm_models/__init__.py +0 -2
- validmind/vm_models/dataset.py +623 -29
- validmind/vm_models/figure.py +52 -17
- validmind/vm_models/test/metric.py +33 -31
- validmind/vm_models/test/output_template.py +0 -27
- validmind/vm_models/test/result_wrapper.py +68 -36
- validmind/vm_models/test/test.py +4 -2
- validmind/vm_models/test/threshold_test.py +24 -14
- validmind/vm_models/test_context.py +7 -0
- validmind/vm_models/test_suite/runner.py +1 -1
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +1 -1
- validmind/vm_models/test_suite/test_suite.py +2 -1
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/METADATA +18 -18
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/RECORD +116 -94
- validmind-2.1.0.dist-info/entry_points.txt +3 -0
- validmind/tests/__types__.py +0 -62
- validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
- validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
- validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
- validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
- validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -20
- validmind/unit_metrics/sklearn/classification/F1.py +0 -22
- validmind/unit_metrics/sklearn/classification/Precision.py +0 -22
- validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -20
- validmind/unit_metrics/sklearn/classification/Recall.py +0 -20
- validmind/vm_models/test/unit_metric.py +0 -88
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
validmind/unit_metrics/__init__.py
@@ -3,14 +3,13 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

 import hashlib
-import importlib
 import json
+from importlib import import_module

 import numpy as np

-from
-
-from ..utils import get_model_info
+from ..tests.decorator import _build_result, _inspect_signature
+from ..utils import get_model_info, test_id_to_name

 unit_metric_results_cache = {}

@@ -134,52 +133,6 @@ def _fast_hash(df, sample_size=1000, model_and_prediction_info=None):
     return hash_obj.hexdigest()


-def _get_metric_class(metric_id):
-    """Get the metric class by metric_id
-
-    This function will load the metric class by metric_id.
-
-    Args:
-        metric_id (str): The full metric id (e.g. 'validmind.vm_models.test.v2.model_validation.sklearn.F1')
-
-    Returns:
-        Metric: The metric class
-    """
-
-    metric_module = importlib.import_module(f"{metric_id}")
-
-    class_name = metric_id.split(".")[-1]
-
-    # Access the class within the F1 module
-    metric_class = getattr(metric_module, class_name)
-
-    return metric_class
-
-
-def get_input_type(input_obj):
-    """
-    Determines whether the input object is a 'dataset' or 'model' based on its class module path.
-
-    Args:
-        input_obj: The object to type check.
-
-    Returns:
-        str: 'dataset' or 'model' depending on the object's module, or raises ValueError.
-    """
-    # Obtain the class object of input_obj (for clarity and debugging)
-    class_obj = input_obj.__class__
-
-    # Obtain the module name as a string from the class object
-    class_module = class_obj.__module__
-
-    if "validmind.vm_models.dataset" in class_module:
-        return "dataset"
-    elif "validmind.models" in class_module:
-        return "model"
-    else:
-        raise ValueError("Input must be of type validmind Dataset or Model")
-
-
 def get_metric_cache_key(metric_id, params, inputs):
     cache_elements = [metric_id]

@@ -209,36 +162,88 @@ def get_metric_cache_key(metric_id, params, inputs):
     return key


-def
-    """
-
-    This function provides a high level interface for running a single metric. A metric
-    is a single test that calculates a value based on the input data.
+def load_metric(metric_id):
+    """Load a metric class from a string

     Args:
-        metric_id (str): The metric
-        params (dict): A dictionary of the metric parameters
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.sklearn.F1')

     Returns:
-
+        callable: The metric function
     """
-
+    return getattr(import_module(metric_id), metric_id.split(".")[-1])

-    # Check if the metric value already exists in the global variable
-    if cache_key in unit_metric_results_cache:
-        return unit_metric_results_cache[cache_key]

-
-
+def run_metric(metric_id, inputs=None, params=None, show=True, value_only=False):
+    """Run a single metric and cache the results

-
-
-
-
-
+    Args:
+        metric_id (str): The metric id (e.g. 'validmind.unit_metrics.classification.sklearn.F1')
+        inputs (dict): A dictionary of the metric inputs
+        params (dict): A dictionary of the metric parameters
+        show (bool): Whether to display the results
+        value_only (bool): Whether to return only the value
+    """
+    inputs = inputs or {}
+    params = params or {}

     cache_key = get_metric_cache_key(metric_id, params, inputs)

-
+    if cache_key not in unit_metric_results_cache:
+        metric = load_metric(metric_id)
+        _inputs, _params = _inspect_signature(metric)
+
+        result = metric(
+            **{k: v for k, v in inputs.items() if k in _inputs.keys()},
+            **{k: v for k, v in params.items() if k in _params.keys()},
+        )
+        unit_metric_results_cache[cache_key] = (result, list(_inputs.keys()))
+
+    value = unit_metric_results_cache[cache_key][0]
+
+    if value_only:
+        return value
+
+    output_template = f"""
+    <table>
+        <thead>
+            <tr>
+                <th>Metric</th>
+                <th>Value</th>
+            </tr>
+        </thead>
+        <tbody>
+            <tr>
+                <td><strong>{test_id_to_name(metric_id)}</strong></td>
+                <td>{value:.4f}</td>
+            </tr>
+        </tbody>
+    </table>
+    <style>
+        th, td {{
+            padding: 5px;
+            text-align: left;
+        }}
+    </style>
+    """
+    result = _build_result(
+        results=value,
+        test_id=metric_id,
+        description="",
+        output_template=output_template,
+        inputs=unit_metric_results_cache[cache_key][1],
+    )
+
+    # in case the user tries to log the result object
+    def log(self):
+        raise Exception(
+            "Cannot log unit metrics directly..."
+            "You can run this unit metric as part of a composite metric and log that"
+        )
+
+    result.log = log
+
+    if show:
+        result.show()

     return result
validmind/unit_metrics/classification/sklearn/Accuracy.py (new)
@@ -0,0 +1,14 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from sklearn.metrics import accuracy_score
+
+from validmind import tags, tasks
+
+
+@tags("classification", "sklearn", "unit_metric")
+@tasks("classification")
+def Accuracy(dataset, model):
+    """Calculates the accuracy of a model"""
+    return accuracy_score(dataset.y, dataset.y_pred(model))
validmind/unit_metrics/classification/sklearn/F1.py (new)
@@ -0,0 +1,13 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from sklearn.metrics import f1_score
+
+from validmind import tags, tasks
+
+
+@tags("classification", "sklearn", "unit_metric")
+@tasks("classification")
+def F1(model, dataset, **kwargs):
+    return f1_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/classification/sklearn/Precision.py (new)
@@ -0,0 +1,13 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from sklearn.metrics import precision_score
+
+from validmind import tags, tasks
+
+
+@tags("classification", "sklearn", "unit_metric")
+@tasks("classification")
+def Precision(model, dataset, **kwargs):
+    return precision_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/classification/sklearn/ROC_AUC.py (new)
@@ -0,0 +1,13 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from sklearn.metrics import roc_auc_score
+
+from validmind import tags, tasks
+
+
+@tags("classification", "sklearn", "unit_metric")
+@tasks("classification")
+def ROC_AUC(model, dataset, **kwargs):
+    return roc_auc_score(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/classification/sklearn/Recall.py (new)
@@ -0,0 +1,13 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from sklearn.metrics import recall_score
+
+from validmind import tags, tasks
+
+
+@tags("classification", "sklearn", "unit_metric")
+@tasks("classification")
+def Recall(model, dataset, **kwargs):
+    return recall_score(dataset.y, dataset.y_pred(model), **kwargs)
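
These five files follow one pattern: a plain function decorated with `@tags`/`@tasks` that pulls `dataset.y` and `dataset.y_pred(model)` and defers to the corresponding sklearn scorer. Since `run_metric` calls the loaded function directly, the decorated metric stays an ordinary callable. A small sketch of calling one by hand via `load_metric` (same hypothetical pre-initialized `vm_model`/`vm_dataset` as above):

    from validmind.unit_metrics import load_metric

    Precision = load_metric("validmind.unit_metrics.classification.sklearn.Precision")
    # extra kwargs flow through to sklearn.metrics.precision_score
    score = Precision(vm_model, vm_dataset, average="macro")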
validmind/unit_metrics/composite.py (new)
@@ -0,0 +1,228 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from dataclasses import dataclass
+from typing import List, Tuple, Union
+from uuid import uuid4
+
+from ..logging import get_logger
+from ..tests.decorator import _inspect_signature
+from ..utils import run_async, test_id_to_name
+from ..vm_models.test.metric import Metric
+from ..vm_models.test.metric_result import MetricResult
+from ..vm_models.test.result_summary import ResultSummary, ResultTable
+from ..vm_models.test.result_wrapper import MetricResultWrapper
+from . import load_metric, run_metric
+
+logger = get_logger(__name__)
+
+
+@dataclass
+class CompositeMetric(Metric):
+    unit_metrics: List[str] = None
+
+    def __post_init__(self):
+        if self._unit_metrics:
+            self.unit_metrics = self._unit_metrics
+        elif self.unit_metrics is None:
+            raise ValueError("unit_metrics must be provided")
+
+        if hasattr(self, "_output_template") and self._output_template:
+            self.output_template = self._output_template
+
+    def run(self):
+        self.result = run_metrics(
+            test_id=self.test_id,
+            metric_ids=self.unit_metrics,
+            description=self.description(),
+            inputs=self._get_input_dict(),
+            params=self.params,
+            output_template=self.output_template,
+            show=False,
+        )
+
+        return self.result
+
+    def summary(self, result: dict):
+        return ResultSummary(results=[ResultTable(data=[result])])
+
+
+def load_composite_metric(
+    test_id: str = None,
+    metric_name: str = None,
+    unit_metrics: List[str] = None,
+    output_template: str = None,
+) -> Tuple[Union[None, str], Union[CompositeMetric, None]]:
+    # this function can either create a composite metric from a list of unit metrics or
+    # load a stored composite metric based on the test id
+
+    # TODO: figure out this circular import thing:
+    from ..api_client import get_metadata
+
+    if test_id:
+        # get the unit metric ids and output template (if any) from the metadata
+        try:
+            unit_metrics = run_async(
+                get_metadata, f"composite_metric_def:{test_id}:unit_metrics"
+            )["json"]
+            output_template = run_async(
+                get_metadata, f"composite_metric_def:{test_id}:output_template"
+            )["json"]["output_template"]
+        except Exception:
+            return f"Could not load composite metric {test_id}", None
+
+    description = f"""
+    Composite metric built from the following unit metrics:
+    {', '.join([metric_id.split('.')[-1] for metric_id in unit_metrics])}
+    """
+
+    class_def = type(
+        test_id.split(".")[-1] if test_id else metric_name,
+        (CompositeMetric,),
+        {
+            "__doc__": description,
+            "_unit_metrics": unit_metrics,
+            "_output_template": output_template,
+        },
+    )
+
+    required_inputs = set()
+    for metric_id in unit_metrics:
+        inputs, _ = _inspect_signature(load_metric(metric_id))
+        required_inputs.update(inputs.keys())
+
+    class_def.required_inputs = list(required_inputs)
+
+    return None, class_def
+
+
+def run_metrics(
+    name: str = None,
+    metric_ids: List[str] = None,
+    description: str = None,
+    output_template: str = None,
+    inputs: dict = None,
+    params: dict = None,
+    test_id: str = None,
+    show: bool = True,
+) -> MetricResultWrapper:
+    """Run a composite metric
+
+    Composite metrics are metrics that are composed of multiple unit metrics. This
+    works by running individual unit metrics and then combining the results into a
+    single "MetricResult" object that can be logged and displayed just like any other
+    metric result. The special thing about composite metrics is that when they are
+    logged to the platform, metadata describing the unit metrics and output template
+    used to generate the composite metric is also logged. This means that by grabbing
+    the metadata for a composite metric (identified by the test ID
+    `validmind.composite_metric.<name>`) the framework can rebuild and rerun it at
+    any time.
+
+    Args:
+        name (str, optional): Name of the composite metric. Required if test_id is not
+            provided. Defaults to None.
+        metric_ids (list[str]): List of unit metric IDs to run. Required.
+        description (str, optional): Description of the composite metric. Defaults to
+            None.
+        output_template (_type_, optional): Output template to customize the result
+            table.
+        inputs (_type_, optional): Inputs to pass to the unit metrics. Defaults to None
+        params (_type_, optional): Parameters to pass to the unit metrics. Defaults to
+            None.
+        test_id (str, optional): Test ID of the composite metric. Required if name is
+            not provided. Defaults to None.
+        show (bool, optional): Whether to show the result immediately. Defaults to True
+
+    Raises:
+        ValueError: If metric_ids is not provided
+        ValueError: If name or key is not provided
+
+    Returns:
+        MetricResultWrapper: The result wrapper object
+    """
+    if not metric_ids:
+        raise ValueError("metric_ids must be provided")
+
+    if not name and not test_id:
+        raise ValueError("name or key must be provided")
+
+    # if name is provided, make sure to squash it into a camel case string
+    if name:
+        name = "".join(word[0].upper() + word[1:] for word in name.split())
+
+    results = {}
+
+    for metric_id in metric_ids:
+        metric_name = test_id_to_name(metric_id)
+        results[metric_name] = run_metric(
+            metric_id=metric_id,
+            inputs=inputs,
+            params=params,
+            show=False,
+            value_only=True,
+        )
+
+    test_id = f"validmind.composite_metric.{name}" if not test_id else test_id
+
+    if not output_template:
+
+        def row(name):
+            return f"""
+            <tr>
+                <td><strong>{name}</strong></td>
+                <td>{{{{ value['{name}'] | number }}}}</td>
+            </tr>
+            """
+
+        output_template = f"""
+        <h1>{test_id_to_name(test_id)}</h1>
+        <table>
+            <thead>
+                <tr>
+                    <th>Metric</th>
+                    <th>Value</th>
+                </tr>
+            </thead>
+            <tbody>
+                {"".join([row(name) for name in results.keys()])}
+            </tbody>
+        </table>
+        <style>
+            th, td {{
+                padding: 5px;
+                text-align: left;
+            }}
+        </style>
+        """
+
+    result_wrapper = MetricResultWrapper(
+        result_id=test_id,
+        result_metadata=[
+            {
+                "content_id": f"metric_description:{test_id}",
+                "text": description,
+            },
+            {
+                "content_id": f"composite_metric_def:{test_id}:unit_metrics",
+                "json": metric_ids,
+            },
+            {
+                "content_id": f"composite_metric_def:{test_id}:output_template",
+                "json": {"output_template": output_template},
+            },
+        ],
+        inputs=list(inputs.keys()),
+        output_template=output_template,
+        metric=MetricResult(
+            key=test_id,
+            ref_id=str(uuid4()),
+            value=results,
+            summary=ResultSummary(results=[ResultTable(data=[results])]),
+        ),
+    )
+
+    if show:
+        result_wrapper.show()
+
+    return result_wrapper
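
A usage sketch for `run_metrics` (again with the hypothetical pre-initialized `vm_model`/`vm_dataset`): the name is camel-cased into the test ID, each unit metric runs with `value_only=True`, and the returned wrapper can be logged, which also stores the `composite_metric_def` metadata that `load_composite_metric` later reads back:

    from validmind.unit_metrics.composite import run_metrics

    result = run_metrics(
        name="classification performance",  # squashed to "ClassificationPerformance"
        metric_ids=[
            "validmind.unit_metrics.classification.sklearn.Accuracy",
            "validmind.unit_metrics.classification.sklearn.F1",
        ],
        inputs={"model": vm_model, "dataset": vm_dataset},
    )
    # logged under the test ID validmind.composite_metric.ClassificationPerformance
    result.log()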
validmind/unit_metrics/regression/GiniCoefficient.py (new)
@@ -0,0 +1,33 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+
+from validmind import tags, tasks
+
+
+@tags("regression", "unit_metric")
+@tasks("regression")
+def GiniCoefficient(dataset, model):
+    y_true = dataset.y
+    y_pred = dataset.y_pred(model)
+
+    # Sort true values and corresponding predicted values
+    idx = np.argsort(y_true)
+    y_true_sorted = y_true[idx]
+    y_pred_sorted = y_pred[idx]
+
+    # Compute cumulative sums
+    cumsum_true = np.cumsum(y_true_sorted)
+    cumsum_pred = np.cumsum(y_pred_sorted)
+
+    # Normalize cumulative sums
+    cumsum_true_norm = cumsum_true / np.max(cumsum_true)
+    cumsum_pred_norm = cumsum_pred / np.max(cumsum_pred)
+
+    # Compute area under the Lorenz curve
+    area_lorenz = np.trapz(cumsum_pred_norm, x=cumsum_true_norm)
+
+    # Compute Gini coefficient
+    return 1 - 2 * area_lorenz
validmind/unit_metrics/regression/HuberLoss.py (new)
@@ -0,0 +1,23 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+
+from validmind import tags, tasks
+
+
+@tags("regression", "unit_metric")
+@tasks("regression")
+def HuberLoss(model, dataset):
+    y_true = dataset.y
+    y_pred = dataset.y_pred(model)
+
+    # delta - Threshold for the squared error to be linear or quadratic.
+    delta = 1.0
+    error = y_true - y_pred
+
+    quadratic_part = np.minimum(np.abs(error), delta)
+    linear_part = np.abs(error) - quadratic_part
+
+    return np.mean(0.5 * quadratic_part**2 + delta * linear_part)
validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py (new)
@@ -0,0 +1,30 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+
+from validmind import tags, tasks
+
+
+@tags("regression", "unit_metric")
+@tasks("regression")
+def KolmogorovSmirnovStatistic(dataset, model):
+    y_true = dataset.y.flatten()
+    y_pred = dataset.y_pred(model)
+
+    # Sort true values and corresponding predicted values
+    idx_true = np.argsort(y_true)
+    idx_pred = np.argsort(y_pred)
+    y_true_sorted = y_true[idx_true]
+    y_pred_sorted = y_pred[idx_pred]
+
+    # Compute cumulative distribution functions (CDFs)
+    cdf_true = np.arange(1, len(y_true_sorted) + 1) / len(y_true_sorted)
+    cdf_pred = np.arange(1, len(y_pred_sorted) + 1) / len(y_pred_sorted)
+
+    # Compute absolute differences between CDFs
+    diff_cdf = np.abs(cdf_true - cdf_pred)
+
+    # Find maximum absolute difference
+    return np.max(diff_cdf)
validmind/unit_metrics/regression/MeanAbsolutePercentageError.py (new)
@@ -0,0 +1,16 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+
+from validmind import tags, tasks
+
+
+@tags("regression", "unit_metric")
+@tasks("regression")
+def MeanAbsolutePercentageError(model, dataset):
+    y_true = dataset.y
+    y_pred = dataset.y_pred(model)
+
+    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
validmind/unit_metrics/regression/MeanBiasDeviation.py (new)
@@ -0,0 +1,13 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+
+from validmind import tags, tasks
+
+
+@tags("regression", "unit_metric")
+@tasks("regression")
+def MeanBiasDeviation(model, dataset):
+    return np.mean(dataset.y - dataset.y_pred(model))
validmind/unit_metrics/regression/QuantileLoss.py (new)
@@ -0,0 +1,15 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import numpy as np
+
+from validmind import tags, tasks
+
+
+@tags("regression", "unit_metric")
+@tasks("regression")
+def QuantileLoss(model, dataset, quantile=0.5):
+    error = dataset.y - dataset.y_pred(model)
+
+    return np.mean(np.maximum(quantile * error, (quantile - 1) * error))
validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py (new)
@@ -0,0 +1,21 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from sklearn.metrics import r2_score as _r2_score
+
+from validmind import tags, tasks
+
+
+@tags("regression", "sklearn", "unit_metric")
+@tasks("regression")
+def AdjustedRSquaredScore(model, dataset):
+    r2_score = _r2_score(
+        dataset.y,
+        dataset.y_pred(model),
+    )
+
+    row_count = len(dataset.y)
+    feature_count = len(dataset.get_features_columns())
+
+    return 1 - (1 - r2_score) * (row_count - 1) / (row_count - feature_count)
validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py (new)
@@ -0,0 +1,13 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from sklearn.metrics import mean_absolute_error as _mean_absolute_error
+
+from validmind import tags, tasks
+
+
+@tags("regression", "sklearn", "unit_metric")
+@tasks("regression")
+def MeanAbsoluteError(model, dataset, **kwargs):
+    return _mean_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/regression/sklearn/MeanSquaredError.py (new)
@@ -0,0 +1,13 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from sklearn.metrics import mean_squared_error
+
+from validmind import tags, tasks
+
+
+@tags("regression", "sklearn", "unit_metric")
+@tasks("regression")
+def MeanSquaredError(model, dataset, **kwargs):
+    return mean_squared_error(dataset.y, dataset.y_pred(model), **kwargs)
validmind/unit_metrics/regression/sklearn/RSquaredScore.py (new)
@@ -0,0 +1,13 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+from sklearn.metrics import r2_score
+
+from validmind import tags, tasks
+
+
+@tags("regression", "sklearn", "unit_metric")
+@tasks("regression")
+def RSquaredError(model, dataset):
+    return r2_score(dataset.y, dataset.y_pred(model))
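
One detail worth noting across these new unit metrics: `run_metric` filters the `params` dict against the metric function's signature, so defaults such as `quantile=0.5` in QuantileLoss can be overridden per call. A hedged example (same hypothetical `vm_model`/`vm_dataset` as above):

    from validmind.unit_metrics import run_metric

    p90_loss = run_metric(
        "validmind.unit_metrics.regression.QuantileLoss",
        inputs={"model": vm_model, "dataset": vm_dataset},
        params={"quantile": 0.9},  # overrides the default of 0.5
        value_only=True,
    )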