validmind 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +6 -3
- validmind/__version__.py +1 -1
- validmind/ai.py +193 -0
- validmind/api_client.py +45 -31
- validmind/client.py +33 -6
- validmind/datasets/classification/customer_churn.py +2 -2
- validmind/datasets/credit_risk/__init__.py +11 -0
- validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
- validmind/datasets/credit_risk/lending_club.py +394 -0
- validmind/datasets/nlp/__init__.py +5 -0
- validmind/datasets/nlp/cnn_dailymail.py +98 -0
- validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
- validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
- validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
- validmind/errors.py +11 -1
- validmind/logging.py +9 -2
- validmind/models/huggingface.py +2 -2
- validmind/models/pytorch.py +3 -3
- validmind/models/sklearn.py +4 -4
- validmind/template.py +2 -2
- validmind/test_suites/__init__.py +4 -2
- validmind/tests/__init__.py +130 -45
- validmind/tests/data_validation/DatasetDescription.py +0 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
- validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
- validmind/tests/data_validation/ScatterPlot.py +8 -2
- validmind/tests/data_validation/nlp/StopWords.py +1 -6
- validmind/tests/data_validation/nlp/TextDescription.py +20 -9
- validmind/tests/decorator.py +313 -0
- validmind/tests/model_validation/BertScore.py +1 -1
- validmind/tests/model_validation/BertScoreAggregate.py +1 -1
- validmind/tests/model_validation/BleuScore.py +1 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
- validmind/tests/model_validation/ContextualRecall.py +1 -1
- validmind/tests/model_validation/FeaturesAUC.py +110 -0
- validmind/tests/model_validation/MeteorScore.py +92 -0
- validmind/tests/model_validation/RegardHistogram.py +6 -7
- validmind/tests/model_validation/RegardScore.py +4 -6
- validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
- validmind/tests/model_validation/RougeMetrics.py +7 -5
- validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
- validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
- validmind/tests/model_validation/TokenDisparity.py +1 -1
- validmind/tests/model_validation/ToxicityHistogram.py +1 -1
- validmind/tests/model_validation/ToxicityScore.py +1 -1
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +16 -17
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
- validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +55 -5
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +11 -5
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
- validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
- validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +2 -2
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
- validmind/tests/prompt_validation/ai_powered_test.py +2 -0
- validmind/tests/test_providers.py +14 -124
- validmind/unit_metrics/__init__.py +75 -70
- validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
- validmind/unit_metrics/classification/sklearn/F1.py +13 -0
- validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
- validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
- validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
- validmind/unit_metrics/composite.py +228 -0
- validmind/unit_metrics/regression/GiniCoefficient.py +33 -0
- validmind/unit_metrics/regression/HuberLoss.py +23 -0
- validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +30 -0
- validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +16 -0
- validmind/unit_metrics/regression/MeanBiasDeviation.py +13 -0
- validmind/unit_metrics/regression/QuantileLoss.py +15 -0
- validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +21 -0
- validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +13 -0
- validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +13 -0
- validmind/unit_metrics/regression/sklearn/RSquaredScore.py +13 -0
- validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +20 -0
- validmind/utils.py +20 -31
- validmind/vm_models/__init__.py +0 -2
- validmind/vm_models/dataset.py +623 -29
- validmind/vm_models/figure.py +52 -17
- validmind/vm_models/test/metric.py +33 -31
- validmind/vm_models/test/output_template.py +0 -27
- validmind/vm_models/test/result_wrapper.py +68 -36
- validmind/vm_models/test/test.py +4 -2
- validmind/vm_models/test/threshold_test.py +24 -14
- validmind/vm_models/test_context.py +7 -0
- validmind/vm_models/test_suite/runner.py +1 -1
- validmind/vm_models/test_suite/summary.py +3 -3
- validmind/vm_models/test_suite/test.py +1 -1
- validmind/vm_models/test_suite/test_suite.py +2 -1
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/METADATA +18 -18
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/RECORD +116 -94
- validmind-2.1.0.dist-info/entry_points.txt +3 -0
- validmind/tests/__types__.py +0 -62
- validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
- validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
- validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
- validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
- validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -20
- validmind/unit_metrics/sklearn/classification/F1.py +0 -22
- validmind/unit_metrics/sklearn/classification/Precision.py +0 -22
- validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -20
- validmind/unit_metrics/sklearn/classification/Recall.py +0 -20
- validmind/vm_models/test/unit_metric.py +0 -88
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
- {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
validmind/errors.py
CHANGED
@@ -48,7 +48,7 @@ class MissingCacheResultsArgumentsError(BaseError):
     pass


-class
+class MissingOrInvalidModelPredictFnError(BaseError):
     """
     When the pytorch model is missing a predict function or its predict
     method does not have the expected arguments.
@@ -315,6 +315,14 @@ class UnsupportedModelError(BaseError):
     pass


+class UnsupportedModelForSHAPError(BaseError):
+    """
+    When an unsupported model is used for SHAP importance.
+    """
+
+    pass
+
+
 class SkipTestError(BaseError):
     """
     Useful error to throw when a test cannot be executed.
@@ -361,6 +369,8 @@ def should_raise_on_fail_fast(error) -> bool:
     """
     error_class = error.__class__.__name__
     return error_class not in [
+        "MissingOrInvalidModelPredictFnError",
         "MissingRequiredTestInputError",
        "SkipTestError",
+        "UnsupportedModelForSHAPError",
     ]
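The two new error classes are added to the fail-fast exclusion list, so a suite run treats them as "skip this test and continue" rather than aborting. A minimal sketch of that behaviour, assuming these `BaseError` subclasses accept a plain message string:

```python
# Minimal sketch (assumes the BaseError subclasses accept a message string).
from validmind.errors import (
    MissingOrInvalidModelPredictFnError,
    UnsupportedModelForSHAPError,
    should_raise_on_fail_fast,
)

errors = [
    MissingOrInvalidModelPredictFnError("model has no usable predict_proba"),
    UnsupportedModelForSHAPError("model type not supported for SHAP importance"),
    ValueError("anything else"),
]

for err in errors:
    # Excluded classes return False: the runner logs the failure and moves on.
    print(type(err).__name__, "-> aborts fail-fast run:", should_raise_on_fail_fast(err))
```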
validmind/logging.py
CHANGED
@@ -68,10 +68,17 @@ def get_logger(name="validmind", log_level=None):
     logger = logging.getLogger(name)
     logger.setLevel(log_level or _get_log_level())

-    #
-
+    # Clear existing handlers if any (or refine the existing logic as necessary)
+    # TODO: lets add some better handler management
+    if not any(
+        isinstance(h, type(handler)) and h.formatter._fmt == formatter._fmt
+        for h in logger.handlers
+    ):
         logger.addHandler(handler)

+    # Prevent logger from propagating to root logger
+    logger.propagate = False
+
     return logger

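In practice this means repeated `get_logger` calls (common when each module grabs its own logger) no longer stack duplicate handlers or double-emit through the root logger. A small sketch of the expected behaviour:

```python
# Sketch: repeated calls return the same logger without re-attaching an equivalent handler.
from validmind.logging import get_logger

logger_a = get_logger("validmind.example")
logger_b = get_logger("validmind.example")

assert logger_a is logger_b  # logging.getLogger caches loggers by name
logger_a.info("this line should appear once, not once per get_logger call")
```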
validmind/models/huggingface.py
CHANGED
@@ -6,7 +6,7 @@ from dataclasses import dataclass

 import pandas as pd

-from validmind.errors import
+from validmind.errors import MissingOrInvalidModelPredictFnError
 from validmind.logging import get_logger
 from validmind.vm_models.model import (
     ModelAttributes,
@@ -44,7 +44,7 @@ class HFModel(VMModel):
         Invoke predict_proba from underline model
         """
         if not has_method_with_arguments(self.model, "predict_proba", 1):
-            raise
+            raise MissingOrInvalidModelPredictFnError(
                 "Model requires a implementation of predict_proba method with 1 argument"
                 + " that is tensor features matrix"
             )
validmind/models/pytorch.py
CHANGED
@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-from validmind.errors import
+from validmind.errors import MissingOrInvalidModelPredictFnError
 from validmind.logging import get_logger
 from validmind.vm_models.model import (
     ModelAttributes,
@@ -41,7 +41,7 @@ class PyTorchModel(VMModel):
         Invoke predict_proba from underline model
         """
         if not has_method_with_arguments(self.model, "predict_proba", 1):
-            raise
+            raise MissingOrInvalidModelPredictFnError(
                 "Model requires a implemention of predict_proba method with 1 argument"
                 + " that is tensor features matrix"
             )
@@ -54,7 +54,7 @@ class PyTorchModel(VMModel):
         Predict method for the model. This is a wrapper around the model's
         """
         if not has_method_with_arguments(self.model, "predict", 1):
-            raise
+            raise MissingOrInvalidModelPredictFnError(
                 "Model requires a implemention of predict method with 1 argument"
                 + " that is tensor features matrix"
             )
validmind/models/sklearn.py
CHANGED
@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

-from validmind.errors import
+from validmind.errors import MissingOrInvalidModelPredictFnError
 from validmind.logging import get_logger
 from validmind.vm_models.model import (
     ModelAttributes,
@@ -40,9 +40,9 @@ class SKlearnModel(VMModel):
         predict_proba (for classification) or predict (for regression) method
         """
         if not has_method_with_arguments(self.model, "predict_proba", 1):
-            raise
-                "
-                + "
+            raise MissingOrInvalidModelPredictFnError(
+                f"SKlearn model {self.model.__class__} Model does not have a compatible predict_proba implementation."
+                + " Please assign predictions directly with vm_dataset.assign_predictions(model, prediction_values)"
             )
         if callable(getattr(self.model, "predict_proba", None)):
             return self.model.predict_proba(*args, **kwargs)[:, 1]
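The new error message points users at assigning precomputed predictions when the wrapped estimator has no `predict_proba` (for example `LinearSVC`). A hedged sketch of that workflow; the `init_dataset`/`init_model`/`assign_predictions` call shapes are assumed from the error text and the `client.py`/`dataset.py` changes listed above, not quoted from this diff:

```python
# Sketch (call signatures assumed): supply predictions directly when the model
# cannot produce probabilities itself.
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

import validmind as vm

X, y = make_classification(n_samples=200, n_features=5, random_state=0)
df = pd.DataFrame(X, columns=[f"f{i}" for i in range(5)])
df["target"] = y

svc = LinearSVC().fit(df.drop(columns=["target"]), df["target"])

vm_dataset = vm.init_dataset(dataset=df, target_column="target")
vm_model = vm.init_model(svc)

# LinearSVC has no predict_proba, so hand the dataset decision-function scores instead
scores = svc.decision_function(df.drop(columns=["target"]))
vm_dataset.assign_predictions(vm_model, prediction_values=scores)
```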
validmind/template.py
CHANGED
@@ -4,9 +4,9 @@

 from pprint import pformat

+import mistune
 from IPython.display import display
 from ipywidgets import HTML, Accordion, VBox
-from markdown import markdown

 from .html_templates.content_blocks import (
     failed_content_block_html,
@@ -75,7 +75,7 @@ def _create_content_widget(content):
             HTML(
                 test_content_block_html.format(
                     title=test_deets["Name"],
-                    description=
+                    description=mistune.html(test_deets["Description"]),
                     required_inputs=", ".join(
                         test_deets["Required Inputs"] or ["None"]
                     ),
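Description rendering switches from the `markdown` package to `mistune`, whose module-level `html()` helper converts a Markdown string to HTML in one call:

```python
# Sketch: mistune.html() replaces markdown.markdown() for rendering test descriptions.
import mistune

description = "Checks **stop word** frequency against a configurable threshold."
print(mistune.html(description))
# e.g. '<p>Checks <strong>stop word</strong> frequency against a configurable threshold.</p>\n'
```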
validmind/test_suites/__init__.py
CHANGED
@@ -5,6 +5,8 @@
 """
 Entrypoint for test suites.
 """
+from inspect import getdoc
+
 import pandas as pd

 from ..logging import get_logger
@@ -139,7 +141,7 @@ def list_suites(pretty: bool = True):
             {
                 "ID": suite_id,
                 "Name": test_suite.__name__,
-                "Description": test_suite.
+                "Description": getdoc(test_suite).strip(),
                 "Tests": ", ".join(_get_test_suite_test_ids(test_suite)),
             }
         )
@@ -167,7 +169,7 @@ def describe_suite(test_suite_id: str, verbose=False):
         {
             "ID": test_suite_id,
             "Name": test_suite.__name__,
-            "Description": test_suite.
+            "Description": getdoc(test_suite).strip(),
             "Tests": ", ".join(_get_test_suite_test_ids(test_suite)),
         }
     ]
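Both listing functions now read the suite description with `inspect.getdoc` instead of the raw `__doc__` attribute; `getdoc` cleans up indentation and falls back to the parent class when a subclass defines no docstring of its own, which matters for suites that inherit their description:

```python
# Sketch: why inspect.getdoc is safer than __doc__ for (possibly inherited) docstrings.
import inspect

class BaseSuite:
    """Runs the standard battery of documentation tests."""

class TabularSuite(BaseSuite):
    pass

print(TabularSuite.__doc__)           # None -- __doc__ is not inherited
print(inspect.getdoc(TabularSuite))   # "Runs the standard battery of documentation tests."
```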
validmind/tests/__init__.py
CHANGED
@@ -5,23 +5,26 @@
 """All Tests for ValidMind"""

 import importlib
+import inspect
 import sys
 from pathlib import Path
 from pprint import pformat
 from typing import Dict

+import mistune
 import pandas as pd
 from IPython.display import display
 from ipywidgets import HTML
-from markdown import markdown

 from ..errors import LoadTestError
 from ..html_templates.content_blocks import test_content_block_html
 from ..logging import get_logger
-from ..
+from ..unit_metrics import run_metric
+from ..unit_metrics.composite import load_composite_metric
+from ..utils import format_dataframe, fuzzy_match, test_id_to_name
 from ..vm_models import TestContext, TestInput
-from .
-from .test_providers import
+from .decorator import metric, tags, tasks
+from .test_providers import LocalTestProvider, TestProvider

 logger = get_logger(__name__)

@@ -34,22 +37,28 @@ __all__ = [
     "load_test",
     "describe_test",
     "register_test_provider",
-    "GithubTestProvider",
     "LoadTestError",
     "LocalTestProvider",
+    # Decorators for functional metrics
+    "metric",
+    "tags",
+    "tasks",
 ]

 __tests = None
 __test_classes = None

-__test_providers: Dict[str,
+__test_providers: Dict[str, TestProvider] = {}
+__custom_tests: Dict[str, object] = {}


 def _test_description(test_class, truncate=True):
-
-    return test_class.__doc__.strip().split("\n")[0] + "..."
+    description = inspect.getdoc(test_class).strip()

-
+    if truncate and len(description.split("\n")) > 5:
+        return description.strip().split("\n")[0] + "..."
+
+    return description


 def _load_tests(test_ids):
@@ -249,55 +258,83 @@ def list_tests(filter=None, task=None, tags=None, pretty=True, truncate=True):
     return tests


-def
-    # Extract the test ID extension from the actual test ID when loading
-    # the test class. This enables us to generate multiple results for
-    # the same tests within the document. For instance, consider the
-    # test ID "validmind.data_validation.ClassImbalance:data_id_1,"
-    # where the test ID extension is "data_id_1".
+def _load_validmind_test(test_id, reload=False):
     parts = test_id.split(":")[0].split(".")

+    test_module = ".".join(parts[1:-1])
+    test_class = parts[-1]
+
     error = None
-
+    test = None

-
-
-        f"Unable to load test {test_id}. "
-        f"No Test Provider found for the namespace: {namespace}."
-    )
+    try:
+        full_path = f"validmind.tests.{test_module}.{test_class}"

-
-
-
+        if reload and full_path in sys.modules:
+            module = importlib.reload(sys.modules[full_path])
+        else:
+            module = importlib.import_module(full_path)

-
-
+        test = getattr(module, test_class)
+    except ModuleNotFoundError as e:
+        error = f"Unable to load test {test_id}. {e}"
+    except AttributeError:
+        error = f"Unable to load test {test_id}. Test not in module: {test_class}"
+
+    return error, test
+
+
+def load_test(test_id: str, reload=False):
+    """Load a test by test ID
+
+    Test IDs are in the format `namespace.path_to_module.TestClassOrFuncName[:result_id]`.
+    The result ID is optional and is used to distinguish between multiple results from the
+    running the same test.
+
+    Args:
+        test_id (str): The test ID in the format `namespace.path_to_module.TestName[:result_id]`
+        reload (bool, optional): Whether to reload the test module. Defaults to False.
+    """
+    # TODO: we should use a dedicated class for test IDs to handle this consistently
+    test_id, result_id = test_id.split(":", 1) if ":" in test_id else (test_id, None)
+
+    error = None
+    namespace = test_id.split(".", 1)[0]
+
+    # TODO: lets implement an extensible loading system instead of this ugly if/else
+    if test_id in __custom_tests:
+        test = __custom_tests[test_id]

-
-
-    else:
-        module = importlib.import_module(full_path)
+    elif test_id.startswith("validmind.composite_metric"):
+        error, test = load_composite_metric(test_id)

-
-
-        error = f"Unable to load test {test_id}. {e}"
-    except AttributeError:
-        error = f"Unable to load test {test_id}. Class not in module: {test_class}"
+    elif namespace == "validmind":
+        error, test = _load_validmind_test(test_id, reload=reload)

     elif namespace in __test_providers:
         try:
             test = __test_providers[namespace].load_test(test_id.split(".", 1)[1])
         except Exception as e:
             error = (
-                f"Unable to load test {test_id} from test
+                f"Unable to load test {test_id} from test provider: "
                 f"{__test_providers[namespace]}\n Got Exception: {e}"
             )

+    else:
+        error = f"Unable to load test {test_id}. No test provider found."
+
     if error:
         logger.error(error)
         raise LoadTestError(error)

-    test
+    if inspect.isfunction(test):
+        # if its a function, we decorate it and then load the class
+        # TODO: simplify this as we move towards all functional metrics
+        # "_" is used here so it doesn't conflict with other test ids
+        metric("_")(test)
+        test = __custom_tests["_"]
+
+    test.test_id = f"{test_id}:{result_id}" if result_id else test_id

     return test

@@ -322,7 +359,7 @@ def describe_test(test_id: str = None, raw: bool = False):
         "Test Type": test.test_type,
         "Required Inputs": test.required_inputs,
         "Params": test.default_params or {},
-        "Description":
+        "Description": inspect.getdoc(test).strip() or "",
     }

     if raw:
@@ -332,7 +369,7 @@ def describe_test(test_id: str = None, raw: bool = False):
         HTML(
             test_content_block_html.format(
                 title=f'{details["Name"]}',
-                description=
+                description=mistune.html(details["Description"].strip()),
                 required_inputs=", ".join(details["Required Inputs"] or ["None"]),
                 params_table="\n".join(
                     [
@@ -346,21 +383,63 @@ def describe_test(test_id: str = None, raw: bool = False):
     )


-def run_test(
+def run_test(
+    test_id: str = None,
+    name: str = None,
+    unit_metrics: list = None,
+    params: dict = None,
+    inputs=None,
+    output_template=None,
+    show=True,
+    **kwargs,
+):
     """Run a test by test ID

     Args:
-        test_id (str): The test ID
+        test_id (str, option): The test ID to run - required when running a single test
+            i.e. when not running multiple unit metrics
+        name (str, optional): The name of the test (used to create a composite metric
+            out of multiple unit metrics) - required when running multiple unit metrics
+        unit_metrics (list, optional): A list of unit metric IDs to run as a composite
+            metric - required when running multiple unit metrics
         params (dict, optional): A dictionary of params to override the default params
         inputs: A dictionary of test inputs to pass to the Test
         output_template (str, optional): A template to use for customizing the output
+        show (bool, optional): Whether to display the results. Defaults to True.
         **kwargs: Any extra arguments will be passed in via the TestInput object. i.e.:
             - dataset: A validmind Dataset object or a Pandas DataFrame
             - model: A model to use for the test
            - models: A list of models to use for the test
             other inputs can be accessed inside the test via `self.inputs["input_name"]`
     """
-
+    if not test_id and not name and not unit_metrics:
+        raise ValueError(
+            "`test_id` or `name` and `unit_metrics` must be provided to run a test"
+        )
+
+    if (unit_metrics and not name) or (name and not unit_metrics):
+        raise ValueError("`name` and `unit_metrics` must be provided together")
+
+    if test_id and test_id.startswith("validmind.unit_metrics"):
+        # TODO: as we move towards a more unified approach to metrics
+        # we will want to make everything functional and remove the
+        # separation between unit metrics and "normal" metrics
+        return run_metric(test_id, inputs=inputs, params=params, show=show)
+
+    if unit_metrics:
+        metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
+        test_id = f"validmind.composite_metric.{metric_id_name}"
+
+        error, TestClass = load_composite_metric(
+            unit_metrics=unit_metrics, metric_name=metric_id_name
+        )
+
+        if error:
+            raise LoadTestError(error)
+
+    else:
+        TestClass = load_test(test_id, reload=True)
+
     test = TestClass(
         test_id=test_id,
         context=TestContext(),
@@ -370,16 +449,22 @@ def run_test(test_id, params: dict = None, inputs=None, output_template=None, **
     )

     test.run()
-
+
+    if show:
+        test.result.show()

     return test.result


-def register_test_provider(namespace: str, test_provider:
+def register_test_provider(namespace: str, test_provider: TestProvider) -> None:
     """Register an external test provider

     Args:
         namespace (str): The namespace of the test provider
-        test_provider (
+        test_provider (TestProvider): The test provider
     """
     __test_providers[namespace] = test_provider
+
+
+def _register_custom_test(test_id: str, test_class: object):
+    __custom_tests[test_id] = test_class
validmind/tests/data_validation/FeatureTargetCorrelationPlot.py
CHANGED
@@ -74,7 +74,9 @@ class FeatureTargetCorrelationPlot(Metric):

     def visualize_feature_target_correlation(self, df, target_column, fig_height):
         # Compute correlations with the target variable
-        correlations =
+        correlations = (
+            df.corr(numeric_only=True)[target_column].drop(target_column).to_frame()
+        )
         correlations = correlations.loc[:, ~correlations.columns.duplicated()]

         correlations = correlations.sort_values(by=target_column, ascending=True)
validmind/tests/data_validation/PiTCreditScoresHistogram.py
CHANGED
@@ -113,7 +113,7 @@ class PiTCreditScoresHistogram(Metric):
         )
         predicted_default_column = (
             self.params.get("predicted_default_column")
-            or self.inputs.dataset.y_pred(self.inputs.model
+            or self.inputs.dataset.y_pred(self.inputs.model),
         )
         scores_column = self.params["scores_column"]
         point_in_time_column = self.params["point_in_time_column"]
validmind/tests/data_validation/ScatterPlot.py
CHANGED
@@ -65,8 +65,14 @@ class ScatterPlot(Metric):
         if not set(columns).issubset(set(df.columns)):
             raise ValueError("Provided 'columns' must exist in the dataset")

-        sns.pairplot(data=df, diag_kind="kde")
-
+        g = sns.pairplot(data=df, diag_kind="kde")
+        for ax in g.axes.flatten():
+            # rotate x axis labels
+            ax.set_xlabel(ax.get_xlabel(), rotation=45)
+            # rotate y axis labels
+            ax.set_ylabel(ax.get_ylabel(), rotation=45)
+            # set y labels alignment
+            ax.yaxis.get_label().set_horizontalalignment("right")
         # Get the current figure
         fig = plt.gcf()

validmind/tests/data_validation/nlp/StopWords.py
CHANGED
@@ -22,7 +22,6 @@ from validmind.vm_models import (
     ResultTableMetadata,
     ThresholdTest,
     ThresholdTestResult,
-    VMDataset,
 )


@@ -86,17 +85,13 @@ class StopWords(ThresholdTest):
                 ResultTable(
                     data=df,
                     metadata=ResultTableMetadata(
-                        title=f"
+                        title=f"Stop words results for column '{self.inputs.dataset.target_column}'"
                     ),
                 )
             ]
         )

     def run(self):
-        # Can only run this test if we have a Dataset object
-        if not isinstance(self.inputs.dataset, VMDataset):
-            raise ValueError("ClassImbalance requires a validmind Dataset object")
-
         text_column = self.inputs.dataset.text_column

         def create_corpus(df, text_column):
validmind/tests/data_validation/nlp/TextDescription.py
CHANGED
@@ -92,9 +92,12 @@ class TextDescription(Metric):
         total_words = len(words)
         total_sentences = len(sentences)
         avg_sentence_length = round(
-
-
-
+            (
+                sum(len(sentence.split()) for sentence in sentences)
+                / total_sentences
+                if total_sentences
+                else 0
+            ),
             1,
         )
         total_paragraphs = len(paragraphs)
@@ -161,9 +164,13 @@ class TextDescription(Metric):
         return combined_df

     def run(self):
+        # Enforce that text_column must be provided as part of the params
+        if self.inputs.dataset.text_column is None:
+            raise ValueError("A 'text_column' must be provided to run this test.")
+
         # Can only run this test if we have a Dataset object
         if not isinstance(self.inputs.dataset, VMDataset):
-            raise ValueError("
+            raise ValueError("TextDescription requires a validmind Dataset object")

         df_text_description = self.text_description_table(
             self.inputs.dataset.df, self.params
@@ -177,27 +184,31 @@ class TextDescription(Metric):
             ("Total Unique Words", "Lexical Diversity"),
         ]
         params = {"combinations_to_plot": combinations_to_plot}
-        figures = self.
+        figures = self.text_description_plots(df_text_description, params)

         return self.cache_results(
             figures=figures,
         )

     # Function to plot scatter plots for specified combinations using Plotly
-    def
+    def text_description_plots(self, df, params):
         combinations_to_plot = params["combinations_to_plot"]
         figures = []
         # Create hist plots for each column
         for i, column in enumerate(df.columns):
             fig = px.histogram(df, x=column)
             fig.update_layout(bargap=0.2)
-
+            # Generate a unique key for each histogram using the column name and index
+            histogram_key = f"{self.name}_histogram_{column}_{i}"
+            figures.append(Figure(for_object=self, key=histogram_key, figure=fig))

-        for metric1, metric2 in combinations_to_plot:
+        for j, (metric1, metric2) in enumerate(combinations_to_plot):
             fig = px.scatter(
                 df, x=metric1, y=metric2, title=f"Scatter Plot: {metric1} vs {metric2}"
             )
-
+            # Generate a unique key for each scatter plot using the metric names and index
+            scatter_key = f"{self.name}_scatter_{metric1}_vs_{metric2}_{j}"
+            figures.append(Figure(for_object=self, key=scatter_key, figure=fig))
         plt.close("all")

         return figures