validmind 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. validmind/__init__.py +6 -3
  2. validmind/__version__.py +1 -1
  3. validmind/ai.py +193 -0
  4. validmind/api_client.py +45 -31
  5. validmind/client.py +33 -6
  6. validmind/datasets/classification/customer_churn.py +2 -2
  7. validmind/datasets/credit_risk/__init__.py +11 -0
  8. validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  9. validmind/datasets/credit_risk/lending_club.py +394 -0
  10. validmind/datasets/nlp/__init__.py +5 -0
  11. validmind/datasets/nlp/cnn_dailymail.py +98 -0
  12. validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
  13. validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
  14. validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
  15. validmind/errors.py +11 -1
  16. validmind/logging.py +9 -2
  17. validmind/models/huggingface.py +2 -2
  18. validmind/models/pytorch.py +3 -3
  19. validmind/models/sklearn.py +4 -4
  20. validmind/template.py +2 -2
  21. validmind/test_suites/__init__.py +4 -2
  22. validmind/tests/__init__.py +130 -45
  23. validmind/tests/data_validation/DatasetDescription.py +0 -1
  24. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  25. validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
  26. validmind/tests/data_validation/ScatterPlot.py +8 -2
  27. validmind/tests/data_validation/nlp/StopWords.py +1 -6
  28. validmind/tests/data_validation/nlp/TextDescription.py +20 -9
  29. validmind/tests/decorator.py +313 -0
  30. validmind/tests/model_validation/BertScore.py +1 -1
  31. validmind/tests/model_validation/BertScoreAggregate.py +1 -1
  32. validmind/tests/model_validation/BleuScore.py +1 -1
  33. validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
  34. validmind/tests/model_validation/ContextualRecall.py +1 -1
  35. validmind/tests/model_validation/FeaturesAUC.py +110 -0
  36. validmind/tests/model_validation/MeteorScore.py +92 -0
  37. validmind/tests/model_validation/RegardHistogram.py +6 -7
  38. validmind/tests/model_validation/RegardScore.py +4 -6
  39. validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
  40. validmind/tests/model_validation/RougeMetrics.py +7 -5
  41. validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
  42. validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
  43. validmind/tests/model_validation/TokenDisparity.py +1 -1
  44. validmind/tests/model_validation/ToxicityHistogram.py +1 -1
  45. validmind/tests/model_validation/ToxicityScore.py +1 -1
  46. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  47. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
  48. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
  49. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  50. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +16 -17
  51. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
  52. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  53. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
  54. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
  55. validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
  56. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
  57. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
  58. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
  59. validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
  60. validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
  61. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
  62. validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
  63. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +55 -5
  64. validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
  65. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +11 -5
  66. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
  67. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
  68. validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
  69. validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
  70. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
  71. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
  72. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  73. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  74. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  75. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +2 -2
  76. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
  77. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
  78. validmind/tests/prompt_validation/ai_powered_test.py +2 -0
  79. validmind/tests/test_providers.py +14 -124
  80. validmind/unit_metrics/__init__.py +75 -70
  81. validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
  82. validmind/unit_metrics/classification/sklearn/F1.py +13 -0
  83. validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
  84. validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
  85. validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
  86. validmind/unit_metrics/composite.py +228 -0
  87. validmind/unit_metrics/regression/GiniCoefficient.py +33 -0
  88. validmind/unit_metrics/regression/HuberLoss.py +23 -0
  89. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +30 -0
  90. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +16 -0
  91. validmind/unit_metrics/regression/MeanBiasDeviation.py +13 -0
  92. validmind/unit_metrics/regression/QuantileLoss.py +15 -0
  93. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +21 -0
  94. validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +13 -0
  95. validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +13 -0
  96. validmind/unit_metrics/regression/sklearn/RSquaredScore.py +13 -0
  97. validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +20 -0
  98. validmind/utils.py +20 -31
  99. validmind/vm_models/__init__.py +0 -2
  100. validmind/vm_models/dataset.py +623 -29
  101. validmind/vm_models/figure.py +52 -17
  102. validmind/vm_models/test/metric.py +33 -31
  103. validmind/vm_models/test/output_template.py +0 -27
  104. validmind/vm_models/test/result_wrapper.py +68 -36
  105. validmind/vm_models/test/test.py +4 -2
  106. validmind/vm_models/test/threshold_test.py +24 -14
  107. validmind/vm_models/test_context.py +7 -0
  108. validmind/vm_models/test_suite/runner.py +1 -1
  109. validmind/vm_models/test_suite/summary.py +3 -3
  110. validmind/vm_models/test_suite/test.py +1 -1
  111. validmind/vm_models/test_suite/test_suite.py +2 -1
  112. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/METADATA +18 -18
  113. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/RECORD +116 -94
  114. validmind-2.1.0.dist-info/entry_points.txt +3 -0
  115. validmind/tests/__types__.py +0 -62
  116. validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
  117. validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
  118. validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
  119. validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
  120. validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -20
  121. validmind/unit_metrics/sklearn/classification/F1.py +0 -22
  122. validmind/unit_metrics/sklearn/classification/Precision.py +0 -22
  123. validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -20
  124. validmind/unit_metrics/sklearn/classification/Recall.py +0 -20
  125. validmind/vm_models/test/unit_metric.py +0 -88
  126. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
  127. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
validmind/errors.py CHANGED
@@ -48,7 +48,7 @@ class MissingCacheResultsArgumentsError(BaseError):
     pass
 
 
-class MissingModelPredictFnError(BaseError):
+class MissingOrInvalidModelPredictFnError(BaseError):
     """
     When the pytorch model is missing a predict function or its predict
     method does not have the expected arguments.
@@ -315,6 +315,14 @@ class UnsupportedModelError(BaseError):
     pass
 
 
+class UnsupportedModelForSHAPError(BaseError):
+    """
+    When an unsupported model is used for SHAP importance.
+    """
+
+    pass
+
+
 class SkipTestError(BaseError):
     """
     Useful error to throw when a test cannot be executed.
@@ -361,6 +369,8 @@ def should_raise_on_fail_fast(error) -> bool:
     """
     error_class = error.__class__.__name__
     return error_class not in [
+        "MissingOrInvalidModelPredictFnError",
         "MissingRequiredTestInputError",
         "SkipTestError",
+        "UnsupportedModelForSHAPError",
     ]
validmind/logging.py CHANGED
@@ -68,10 +68,17 @@ def get_logger(name="validmind", log_level=None):
     logger = logging.getLogger(name)
     logger.setLevel(log_level or _get_log_level())
 
-    # Check if the handler is already added
-    if not any(isinstance(h, type(handler)) for h in logger.handlers):
+    # Clear existing handlers if any (or refine the existing logic as necessary)
+    # TODO: lets add some better handler management
+    if not any(
+        isinstance(h, type(handler)) and h.formatter._fmt == formatter._fmt
+        for h in logger.handlers
+    ):
         logger.addHandler(handler)
 
+    # Prevent logger from propagating to root logger
+    logger.propagate = False
+
     return logger
 
 
validmind/models/huggingface.py CHANGED
@@ -6,7 +6,7 @@ from dataclasses import dataclass
 
 import pandas as pd
 
-from validmind.errors import MissingModelPredictFnError
+from validmind.errors import MissingOrInvalidModelPredictFnError
 from validmind.logging import get_logger
 from validmind.vm_models.model import (
     ModelAttributes,
@@ -44,7 +44,7 @@ class HFModel(VMModel):
         Invoke predict_proba from underline model
         """
         if not has_method_with_arguments(self.model, "predict_proba", 1):
-            raise MissingModelPredictFnError(
+            raise MissingOrInvalidModelPredictFnError(
                 "Model requires a implementation of predict_proba method with 1 argument"
                 + " that is tensor features matrix"
             )
validmind/models/pytorch.py CHANGED
@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from validmind.errors import MissingModelPredictFnError
+from validmind.errors import MissingOrInvalidModelPredictFnError
 from validmind.logging import get_logger
 from validmind.vm_models.model import (
     ModelAttributes,
@@ -41,7 +41,7 @@ class PyTorchModel(VMModel):
         Invoke predict_proba from underline model
         """
         if not has_method_with_arguments(self.model, "predict_proba", 1):
-            raise MissingModelPredictFnError(
+            raise MissingOrInvalidModelPredictFnError(
                 "Model requires a implemention of predict_proba method with 1 argument"
                 + " that is tensor features matrix"
             )
@@ -54,7 +54,7 @@
         Predict method for the model. This is a wrapper around the model's
         """
         if not has_method_with_arguments(self.model, "predict", 1):
-            raise MissingModelPredictFnError(
+            raise MissingOrInvalidModelPredictFnError(
                 "Model requires a implemention of predict method with 1 argument"
                 + " that is tensor features matrix"
             )
validmind/models/sklearn.py CHANGED
@@ -2,7 +2,7 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from validmind.errors import MissingModelPredictFnError
+from validmind.errors import MissingOrInvalidModelPredictFnError
 from validmind.logging import get_logger
 from validmind.vm_models.model import (
     ModelAttributes,
@@ -40,9 +40,9 @@ class SKlearnModel(VMModel):
         predict_proba (for classification) or predict (for regression) method
         """
         if not has_method_with_arguments(self.model, "predict_proba", 1):
-            raise MissingModelPredictFnError(
-                "Model requires a implemention of predict_proba method with 1 argument"
-                + " that is features matrix"
+            raise MissingOrInvalidModelPredictFnError(
+                f"SKlearn model {self.model.__class__} Model does not have a compatible predict_proba implementation."
+                + " Please assign predictions directly with vm_dataset.assign_predictions(model, prediction_values)"
             )
         if callable(getattr(self.model, "predict_proba", None)):
             return self.model.predict_proba(*args, **kwargs)[:, 1]
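Note: the new SKlearnModel error message points users at vm_dataset.assign_predictions when a model exposes no compatible predict_proba. A minimal sketch of that workaround, assuming the usual vm.init_dataset / vm.init_model setup and passing precomputed values positionally as the message suggests (the model, DataFrame and column names below are illustrative):

    import validmind as vm

    vm_model = vm.init_model(model, input_id="my_model")
    vm_dataset = vm.init_dataset(dataset=df, target_column="target", input_id="my_dataset")

    # The model has no compatible predict_proba, so compute predictions
    # ourselves and attach them to the dataset, as the error message suggests
    prediction_values = model.predict(df.drop(columns=["target"]))
    vm_dataset.assign_predictions(vm_model, prediction_values)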
validmind/template.py CHANGED
@@ -4,9 +4,9 @@
 
 from pprint import pformat
 
+import mistune
 from IPython.display import display
 from ipywidgets import HTML, Accordion, VBox
-from markdown import markdown
 
 from .html_templates.content_blocks import (
     failed_content_block_html,
@@ -75,7 +75,7 @@ def _create_content_widget(content):
                 HTML(
                     test_content_block_html.format(
                         title=test_deets["Name"],
-                        description=markdown(test_deets["Description"]),
+                        description=mistune.html(test_deets["Description"]),
                         required_inputs=", ".join(
                             test_deets["Required Inputs"] or ["None"]
                         ),
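Both this file and validmind/tests/__init__.py swap the markdown package for mistune; mistune.html is mistune's one-shot renderer that turns a Markdown string into HTML, which is why it can replace markdown(...) call-for-call. A quick illustration of the call being relied on here (output shape only; the exact markup is up to mistune):

    import mistune

    html = mistune.html("**Required:** a `dataset` input")
    # renders to an HTML paragraph containing <strong> and <code> tags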
validmind/test_suites/__init__.py CHANGED
@@ -5,6 +5,8 @@
 """
 Entrypoint for test suites.
 """
+from inspect import getdoc
+
 import pandas as pd
 
 from ..logging import get_logger
@@ -139,7 +141,7 @@ def list_suites(pretty: bool = True):
             {
                 "ID": suite_id,
                 "Name": test_suite.__name__,
-                "Description": test_suite.__doc__.strip(),
+                "Description": getdoc(test_suite).strip(),
                 "Tests": ", ".join(_get_test_suite_test_ids(test_suite)),
             }
         )
@@ -167,7 +169,7 @@ def describe_suite(test_suite_id: str, verbose=False):
         {
             "ID": test_suite_id,
            "Name": test_suite.__name__,
-            "Description": test_suite.__doc__.strip(),
+            "Description": getdoc(test_suite).strip(),
             "Tests": ", ".join(_get_test_suite_test_ids(test_suite)),
         }
     ]
validmind/tests/__init__.py CHANGED
@@ -5,23 +5,26 @@
 """All Tests for ValidMind"""
 
 import importlib
+import inspect
 import sys
 from pathlib import Path
 from pprint import pformat
 from typing import Dict
 
+import mistune
 import pandas as pd
 from IPython.display import display
 from ipywidgets import HTML
-from markdown import markdown
 
 from ..errors import LoadTestError
 from ..html_templates.content_blocks import test_content_block_html
 from ..logging import get_logger
-from ..utils import clean_docstring, format_dataframe, fuzzy_match, test_id_to_name
+from ..unit_metrics import run_metric
+from ..unit_metrics.composite import load_composite_metric
+from ..utils import format_dataframe, fuzzy_match, test_id_to_name
 from ..vm_models import TestContext, TestInput
-from .__types__ import ExternalTestProvider
-from .test_providers import GithubTestProvider, LocalTestProvider
+from .decorator import metric, tags, tasks
+from .test_providers import LocalTestProvider, TestProvider
 
 logger = get_logger(__name__)
 
@@ -34,22 +37,28 @@ __all__ = [
     "load_test",
     "describe_test",
     "register_test_provider",
-    "GithubTestProvider",
     "LoadTestError",
     "LocalTestProvider",
+    # Decorators for functional metrics
+    "metric",
+    "tags",
+    "tasks",
 ]
 
 __tests = None
 __test_classes = None
 
-__test_providers: Dict[str, ExternalTestProvider] = {}
+__test_providers: Dict[str, TestProvider] = {}
+__custom_tests: Dict[str, object] = {}
 
 
 def _test_description(test_class, truncate=True):
-    if truncate and len(test_class.__doc__.split("\n")) > 5:
-        return test_class.__doc__.strip().split("\n")[0] + "..."
+    description = inspect.getdoc(test_class).strip()
 
-    return test_class.__doc__
+    if truncate and len(description.split("\n")) > 5:
+        return description.strip().split("\n")[0] + "..."
+
+    return description
 
 
 def _load_tests(test_ids):
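The new validmind/tests/decorator.py exports metric, tags and tasks, so a plain function can be registered as a test ("functional metric") and picked up by load_test/run_test. A minimal sketch of how that might look; the decorator arguments shown (a test ID string for metric, plain strings for tags/tasks) and the name-based dataset input are assumptions based on this diff, not a documented contract:

    import numpy as np
    from validmind.tests import metric, tags, tasks

    @metric("my_tests.MeanAbsoluteDeviation")  # illustrative test ID
    @tags("tabular_data", "custom")
    @tasks("regression")
    def mean_absolute_deviation(dataset):
        """Mean absolute deviation of the target column."""
        y = dataset.df[dataset.target_column].to_numpy()
        return float(np.mean(np.abs(y - y.mean())))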
@@ -249,55 +258,83 @@ def list_tests(filter=None, task=None, tags=None, pretty=True, truncate=True):
     return tests
 
 
-def load_test(test_id, reload=False):  # noqa: C901
-    # Extract the test ID extension from the actual test ID when loading
-    # the test class. This enables us to generate multiple results for
-    # the same tests within the document. For instance, consider the
-    # test ID "validmind.data_validation.ClassImbalance:data_id_1,"
-    # where the test ID extension is "data_id_1".
+def _load_validmind_test(test_id, reload=False):
     parts = test_id.split(":")[0].split(".")
 
+    test_module = ".".join(parts[1:-1])
+    test_class = parts[-1]
+
     error = None
-    namespace = parts[0]
+    test = None
 
-    if namespace != "validmind" and namespace not in __test_providers:
-        error = (
-            f"Unable to load test {test_id}. "
-            f"No Test Provider found for the namespace: {namespace}."
-        )
+    try:
+        full_path = f"validmind.tests.{test_module}.{test_class}"
 
-    if namespace == "validmind":
-        test_module = ".".join(parts[1:-1])
-        test_class = parts[-1]
+        if reload and full_path in sys.modules:
+            module = importlib.reload(sys.modules[full_path])
+        else:
+            module = importlib.import_module(full_path)
 
-        try:
-            full_path = f"validmind.tests.{test_module}.{test_class}"
+        test = getattr(module, test_class)
+    except ModuleNotFoundError as e:
+        error = f"Unable to load test {test_id}. {e}"
+    except AttributeError:
+        error = f"Unable to load test {test_id}. Test not in module: {test_class}"
+
+    return error, test
+
+
+def load_test(test_id: str, reload=False):
+    """Load a test by test ID
+
+    Test IDs are in the format `namespace.path_to_module.TestClassOrFuncName[:result_id]`.
+    The result ID is optional and is used to distinguish between multiple results from the
+    running the same test.
+
+    Args:
+        test_id (str): The test ID in the format `namespace.path_to_module.TestName[:result_id]`
+        reload (bool, optional): Whether to reload the test module. Defaults to False.
+    """
+    # TODO: we should use a dedicated class for test IDs to handle this consistently
+    test_id, result_id = test_id.split(":", 1) if ":" in test_id else (test_id, None)
+
+    error = None
+    namespace = test_id.split(".", 1)[0]
+
+    # TODO: lets implement an extensible loading system instead of this ugly if/else
+    if test_id in __custom_tests:
+        test = __custom_tests[test_id]
 
-            if reload and full_path in sys.modules:
-                module = importlib.reload(sys.modules[full_path])
-            else:
-                module = importlib.import_module(full_path)
+    elif test_id.startswith("validmind.composite_metric"):
+        error, test = load_composite_metric(test_id)
 
-            test = getattr(module, test_class)
-        except ModuleNotFoundError as e:
-            error = f"Unable to load test {test_id}. {e}"
-        except AttributeError:
-            error = f"Unable to load test {test_id}. Class not in module: {test_class}"
+    elif namespace == "validmind":
+        error, test = _load_validmind_test(test_id, reload=reload)
 
     elif namespace in __test_providers:
         try:
             test = __test_providers[namespace].load_test(test_id.split(".", 1)[1])
         except Exception as e:
             error = (
-                f"Unable to load test {test_id} from test provider: "
+                f"Unable to load test {test_id} from test provider: "
                 f"{__test_providers[namespace]}\n Got Exception: {e}"
             )
 
+    else:
+        error = f"Unable to load test {test_id}. No test provider found."
+
     if error:
         logger.error(error)
         raise LoadTestError(error)
 
-    test.test_id = test_id
+    if inspect.isfunction(test):
+        # if its a function, we decorate it and then load the class
+        # TODO: simplify this as we move towards all functional metrics
+        # "_" is used here so it doesn't conflict with other test ids
+        metric("_")(test)
+        test = __custom_tests["_"]
+
+    test.test_id = f"{test_id}:{result_id}" if result_id else test_id
 
     return test
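The reworked load_test accepts an optional :result_id suffix so the same test can produce several distinct results in one document. A minimal sketch of both forms, reusing the validmind.data_validation.ClassImbalance:data_id_1 ID from the removed comment (vm_train_ds is assumed to be a previously initialized ValidMind dataset):

    from validmind.tests import load_test, run_test

    # Plain test ID
    TestClass = load_test("validmind.data_validation.ClassImbalance")

    # Same test with a result ID suffix, so two results can coexist in one document
    result = run_test(
        "validmind.data_validation.ClassImbalance:data_id_1",
        inputs={"dataset": vm_train_ds},
    )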
 
@@ -322,7 +359,7 @@ def describe_test(test_id: str = None, raw: bool = False):
         "Test Type": test.test_type,
         "Required Inputs": test.required_inputs,
         "Params": test.default_params or {},
-        "Description": clean_docstring(test.__doc__),
+        "Description": inspect.getdoc(test).strip() or "",
     }
 
     if raw:
@@ -332,7 +369,7 @@
 
         HTML(
             test_content_block_html.format(
-                title=f'{details["Name"]}',
+                description=mistune.html(details["Description"].strip()),
                 required_inputs=", ".join(details["Required Inputs"] or ["None"]),
                 params_table="\n".join(
                     [
@@ -346,21 +383,63 @@
     )
 
 
-def run_test(test_id, params: dict = None, inputs=None, output_template=None, **kwargs):
+def run_test(
+    test_id: str = None,
+    name: str = None,
+    unit_metrics: list = None,
+    params: dict = None,
+    inputs=None,
+    output_template=None,
+    show=True,
+    **kwargs,
+):
     """Run a test by test ID
 
     Args:
-        test_id (str): The test ID
+        test_id (str, option): The test ID to run - required when running a single test
+            i.e. when not running multiple unit metrics
+        name (str, optional): The name of the test (used to create a composite metric
+            out of multiple unit metrics) - required when running multiple unit metrics
+        unit_metrics (list, optional): A list of unit metric IDs to run as a composite
+            metric - required when running multiple unit metrics
         params (dict, optional): A dictionary of params to override the default params
         inputs: A dictionary of test inputs to pass to the Test
         output_template (str, optional): A template to use for customizing the output
+        show (bool, optional): Whether to display the results. Defaults to True.
        **kwargs: Any extra arguments will be passed in via the TestInput object. i.e.:
            - dataset: A validmind Dataset object or a Pandas DataFrame
            - model: A model to use for the test
            - models: A list of models to use for the test
            other inputs can be accessed inside the test via `self.inputs["input_name"]`
     """
-    TestClass = load_test(test_id, reload=True)
+    if not test_id and not name and not unit_metrics:
+        raise ValueError(
+            "`test_id` or `name` and `unit_metrics` must be provided to run a test"
+        )
+
+    if (unit_metrics and not name) or (name and not unit_metrics):
+        raise ValueError("`name` and `unit_metrics` must be provided together")
+
+    if test_id and test_id.startswith("validmind.unit_metrics"):
+        # TODO: as we move towards a more unified approach to metrics
+        # we will want to make everything functional and remove the
+        # separation between unit metrics and "normal" metrics
+        return run_metric(test_id, inputs=inputs, params=params, show=show)
+
+    if unit_metrics:
+        metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
+        test_id = f"validmind.composite_metric.{metric_id_name}"
+
+        error, TestClass = load_composite_metric(
+            unit_metrics=unit_metrics, metric_name=metric_id_name
+        )
+
+        if error:
+            raise LoadTestError(error)
+
+    else:
+        TestClass = load_test(test_id, reload=True)
+
     test = TestClass(
         test_id=test_id,
         context=TestContext(),
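The new run_test signature now covers unit metrics and composite metrics as well as regular tests. A minimal sketch of both paths, assuming the unit metric IDs mirror the new module paths under validmind/unit_metrics/classification/sklearn/ and that vm_test_ds and vm_model are previously initialized ValidMind inputs:

    from validmind.tests import run_test

    # A single unit metric is routed to run_metric() internally
    run_test(
        "validmind.unit_metrics.classification.sklearn.F1",
        inputs={"dataset": vm_test_ds, "model": vm_model},
    )

    # Several unit metrics composed into one composite metric; per the code above,
    # the generated test ID is validmind.composite_metric.ClassifierPerformance
    run_test(
        name="Classifier Performance",
        unit_metrics=[
            "validmind.unit_metrics.classification.sklearn.Accuracy",
            "validmind.unit_metrics.classification.sklearn.Precision",
            "validmind.unit_metrics.classification.sklearn.Recall",
            "validmind.unit_metrics.classification.sklearn.F1",
        ],
        inputs={"dataset": vm_test_ds, "model": vm_model},
        show=False,  # compute without rendering the result widget
    )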
@@ -370,16 +449,22 @@ def run_test(test_id, params: dict = None, inputs=None, output_template=None, **
     )
 
     test.run()
-    test.result.show()
+
+    if show:
+        test.result.show()
 
     return test.result
 
 
-def register_test_provider(namespace: str, test_provider: ExternalTestProvider) -> None:
+def register_test_provider(namespace: str, test_provider: TestProvider) -> None:
     """Register an external test provider
 
     Args:
         namespace (str): The namespace of the test provider
-        test_provider (ExternalTestProvider): The test provider
+        test_provider (TestProvider): The test provider
     """
     __test_providers[namespace] = test_provider
+
+
+def _register_custom_test(test_id: str, test_class: object):
+    __custom_tests[test_id] = test_class
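register_test_provider now takes the consolidated TestProvider type, with LocalTestProvider remaining the built-in implementation. A minimal sketch of wiring one up; the assumption that LocalTestProvider is constructed with the root directory holding your test modules, and the paths and IDs below, are illustrative rather than confirmed by this diff:

    from validmind.tests import LocalTestProvider, register_test_provider, run_test

    # Serve custom tests from a local folder under the "my_tests" namespace
    provider = LocalTestProvider("/path/to/my_tests")
    register_test_provider("my_tests", provider)

    # IDs under that namespace now resolve through the provider,
    # e.g. a my_tests/MyCustomMetric.py module
    run_test("my_tests.MyCustomMetric", inputs={"dataset": vm_ds})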
validmind/tests/data_validation/DatasetDescription.py CHANGED
@@ -122,7 +122,6 @@ class DatasetDescription(Metric):
         return self.cache_results(results)
 
     def infer_datatype(self, df):
-
         vm_dataset_variables = {}
         typeset = ProfilingTypeSet(Settings())
         variable_types = typeset.infer_type(df)
validmind/tests/data_validation/FeatureTargetCorrelationPlot.py CHANGED
@@ -74,7 +74,9 @@ class FeatureTargetCorrelationPlot(Metric):
 
     def visualize_feature_target_correlation(self, df, target_column, fig_height):
         # Compute correlations with the target variable
-        correlations = df.corr(numeric_only=True)[target_column].drop(target_column)
+        correlations = (
+            df.corr(numeric_only=True)[target_column].drop(target_column).to_frame()
+        )
         correlations = correlations.loc[:, ~correlations.columns.duplicated()]
 
         correlations = correlations.sort_values(by=target_column, ascending=True)
validmind/tests/data_validation/PiTCreditScoresHistogram.py CHANGED
@@ -113,7 +113,7 @@ class PiTCreditScoresHistogram(Metric):
         )
         predicted_default_column = (
             self.params.get("predicted_default_column")
-            or self.inputs.dataset.y_pred(self.inputs.model.input_id),
+            or self.inputs.dataset.y_pred(self.inputs.model),
         )
         scores_column = self.params["scores_column"]
         point_in_time_column = self.params["point_in_time_column"]
validmind/tests/data_validation/ScatterPlot.py CHANGED
@@ -65,8 +65,14 @@ class ScatterPlot(Metric):
         if not set(columns).issubset(set(df.columns)):
             raise ValueError("Provided 'columns' must exist in the dataset")
 
-        sns.pairplot(data=df, diag_kind="kde")
-
+        g = sns.pairplot(data=df, diag_kind="kde")
+        for ax in g.axes.flatten():
+            # rotate x axis labels
+            ax.set_xlabel(ax.get_xlabel(), rotation=45)
+            # rotate y axis labels
+            ax.set_ylabel(ax.get_ylabel(), rotation=45)
+            # set y labels alignment
+            ax.yaxis.get_label().set_horizontalalignment("right")
         # Get the current figure
         fig = plt.gcf()
 
validmind/tests/data_validation/nlp/StopWords.py CHANGED
@@ -22,7 +22,6 @@ from validmind.vm_models import (
     ResultTableMetadata,
     ThresholdTest,
     ThresholdTestResult,
-    VMDataset,
 )
 
 
@@ -86,17 +85,13 @@ class StopWords(ThresholdTest):
                 ResultTable(
                     data=df,
                     metadata=ResultTableMetadata(
-                        title=f"Class Imbalance Results for Column {self.inputs.dataset.target_column}"
+                        title=f"Stop words results for column '{self.inputs.dataset.target_column}'"
                     ),
                 )
             ]
         )
 
     def run(self):
-        # Can only run this test if we have a Dataset object
-        if not isinstance(self.inputs.dataset, VMDataset):
-            raise ValueError("ClassImbalance requires a validmind Dataset object")
-
         text_column = self.inputs.dataset.text_column
 
         def create_corpus(df, text_column):
validmind/tests/data_validation/nlp/TextDescription.py CHANGED
@@ -92,9 +92,12 @@ class TextDescription(Metric):
         total_words = len(words)
         total_sentences = len(sentences)
         avg_sentence_length = round(
-            sum(len(sentence.split()) for sentence in sentences) / total_sentences
-            if total_sentences
-            else 0,
+            (
+                sum(len(sentence.split()) for sentence in sentences)
+                / total_sentences
+                if total_sentences
+                else 0
+            ),
             1,
         )
         total_paragraphs = len(paragraphs)
@@ -161,9 +164,13 @@
         return combined_df
 
     def run(self):
+        # Enforce that text_column must be provided as part of the params
+        if self.inputs.dataset.text_column is None:
+            raise ValueError("A 'text_column' must be provided to run this test.")
+
         # Can only run this test if we have a Dataset object
         if not isinstance(self.inputs.dataset, VMDataset):
-            raise ValueError("TextDescretion requires a validmind Dataset object")
+            raise ValueError("TextDescription requires a validmind Dataset object")
 
         df_text_description = self.text_description_table(
             self.inputs.dataset.df, self.params
@@ -177,27 +184,31 @@
             ("Total Unique Words", "Lexical Diversity"),
         ]
         params = {"combinations_to_plot": combinations_to_plot}
-        figures = self.text_description_scatter_plot(df_text_description, params)
+        figures = self.text_description_plots(df_text_description, params)
 
         return self.cache_results(
             figures=figures,
         )
 
     # Function to plot scatter plots for specified combinations using Plotly
-    def text_description_scatter_plot(self, df, params):
+    def text_description_plots(self, df, params):
         combinations_to_plot = params["combinations_to_plot"]
         figures = []
         # Create hist plots for each column
         for i, column in enumerate(df.columns):
             fig = px.histogram(df, x=column)
             fig.update_layout(bargap=0.2)
-            figures.append(Figure(for_object=self, key=self.key, figure=fig))
+            # Generate a unique key for each histogram using the column name and index
+            histogram_key = f"{self.name}_histogram_{column}_{i}"
+            figures.append(Figure(for_object=self, key=histogram_key, figure=fig))
 
-        for metric1, metric2 in combinations_to_plot:
+        for j, (metric1, metric2) in enumerate(combinations_to_plot):
             fig = px.scatter(
                 df, x=metric1, y=metric2, title=f"Scatter Plot: {metric1} vs {metric2}"
             )
-            figures.append(Figure(for_object=self, key=self.key, figure=fig))
+            # Generate a unique key for each scatter plot using the metric names and index
+            scatter_key = f"{self.name}_scatter_{metric1}_vs_{metric2}_{j}"
+            figures.append(Figure(for_object=self, key=scatter_key, figure=fig))
         plt.close("all")
 
         return figures