validmind 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. validmind/__init__.py +6 -3
  2. validmind/__version__.py +1 -1
  3. validmind/ai.py +193 -0
  4. validmind/api_client.py +45 -31
  5. validmind/client.py +33 -6
  6. validmind/datasets/classification/customer_churn.py +2 -2
  7. validmind/datasets/credit_risk/__init__.py +11 -0
  8. validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  9. validmind/datasets/credit_risk/lending_club.py +394 -0
  10. validmind/datasets/nlp/__init__.py +5 -0
  11. validmind/datasets/nlp/cnn_dailymail.py +98 -0
  12. validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +255 -0
  13. validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +1277 -0
  14. validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +4847 -0
  15. validmind/errors.py +11 -1
  16. validmind/logging.py +9 -2
  17. validmind/models/huggingface.py +2 -2
  18. validmind/models/pytorch.py +3 -3
  19. validmind/models/sklearn.py +4 -4
  20. validmind/template.py +2 -2
  21. validmind/test_suites/__init__.py +4 -2
  22. validmind/tests/__init__.py +130 -45
  23. validmind/tests/data_validation/DatasetDescription.py +0 -1
  24. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  25. validmind/tests/data_validation/PiTCreditScoresHistogram.py +1 -1
  26. validmind/tests/data_validation/ScatterPlot.py +8 -2
  27. validmind/tests/data_validation/nlp/StopWords.py +1 -6
  28. validmind/tests/data_validation/nlp/TextDescription.py +20 -9
  29. validmind/tests/decorator.py +313 -0
  30. validmind/tests/model_validation/BertScore.py +1 -1
  31. validmind/tests/model_validation/BertScoreAggregate.py +1 -1
  32. validmind/tests/model_validation/BleuScore.py +1 -1
  33. validmind/tests/model_validation/ClusterSizeDistribution.py +1 -1
  34. validmind/tests/model_validation/ContextualRecall.py +1 -1
  35. validmind/tests/model_validation/FeaturesAUC.py +110 -0
  36. validmind/tests/model_validation/MeteorScore.py +92 -0
  37. validmind/tests/model_validation/RegardHistogram.py +6 -7
  38. validmind/tests/model_validation/RegardScore.py +4 -6
  39. validmind/tests/model_validation/RegressionResidualsPlot.py +127 -0
  40. validmind/tests/model_validation/RougeMetrics.py +7 -5
  41. validmind/tests/model_validation/RougeMetricsAggregate.py +1 -1
  42. validmind/tests/model_validation/SelfCheckNLIScore.py +112 -0
  43. validmind/tests/model_validation/TokenDisparity.py +1 -1
  44. validmind/tests/model_validation/ToxicityHistogram.py +1 -1
  45. validmind/tests/model_validation/ToxicityScore.py +1 -1
  46. validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
  47. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +1 -3
  48. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +17 -22
  49. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +1 -1
  50. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +16 -17
  51. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +1 -1
  52. validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  53. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +21 -3
  54. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +1 -1
  55. validmind/tests/model_validation/sklearn/MinimumF1Score.py +1 -1
  56. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +1 -1
  57. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -4
  58. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +2 -2
  59. validmind/tests/model_validation/sklearn/ROCCurve.py +6 -12
  60. validmind/tests/model_validation/sklearn/RegressionErrors.py +2 -2
  61. validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +6 -4
  62. validmind/tests/model_validation/sklearn/RegressionR2Square.py +2 -2
  63. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +55 -5
  64. validmind/tests/model_validation/sklearn/SilhouettePlot.py +1 -1
  65. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +11 -5
  66. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +2 -2
  67. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +140 -0
  68. validmind/tests/model_validation/statsmodels/GINITable.py +22 -45
  69. validmind/tests/model_validation/statsmodels/{LogisticRegPredictionHistogram.py → PredictionProbabilitiesHistogram.py} +67 -92
  70. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -2
  71. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -2
  72. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  73. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  74. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  75. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +2 -2
  76. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +128 -0
  77. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +70 -103
  78. validmind/tests/prompt_validation/ai_powered_test.py +2 -0
  79. validmind/tests/test_providers.py +14 -124
  80. validmind/unit_metrics/__init__.py +75 -70
  81. validmind/unit_metrics/classification/sklearn/Accuracy.py +14 -0
  82. validmind/unit_metrics/classification/sklearn/F1.py +13 -0
  83. validmind/unit_metrics/classification/sklearn/Precision.py +13 -0
  84. validmind/unit_metrics/classification/sklearn/ROC_AUC.py +13 -0
  85. validmind/unit_metrics/classification/sklearn/Recall.py +13 -0
  86. validmind/unit_metrics/composite.py +228 -0
  87. validmind/unit_metrics/regression/GiniCoefficient.py +33 -0
  88. validmind/unit_metrics/regression/HuberLoss.py +23 -0
  89. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +30 -0
  90. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +16 -0
  91. validmind/unit_metrics/regression/MeanBiasDeviation.py +13 -0
  92. validmind/unit_metrics/regression/QuantileLoss.py +15 -0
  93. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +21 -0
  94. validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +13 -0
  95. validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +13 -0
  96. validmind/unit_metrics/regression/sklearn/RSquaredScore.py +13 -0
  97. validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +20 -0
  98. validmind/utils.py +20 -31
  99. validmind/vm_models/__init__.py +0 -2
  100. validmind/vm_models/dataset.py +623 -29
  101. validmind/vm_models/figure.py +52 -17
  102. validmind/vm_models/test/metric.py +33 -31
  103. validmind/vm_models/test/output_template.py +0 -27
  104. validmind/vm_models/test/result_wrapper.py +68 -36
  105. validmind/vm_models/test/test.py +4 -2
  106. validmind/vm_models/test/threshold_test.py +24 -14
  107. validmind/vm_models/test_context.py +7 -0
  108. validmind/vm_models/test_suite/runner.py +1 -1
  109. validmind/vm_models/test_suite/summary.py +3 -3
  110. validmind/vm_models/test_suite/test.py +1 -1
  111. validmind/vm_models/test_suite/test_suite.py +2 -1
  112. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/METADATA +18 -18
  113. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/RECORD +116 -94
  114. validmind-2.1.0.dist-info/entry_points.txt +3 -0
  115. validmind/tests/__types__.py +0 -62
  116. validmind/tests/model_validation/statsmodels/LogRegressionConfusionMatrix.py +0 -128
  117. validmind/tests/model_validation/statsmodels/LogisticRegCumulativeProb.py +0 -172
  118. validmind/tests/model_validation/statsmodels/ScorecardBucketHistogram.py +0 -181
  119. validmind/tests/model_validation/statsmodels/ScorecardProbabilitiesHistogram.py +0 -175
  120. validmind/unit_metrics/sklearn/classification/Accuracy.py +0 -20
  121. validmind/unit_metrics/sklearn/classification/F1.py +0 -22
  122. validmind/unit_metrics/sklearn/classification/Precision.py +0 -22
  123. validmind/unit_metrics/sklearn/classification/ROC_AUC.py +0 -20
  124. validmind/unit_metrics/sklearn/classification/Recall.py +0 -20
  125. validmind/vm_models/test/unit_metric.py +0 -88
  126. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/LICENSE +0 -0
  127. {validmind-2.0.1.dist-info → validmind-2.1.0.dist-info}/WHEEL +0 -0
validmind/__init__.py CHANGED
@@ -50,7 +50,7 @@ from .__version__ import __version__ # noqa: E402
 from .api_client import init
 from .api_client import log_figure as _log_figure_async
 from .api_client import log_metrics as _log_metrics_async
-from .api_client import log_test_results
+from .api_client import log_test_results, reload
 from .client import ( # noqa: E402
     get_test_suite,
     init_dataset,
@@ -60,7 +60,7 @@ from .client import ( # noqa: E402
     run_documentation_tests,
     run_test_suite,
 )
-from .unit_metrics import run_metric
+from .tests.decorator import metric, tags, tasks
 from .utils import run_async # noqa: E402
 
 
@@ -105,9 +105,13 @@ __all__ = [ # noqa
     "init_dataset",
     "init_model",
     "init_r_model",
+    "metric",
     "preview_template",
+    "reload",
     "run_documentation_tests",
     "run_test_suite",
+    "tags",
+    "tasks",
     "tests",
     "test_suites",
     "vm_models",
@@ -116,5 +120,4 @@ __all__ = [ # noqa
     "log_figure",
     "log_metrics",
     "log_test_results",
-    "run_metric",
 ]
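The swap from `run_metric` to the `metric`, `tags`, and `tasks` decorators (re-exported from the new `validmind/tests/decorator.py`, file 29 above) changes how custom metrics are declared. Below is a minimal sketch of the decorator-based style; it assumes the decorators can be stacked and that the wrapped function receives ValidMind inputs by name — the function name, arguments, and accessors are illustrative only, not taken from this diff:

```python
import validmind as vm
from sklearn.metrics import f1_score

@vm.metric  # new top-level export in 2.1.0
@vm.tags("classification", "sklearn")   # assumed: attaches filterable tags to the metric
@vm.tasks("classification")             # assumed: declares which task types it applies to
def WeightedF1(model, dataset):
    """Weighted F1 score of a model on a dataset (illustrative custom metric)."""
    return f1_score(dataset.y, dataset.y_pred(model), average="weighted")
```

The other new export, `reload`, is documented in the `api_client.py` hunk below as re-pinging the API and reloading the project configuration.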
validmind/__version__.py CHANGED
@@ -1 +1 @@
-__version__ = "2.0.1"
+__version__ = "2.1.0"
validmind/ai.py ADDED
@@ -0,0 +1,193 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+import concurrent.futures
+import os
+
+from openai import AzureOpenAI, OpenAI
+
+SYSTEM_PROMPT = """
+You are an expert data scientist and MRM specialist tasked with providing concise and'
+objective insights based on the results of quantitative model or dataset analysis.
+
+Examine the provided statistical test results and compose a brief summary. Highlight crucial
+insights, focusing on the distribution characteristics, central tendencies (such as mean or median),
+and the variability (including standard deviation and range) of the metrics. Evaluate how
+these statistics might influence the development and performance of a predictive model. Identify
+and explain any discernible trends or anomalies in the test results.
+
+Your analysis will act as the description of the result in the model documentation.
+
+Avoid long sentences and complex vocabulary.
+Structure the response clearly and logically.
+Use valid Markdown syntax to format the response (tables are supported).
+Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
+Use the following format for the response (feel free to modify slightly if necessary):
+```
+**<Test Name>** <continue to explain what it does in detail>...
+
+The results of this test <detailed explanation of the results>...
+
+In summary the following key insights can be gained:
+
+- **<key insight 1 - title>**: <explanation of key insight 1>
+- ...<continue with any other key insights using the same format>
+```
+It is very important that the text is nicely formatted and contains enough information to be useful to the user as documentation.
+""".strip()
+USER_PROMPT = """
+Test ID: {test_name}
+Test Description: {test_description}
+Test Results (the raw results of the test):
+{test_results}
+Test Summary (what the user sees in the documentation):
+{test_summary}
+""".strip()
+USER_PROMPT_FIGURES = """
+Test ID: {test_name}
+Test Description: {test_description}
+The attached plots show the results of the test.
+""".strip()
+
+__client = None
+__model = None
+
+__executor = concurrent.futures.ThreadPoolExecutor()
+
+
+def __get_client_and_model():
+    """
+    Get the model to use for generating interpretations
+    """
+    global __client, __model
+
+    if __client and __model:
+        return __client, __model
+
+    if "OPENAI_API_KEY" in os.environ:
+        __client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+        __model = os.environ.get("VM_OPENAI_MODEL", "gpt-4-turbo")
+
+    elif "AZURE_OPENAI_KEY" in os.environ:
+        if "AZURE_OPENAI_ENDPOINT" not in os.environ:
+            raise ValueError(
+                "AZURE_OPENAI_ENDPOINT must be set to run LLM tests with Azure"
+            )
+
+        if "AZURE_OPENAI_MODEL" not in os.environ:
+            raise ValueError(
+                "AZURE_OPENAI_MODEL must be set to run LLM tests with Azure"
+            )
+
+        __client = AzureOpenAI(
+            azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
+            api_key=os.environ.get("AZURE_OPENAI_KEY"),
+            api_version=os.environ.get("AZURE_OPENAI_VERSION", "2023-05-15"),
+        )
+        __model = os.environ.get("AZURE_OPENAI_MODEL")
+
+    else:
+        raise ValueError("OPENAI_API_KEY or AZURE_OPENAI_KEY must be set")
+
+    return __client, __model
+
+
+class DescriptionFuture:
+    """This will be immediately returned from generate_description so that
+    the tests can continue to be run in parallel while the description is
+    retrieved asynchronously.
+
+    The value will be retrieved later and if its not ready yet, it should
+    block until it is.
+    """
+
+    def __init__(self, future):
+        self._future = future
+
+    def get_description(self):
+        # This will block until the future is completed
+        return self._future.result()
+
+
+def generate_description_async(
+    test_name: str,
+    test_description: str,
+    test_results: str,
+    test_summary: str,
+    figures: list = None,
+):
+    """Generate the description for the test results"""
+    client, _ = __get_client_and_model()
+
+    # get last part of test id
+    test_name = test_name.split(".")[-1]
+
+    if not test_results and not test_summary:
+        if not figures:
+            raise ValueError("No results, summary or figures provided")
+
+        response = client.chat.completions.create(
+            model="gpt-4-turbo",
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": USER_PROMPT_FIGURES.format(
+                                test_name=test_name,
+                                test_description=test_description,
+                            ),
+                        },
+                        *[
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": figure._get_b64_url(),
+                                },
+                            }
+                            for figure in figures
+                        ],
+                    ],
+                },
+            ],
+        )
+    else:
+        response = client.chat.completions.create(
+            model="gpt-4-turbo",
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {
+                    "role": "user",
+                    "content": USER_PROMPT.format(
+                        test_name=test_name,
+                        test_description=test_description,
+                        test_results=test_results,
+                        test_summary=test_summary,
+                    ),
+                },
+            ],
+        )
+
+    return response.choices[0].message.content.strip("```").strip()
+
+
+def generate_description(
+    test_name: str,
+    test_description: str,
+    test_results: str,
+    test_summary: str,
+    figures: list = None,
+):
+    future = __executor.submit(
+        generate_description_async,
+        test_name,
+        test_description,
+        test_results,
+        test_summary,
+        figures,
+    )
+
+    return DescriptionFuture(future)
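The `DescriptionFuture` wrapper is what lets test execution continue while the LLM call runs on the module-level thread pool: the future is created immediately and only blocks when the description is actually read. A self-contained sketch of the same pattern, with a sleep standing in for the OpenAI round-trip (everything here is illustrative except the wrapper, which mirrors the class above):

```python
import concurrent.futures
import time

executor = concurrent.futures.ThreadPoolExecutor()

def slow_llm_call(prompt: str) -> str:
    time.sleep(2)  # stand-in for the chat.completions.create round-trip
    return f"summary of: {prompt}"

class DescriptionFuture:
    """Returned immediately; blocks only when the value is actually read."""

    def __init__(self, future):
        self._future = future

    def get_description(self):
        # blocks until the worker thread finishes
        return self._future.result()

# submit now, keep running other tests, read the text later
desc = DescriptionFuture(executor.submit(slow_llm_call, "ClassImbalance results"))
print(desc.get_description())
```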
validmind/api_client.py CHANGED
@@ -16,6 +16,7 @@ from io import BytesIO
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import aiohttp
+import mistune
 import requests
 from aiohttp import FormData
 
@@ -171,6 +172,18 @@ def __ping() -> Dict[str, Any]:
     )
 
 
+def reload():
+    """Reconnect to the ValidMind API and reload the project configuration"""
+
+    try:
+        __ping()
+    except Exception as e:
+        # if the api host is https, assume we're not in dev mode and send to sentry
+        if _api_host.startswith("https://"):
+            send_single_error(e)
+        raise e
+
+
 async def __get_url(endpoint: str, params: Optional[Dict[str, str]] = None) -> str:
     if not _run_cuid:
         start_run()
@@ -282,45 +295,46 @@ async def log_figures(figures: List[Figure]) -> Dict[str, Any]:
     Returns:
         dict: The response from the API
     """
-    if client_config.can_log_figures():  # check if the backend supports batch logging
-        try:
-            data = {}
-            files = {}
-            for figure in figures:
-                data.update(
-                    {f"{k}-{figure.key}": v for k, v in figure.serialize().items()}
-                )
-                files.update(
-                    {
-                        f"{k}-{figure.key}": v
-                        for k, v in figure.serialize_files().items()
-                    }
-                )
-
-            return await _post(
-                "log_figures",
-                data=data,
-                files=files,
-            )
-        except Exception as e:
-            logger.error("Error logging figures to ValidMind API")
-            raise e
-
-    else:
-        return await asyncio.gather(*[log_figure(figure) for figure in figures])
+    # this actually slows things down - better to log them in parallel
+    # if client_config.can_log_figures():  # check if the backend supports batch logging
+    #     try:
+    #         data = {}
+    #         files = {}
+    #         for figure in figures:
+    #             data.update(
+    #                 {f"{k}-{figure.key}": v for k, v in figure.serialize().items()}
+    #             )
+    #             files.update(
+    #                 {
+    #                     f"{k}-{figure.key}": v
+    #                     for k, v in figure.serialize_files().items()
+    #                 }
+    #             )
+
+    #         return await _post(
+    #             "log_figures",
+    #             data=data,
+    #             files=files,
+    #         )
+    #     except Exception as e:
+    #         logger.error("Error logging figures to ValidMind API")
+    #         raise e
+
+    # else:
+    return await asyncio.gather(*[log_figure(figure) for figure in figures])
 
 
 async def log_metadata(
     content_id: str,
     text: Optional[str] = None,
-    extra_json: Optional[Dict[str, Any]] = None,
+    _json: Optional[Dict[str, Any]] = None,
 ) -> Dict[str, Any]:
     """Logs free-form metadata to ValidMind API.
 
     Args:
         content_id (str): Unique content identifier for the metadata
         text (str, optional): Free-form text to assign to the metadata. Defaults to None.
-        extra_json (dict, optional): Free-form key-value pairs to assign to the metadata. Defaults to None.
+        _json (dict, optional): Free-form key-value pairs to assign to the metadata. Defaults to None.
 
     Raises:
         Exception: If the API call fails
@@ -330,9 +344,9 @@ async def log_metadata(
     """
    metadata_dict = {"content_id": content_id}
    if text is not None:
-        metadata_dict["text"] = text
-    if extra_json is not None:
-        metadata_dict["extra_json"] = extra_json
+        metadata_dict["text"] = mistune.html(text)
+    if _json is not None:
+        metadata_dict["json"] = _json
 
     try:
         return await _post(
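Of these changes, the `mistune.html(text)` call is the one that alters the payload: metadata text is now rendered from Markdown to HTML before being posted. A quick standalone illustration of that conversion (the input string is made up; the output is shown approximately in the comment):

```python
import mistune

text = "**Class Imbalance** results\n\n- minority class: 12%\n- majority class: 88%"
html = mistune.html(text)
# roughly: "<p><strong>Class Imbalance</strong> results</p>\n<ul>\n<li>minority class: 12%</li>..."
print(html)
```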
validmind/client.py CHANGED
@@ -7,6 +7,7 @@ Client interface for all data and model validation functions
 """
 
 import pandas as pd
+import polars as pl
 
 from .api_client import _log_input as log_input
 from .client_config import client_config
@@ -26,7 +27,13 @@ from .template import preview_template as _preview_template
 from .test_suites import get_by_id as get_test_suite_by_id
 from .utils import get_dataset_info, get_model_info
 from .vm_models import TestInput, TestSuite, TestSuiteRunner
-from .vm_models.dataset import DataFrameDataset, NumpyDataset, TorchDataset, VMDataset
+from .vm_models.dataset import (
+    DataFrameDataset,
+    NumpyDataset,
+    PolarsDataset,
+    TorchDataset,
+    VMDataset,
+)
 from .vm_models.model import VMModel, get_model_class
 
 pd.option_context("format.precision", 2)
@@ -54,11 +61,16 @@ def init_dataset(
     """
     Initializes a VM Dataset, which can then be passed to other functions
     that can perform additional analysis and tests on the data. This function
-    also ensures we are reading a valid dataset type. We only support Pandas
-    DataFrames at the moment.
+    also ensures we are reading a valid dataset type.
+
+    The following dataset types are supported:
+    - Pandas DataFrame
+    - Polars DataFrame
+    - Numpy ndarray
+    - Torch TensorDataset
 
     Args:
-        dataset (pd.DataFrame): We only support Pandas DataFrames at the moment
+        dataset : dataset from various python libraries
         model (VMModel): ValidMind model object
         options (dict): A dictionary of options for the dataset
         targets (vm.vm.DatasetTargets): A list of target variables
@@ -89,7 +101,7 @@ def init_dataset(
     input_id = input_id or "dataset"
 
     # Instantiate supported dataset types here
-    if dataset_class == "DataFrame":
+    if isinstance(dataset, pd.DataFrame):
         logger.info("Pandas dataset detected. Initializing VM Dataset instance...")
         vm_dataset = DataFrameDataset(
             input_id=input_id,
@@ -102,6 +114,19 @@ def init_dataset(
             target_class_labels=class_labels,
             date_time_index=date_time_index,
         )
+    elif isinstance(dataset, pl.DataFrame):
+        logger.info("Polars dataset detected. Initializing VM Dataset instance...")
+        vm_dataset = PolarsDataset(
+            input_id=input_id,
+            raw_dataset=dataset,
+            model=model,
+            target_column=target_column,
+            feature_columns=feature_columns,
+            text_column=text_column,
+            extra_columns=extra_columns,
+            target_class_labels=class_labels,
+            date_time_index=date_time_index,
+        )
     elif dataset_class == "ndarray":
         logger.info("Numpy ndarray detected. Initializing VM Dataset instance...")
         vm_dataset = NumpyDataset(
@@ -360,7 +385,7 @@ def preview_template():
 
 
 def run_documentation_tests(
-    section=None, send=True, fail_fast=False, inputs=None, **kwargs
+    section=None, send=True, fail_fast=False, inputs=None, config=None, **kwargs
 ):
     """Collect and run all the tests associated with a template
 
@@ -373,6 +398,7 @@
         send (bool, optional): Whether to send the results to the ValidMind API. Defaults to True.
         fail_fast (bool, optional): Whether to stop running tests after the first failure. Defaults to False.
         inputs (dict, optional): A dictionary of test inputs to pass to the TestSuite
+        config: A dictionary of test parameters to override the defaults
         **kwargs: backwards compatibility for passing in test inputs using keyword arguments
 
     Returns:
@@ -401,6 +427,7 @@
             send=send,
             fail_fast=fail_fast,
             inputs=inputs,
+            config=config,
             **kwargs,
         )
         test_suites[_section] = test_suite
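With `PolarsDataset` added to the `isinstance` dispatch, a Polars frame can be passed straight to `init_dataset`. A short sketch, assuming the client has already been initialized with `vm.init()`; the column names and `input_id` are placeholders:

```python
import polars as pl
import validmind as vm

df = pl.DataFrame(
    {
        "age": [34, 51, 42],
        "balance": [1200.0, 380.5, 904.2],
        "churn": [0, 1, 0],
    }
)

# 2.1.0 routes pl.DataFrame inputs to PolarsDataset instead of rejecting them
vm_ds = vm.init_dataset(
    dataset=df,
    input_id="polars_demo",
    target_column="churn",
    feature_columns=["age", "balance"],
)
```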
validmind/datasets/classification/customer_churn.py CHANGED
@@ -60,7 +60,7 @@ def preprocess(df):
     return train_df, validation_df, test_df
 
 
-def get_demo_test_config():
+def get_demo_test_config(test_suite=None):
     """
     Returns input configuration for the default documentation
     template assigned to this demo model
@@ -81,7 +81,7 @@ def get_demo_test_config():
     - The only exception is ClassifierPerformance since that runs twice: once
       with the train_dataset (in sample) and once with the test_dataset (out of sample)
     """
-    default_config = vm.get_test_suite().get_default_config()
+    default_config = (test_suite or vm.get_test_suite()).get_default_config()
 
     for _, test_config in default_config.items():
         if "model" in test_config["inputs"]:
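The new optional `test_suite` argument lets callers derive the demo config from a specific suite rather than the project's default. A hedged sketch (the import path follows file 6 in the list above; the suite passed in is whatever `vm.get_test_suite()` returns, and the explicit suite ID shown is a placeholder):

```python
import validmind as vm
from validmind.datasets.classification import customer_churn

# previous behaviour: config built from the project's assigned test suite
config = customer_churn.get_demo_test_config()

# new in 2.1.0: build the config from an explicitly supplied suite instead
suite = vm.get_test_suite("classifier_full_suite")  # placeholder suite ID
config = customer_churn.get_demo_test_config(test_suite=suite)
```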
validmind/datasets/credit_risk/__init__.py ADDED
@@ -0,0 +1,11 @@
+# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
+# See the LICENSE file in the root of this repository for details.
+# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+
+"""
+Entrypoint for credit risk datasets.
+"""
+
+__all__ = [
+    "lending_club",
+]