validmind 1.7.0__py3-none-any.whl → 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__init__.py +8 -1
- validmind/{client.pyx → client.py} +48 -41
- validmind/data_validation/{threshold_tests.pyx → threshold_tests.py} +1 -2
- validmind/datasets/__init__.py +0 -0
- validmind/datasets/classification/{customer_churn.pyx → customer_churn.py} +1 -1
- validmind/datasets/classification/datasets/bank_customer_churn.csv +8001 -0
- validmind/datasets/classification/datasets/taiwan_credit.csv +30001 -0
- validmind/datasets/classification/{taiwan_credit.pyx → taiwan_credit.py} +1 -1
- validmind/datasets/regression/__init__.py +55 -1
- validmind/datasets/regression/datasets/fred_loan_rates.csv +3552 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +126 -0
- validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +126 -0
- validmind/datasets/regression/datasets/lending_club_loan_rates.csv +138 -0
- validmind/datasets/regression/fred.py +132 -0
- validmind/datasets/regression/lending_club.py +70 -0
- validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
- validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
- validmind/model_validation/sklearn/{threshold_tests.pyx → threshold_tests.py} +9 -9
- validmind/model_validation/statsmodels/{metrics.pyx → metrics.py} +123 -138
- validmind/test_plans/__init__.py +0 -4
- validmind/test_plans/{binary_classifier.pyx → binary_classifier.py} +0 -15
- validmind/test_plans/{statsmodels_timeseries.pyx → statsmodels_timeseries.py} +2 -2
- validmind/test_plans/{tabular_datasets.pyx → tabular_datasets.py} +0 -13
- validmind/test_plans/{time_series.pyx → time_series.py} +3 -3
- validmind/test_suites/__init__.py +73 -0
- validmind/test_suites/test_suites.py +48 -0
- validmind/vm_models/__init__.py +2 -0
- validmind/vm_models/{dataset.pyx → dataset.py} +17 -8
- validmind/vm_models/test_suite.py +57 -0
- {validmind-1.7.0.dist-info → validmind-1.8.1.dist-info}/METADATA +1 -3
- validmind-1.8.1.dist-info/RECORD +63 -0
- validmind/api_client.c +0 -9481
- validmind/api_client.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind/client.c +0 -7198
- validmind/client.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind/datasets/regression/fred.pyx +0 -7
- validmind/datasets/regression/lending_club.pyx +0 -7
- validmind/model_utils.c +0 -9281
- validmind/model_utils.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind/utils.c +0 -10284
- validmind/utils.cpython-310-x86_64-linux-gnu.so +0 -0
- validmind-1.7.0.dist-info/RECORD +0 -53
- /validmind/{api_client.pyx → api_client.py} +0 -0
- /validmind/data_validation/{metrics.pyx → metrics.py} +0 -0
- /validmind/{model_utils.pyx → model_utils.py} +0 -0
- /validmind/model_validation/{model_metadata.pyx → model_metadata.py} +0 -0
- /validmind/model_validation/sklearn/{metrics.pyx → metrics.py} +0 -0
- /validmind/model_validation/statsmodels/{threshold_tests.pyx → threshold_tests.py} +0 -0
- /validmind/model_validation/{utils.pyx → utils.py} +0 -0
- /validmind/{utils.pyx → utils.py} +0 -0
- /validmind/vm_models/{dataset_utils.pyx → dataset_utils.py} +0 -0
- /validmind/vm_models/{figure.pyx → figure.py} +0 -0
- /validmind/vm_models/{metric.pyx → metric.py} +0 -0
- /validmind/vm_models/{metric_result.pyx → metric_result.py} +0 -0
- /validmind/vm_models/{model.pyx → model.py} +0 -0
- /validmind/vm_models/{plot_utils.pyx → plot_utils.py} +0 -0
- /validmind/vm_models/{result_summary.pyx → result_summary.py} +0 -0
- /validmind/vm_models/{test_context.pyx → test_context.py} +0 -0
- /validmind/vm_models/{test_plan.pyx → test_plan.py} +0 -0
- /validmind/vm_models/{test_plan_result.pyx → test_plan_result.py} +0 -0
- /validmind/vm_models/{test_result.pyx → test_result.py} +0 -0
- /validmind/vm_models/{threshold_test.pyx → threshold_test.py} +0 -0
- {validmind-1.7.0.dist-info → validmind-1.8.1.dist-info}/WHEEL +0 -0
validmind/__init__.py
CHANGED
@@ -30,7 +30,13 @@ from .api_client import ( # noqa: E402
|
|
30
30
|
log_figure,
|
31
31
|
)
|
32
32
|
|
33
|
-
from .client import
|
33
|
+
from .client import ( # noqa: E402
|
34
|
+
init_dataset,
|
35
|
+
init_model,
|
36
|
+
init_r_model,
|
37
|
+
run_test_plan,
|
38
|
+
run_test_suite,
|
39
|
+
)
|
34
40
|
|
35
41
|
# TODO: need to fix this import * situation
|
36
42
|
from .data_validation import * # noqa
|
@@ -45,6 +51,7 @@ __all__ = [ # noqa
|
|
45
51
|
"init_r_model",
|
46
52
|
"test_plans",
|
47
53
|
"run_test_plan",
|
54
|
+
"run_test_suite",
|
48
55
|
# Framework Logging API
|
49
56
|
"log_dataset",
|
50
57
|
"log_figure",
|
@@ -8,7 +8,8 @@ import xgboost as xgb
|
|
8
8
|
from sklearn.linear_model import LinearRegression, LogisticRegression
|
9
9
|
|
10
10
|
# from .model_validation import evaluate_model as mod_evaluate_model
|
11
|
-
from .test_plans import get_by_name
|
11
|
+
from .test_plans import get_by_name as get_test_plan_by_name
|
12
|
+
from .test_suites import get_by_name as get_test_suite_by_name
|
12
13
|
from .vm_models import (
|
13
14
|
Dataset,
|
14
15
|
DatasetTargets,
|
@@ -16,6 +17,7 @@ from .vm_models import (
|
|
16
17
|
ModelAttributes,
|
17
18
|
R_MODEL_TYPES,
|
18
19
|
TestPlan,
|
20
|
+
TestSuite,
|
19
21
|
)
|
20
22
|
|
21
23
|
|
@@ -65,10 +67,10 @@ def init_dataset(
|
|
65
67
|
|
66
68
|
|
67
69
|
def init_model(
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
70
|
+
model: object,
|
71
|
+
train_ds: Dataset = None,
|
72
|
+
test_ds: Dataset = None,
|
73
|
+
validation_ds: Dataset = None,
|
72
74
|
) -> Model:
|
73
75
|
"""
|
74
76
|
Initializes a VM Model, which can then be passed to other functions
|
@@ -92,7 +94,9 @@ def init_model(
|
|
92
94
|
)
|
93
95
|
)
|
94
96
|
|
95
|
-
return Model.init_vm_model(
|
97
|
+
return Model.init_vm_model(
|
98
|
+
model, train_ds, test_ds, validation_ds, attributes=ModelAttributes()
|
99
|
+
)
|
96
100
|
|
97
101
|
|
98
102
|
def init_r_model(model_path: str, model_type: str) -> Model:
|
@@ -193,7 +197,7 @@ def run_test_plan(test_plan_name, send=True, **kwargs):
|
|
193
197
|
dict: A dictionary of test results
|
194
198
|
"""
|
195
199
|
try:
|
196
|
-
Plan: TestPlan =
|
200
|
+
Plan: TestPlan = get_test_plan_by_name(test_plan_name)
|
197
201
|
except ValueError as exc:
|
198
202
|
raise ValueError(
|
199
203
|
"Error retrieving test plan {}. {}".format(test_plan_name, str(exc))
|
@@ -211,37 +215,40 @@ def run_test_plan(test_plan_name, send=True, **kwargs):
|
|
211
215
|
return plan
|
212
216
|
|
213
217
|
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
218
|
+
def run_test_suite(test_suite_name, send=True, **kwargs):
|
219
|
+
"""High Level function for running a test suite
|
220
|
+
|
221
|
+
This function provides a high level interface for running a test suite. A test suite is
|
222
|
+
a collection of test plans. This function will automatically find the correct test suite
|
223
|
+
class based on the test_suite_name, initialize each of the test plans, and run them.
|
224
|
+
|
225
|
+
Args:
|
226
|
+
test_suite_name (str): The test suite name (e.g. 'binary_classifier_full_suite')
|
227
|
+
send (bool, optional): Whether to post the test results to the API. send=False is useful for testing. Defaults to True.
|
228
|
+
**kwargs: Additional keyword arguments to pass to the test suite. These will provide
|
229
|
+
the TestSuite instance with the necessary context to run the tests. e.g. dataset, model etc.
|
230
|
+
See the documentation for the specific test plan, metric or threshold test for more details.
|
231
|
+
|
232
|
+
Raises:
|
233
|
+
ValueError: If the test suite name is not found or if there is an error initializing the test suite
|
234
|
+
|
235
|
+
Returns:
|
236
|
+
TestSuite: the TestSuite instance
|
237
|
+
"""
|
238
|
+
try:
|
239
|
+
Suite: TestSuite = get_test_suite_by_name(test_suite_name)
|
240
|
+
except ValueError as exc:
|
241
|
+
raise ValueError(
|
242
|
+
"Error retrieving test suite {}. {}".format(test_suite_name, str(exc))
|
243
|
+
)
|
244
|
+
|
245
|
+
try:
|
246
|
+
suite = Suite(**kwargs)
|
247
|
+
except ValueError as exc:
|
248
|
+
raise ValueError(
|
249
|
+
"Error initializing test suite {}. {}".format(test_suite_name, str(exc))
|
250
|
+
)
|
251
|
+
|
252
|
+
suite.run(send=send)
|
253
|
+
|
254
|
+
return suite
|
@@ -389,7 +389,7 @@ class TimeSeriesOutliers(ThresholdTest):
|
|
389
389
|
raise ValueError("zscore_threshold must be provided in params")
|
390
390
|
zscore_threshold = self.params["zscore_threshold"]
|
391
391
|
|
392
|
-
temp_df = self.df.copy(
|
392
|
+
temp_df = self.df.copy()
|
393
393
|
temp_df = temp_df.dropna()
|
394
394
|
typeset = ProfilingTypeSet(Settings())
|
395
395
|
dataset_types = typeset.infer_type(temp_df)
|
@@ -462,7 +462,6 @@ class TimeSeriesOutliers(ThresholdTest):
|
|
462
462
|
Returns:
|
463
463
|
matplotlib.figure.Figure: A matplotlib figure object with subplots for each variable.
|
464
464
|
"""
|
465
|
-
print(outliers_table)
|
466
465
|
sns.set(style="darkgrid")
|
467
466
|
n_variables = len(df.columns)
|
468
467
|
fig, axes = plt.subplots(n_variables, 1, sharex=True)
|
File without changes
|
@@ -11,7 +11,7 @@ from . import (
|
|
11
11
|
)
|
12
12
|
|
13
13
|
current_path = os.path.dirname(os.path.abspath(__file__))
|
14
|
-
dataset_path = os.path.join(current_path, "
|
14
|
+
dataset_path = os.path.join(current_path, "datasets")
|
15
15
|
|
16
16
|
drop_columns = ["RowNumber", "CustomerId", "Surname", "CreditScore"]
|
17
17
|
boolean_columns = ["Gender"]
|