teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of teradataml might be problematic.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +196 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +79 -4
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +1 -0
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/automl/data_preparation.py +3 -2
- teradataml/automl/feature_engineering.py +15 -7
- teradataml/automl/model_training.py +39 -33
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +35 -0
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +8 -2
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +25 -3
- teradataml/common/utils.py +134 -9
- teradataml/context/context.py +20 -10
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/dataframe.py +543 -175
- teradataml/dataframe/functions.py +553 -25
- teradataml/dataframe/sql.py +184 -15
- teradataml/dbutils/dbutils.py +556 -18
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
- teradataml/options/__init__.py +7 -23
- teradataml/options/configure.py +29 -3
- teradataml/scriptmgmt/UserEnv.py +3 -3
- teradataml/scriptmgmt/lls_utils.py +74 -21
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +33 -1
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -19,7 +19,6 @@ from collections import OrderedDict, defaultdict
 from importlib import import_module

 import base64
-import functools
 import json
 import numpy
 import os
@@ -28,7 +27,7 @@ import time
 import inspect
 import warnings
 import json
-import
+import math
 import pandas as pd
 from teradatasqlalchemy import BLOB, CLOB, FLOAT, TIMESTAMP, VARCHAR, INTEGER
 import pandas.api.types as pt
@@ -41,9 +40,9 @@ from teradataml.context.context import _get_current_databasename, get_connection
 from teradataml.dbutils.filemgr import install_file, remove_file
 from teradataml.utils.utils import execute_sql
 from teradataml.options.configure import configure
-from teradataml.opensource.
+from teradataml.opensource._wrapper_utils import _validate_fit_run, _generate_new_name,\
     _validate_opensource_func_args, _derive_df_and_required_columns, _validate_df_query_type
-from teradataml.opensource.
+from teradataml.opensource.constants import OpenSourcePackage, _OSML_MODELS_PRIMARY_INDEX,\
     _OSML_MODELS_TABLE_NAME, _OSML_MODELS_TABLE_COLUMNS_TYPE_DICT, OpensourceModels,\
     _OSML_ADDITIONAL_COLUMN_TYPES
 from teradataml.common.messagecodes import MessageCodes
@@ -53,7 +52,6 @@ from teradataml.dbutils.dbutils import _create_table, set_session_param
 from teradataml.utils.validators import _Validators
 from teradataml.dataframe.dataframe import DataFrame
 from teradataml.dataframe.dataframe_utils import DataFrameUtils
-from teradataml.scriptmgmt.lls_utils import create_env, get_env
 from teradataml.common.garbagecollector import GarbageCollector
 from teradataml.common.constants import TeradataConstants

@@ -70,6 +68,9 @@ _file_installed = False

 class _GenericObjectWrapper:
     def __init__(self) -> None:
+        if not get_connection():
+            raise TeradataMlException(Messages.get_message(MessageCodes.INVALID_CONTEXT_CONNECTION),
+                                      MessageCodes.INVALID_CONTEXT_CONNECTION)
         self._db_name = _get_current_databasename()

         self._scripts_path = os.path.join(_TDML_DIRECTORY, "data", "scripts", "sklearn")
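
The new guard in `_GenericObjectWrapper.__init__` makes every open-source wrapper fail fast when no Vantage connection exists, instead of failing later inside a query. A minimal usage sketch, assuming a reachable Vantage system; host, user, and password are placeholders:

```python
# With the new guard, the context must exist *before* any td_sklearn object is
# created, otherwise TeradataMlException (INVALID_CONTEXT_CONNECTION) is
# raised immediately at construction time.
from teradataml import create_context, td_sklearn

con = create_context(host="<host>", username="<user>", password="<password>")

# The check in _GenericObjectWrapper.__init__ runs here, at object creation.
model = td_sklearn.LinearRegression()
```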
@@ -215,6 +216,7 @@ class _GenericObjectWrapper:
             raise TeradataMlException(
                 f"Script file '{file_name}' failed to remove in Vantage."
             )
+
     def _get_data_col_types_and_partition_col_indices_and_types(self, data, partition_columns,
                                                                 idx_delim=",",
                                                                 types_delim="--"):
@@ -264,7 +266,7 @@ class _GenericObjectWrapper:
             args_str += f" {strr}"
         return args_str

-    def
+    def _extract_model_objs(self, n_unique_partitions=1, n_partition_cols=1):
         """
         Internal function to extract sklearn object from the model(s) depending on the number of
         partitions. When it is only one model, it is directly used as sklearn object (modelObj).
@@ -297,13 +299,256 @@ class _GenericObjectWrapper:

         warnings.filterwarnings("default")

+    def _validate_existence_of_partition_columns(self, partition_columns, all_columns, arg_names_for_dfs):
+        """
+        Validate if columns in "partition_columns" argument are present in any of the given
+        dataframes.
+        """
+        invalid_part_cols = [c for c in partition_columns if c not in all_columns]
+
+        if invalid_part_cols:
+            raise ValueError(Messages.get_message(MessageCodes.INVALID_PARTITIONING_COLS,
+                                                  ", ".join(invalid_part_cols),
+                                                  "', '".join(arg_names_for_dfs))
+                             )
+
+    def _prepare_data_args_string(self, kwargs):
+        """
+        Get column indices and types of each data related arguments in the format:
+        "{<arg_name>-<comma separated indices>-<comma separated types>}--
+         {<arg_name>-<comma separated indices>-<comma separated types>}"
+        """
+        data_args_str = []
+        for arg_name in list(self._data_args.keys()):
+            # Remove DataFrame arguments from kwargs, which will be passed to Script.
+            kwargs.pop(arg_name)
+
+            # Get column indices and their types for each dataframe from parent dataframe.
+            _, partition_indices_str, partition_types_str, _ = \
+                self._get_data_col_types_and_partition_col_indices_and_types(self._tdml_df,
+                                                                             self._data_args[arg_name].columns,
+                                                                             idx_delim=",",
+                                                                             types_delim=",")
+
+            # Format "<arg_name>-<comma separated indices>-<comma separated types>"
+            data_args_str.append(f"{arg_name}-{partition_indices_str}-{partition_types_str}")
+
+        # Format "{<arg_name>-<comma separated indices>-<comma separated types>}--
+        # {<arg_name>-<comma separated indices>-<comma separated types>}"
+        return "--".join(data_args_str)
+
+    def _prepare_and_install_file(self, replace_dict):
+        """
+        Prepare function script file from template file and install it in Vantage.
+        Takes the dictionary with keys as strings to be replaced in script and values as
+        strings which should be added in place of keys.
+        """
+
+        with open(os.path.join(self._scripts_path, self._template_file)) as fp:
+            script_data = fp.read()
+
+        for old, new in replace_dict.items():
+            script_data = script_data.replace(old, new)
+
+        self._script_file_local = os.path.join(self._tdml_tmp_dir, self._script_file_name)
+
+        with open(self._script_file_local, "w") as fp:
+            fp.write(script_data)
+
+        self._install_script_file(file_identifier=self._script_file_name.split(".")[0],
+                                  file_name=self._script_file_name,
+                                  file_location=self._tdml_tmp_dir)
+
+    def _get_dataframe_related_args_and_their_columns(self, kwargs):
+        """
+        Get dataframe related arguments and return all their column names from kwargs.
+        """
+        __data_columns = []
+        __data_args_dict = OrderedDict()
+
+        # Separate dataframe related arguments and their column names from actual kwargs.
+        for k, v in kwargs.items():
+            if isinstance(v, DataFrame):
+                # All dataframes should be select of parent dataframe.
+                _validate_df_query_type(v, "select", k)
+
+                # Save all columns in dataframe related arguments.
+                __data_columns.extend(v.columns)
+
+                __data_args_dict[k] = v
+
+        return __data_args_dict, __data_columns
+
+    def _process_data_for_funcs_returning_objects(self, kwargs):
+        """
+        Internal function to process all arguments and assign self._data_args, self._tdml_df
+        and return
+        1. dictionary of elements (needed to replace in the script template file)
+        2. partition columns list.
+        """
+        partition_cols = self._get_columns_as_list(kwargs.get("partition_columns", None))
+        if partition_cols:
+            kwargs.pop("partition_columns")
+
+        self._data_args, __data_columns = self._get_dataframe_related_args_and_their_columns(kwargs)
+
+        arg_names_for_dfs = list(self._data_args.keys())
+
+        # Get common parent dataframe from all dataframes.
+        self._tdml_df = DataFrameUtils()._get_common_parent_df_from_dataframes(list(self._data_args.values()))
+
+        self._tdml_df = self._tdml_df.select(__data_columns + partition_cols)
+
+        self._validate_existence_of_partition_columns(partition_cols, self._tdml_df.columns, arg_names_for_dfs)
+
+        self._tdml_df, partition_cols = self._get_data_and_data_partition_columns(self._tdml_df,
+                                                                                  __data_columns,
+                                                                                  [],
+                                                                                  partition_cols
+                                                                                  )
+
+        # Prepare string of data arguments with name, indices where columns of that argument resides
+        # and types of each of the column.
+        data_args_str = self._prepare_data_args_string(kwargs)
+
+        # Get indices of partition_columns and types of all columns.
+        data_column_types_str, partition_indices_str, _, partition_cols = \
+            self._get_data_col_types_and_partition_col_indices_and_types(self._tdml_df,
+                                                                         partition_cols,
+                                                                         types_delim=None,
+                                                                         idx_delim=None)
+
+        replace_dict = {"<partition_cols_indices>": str(partition_indices_str),
+                        "<types_of_data_cols>": str(data_column_types_str),
+                        "<data_args_info_str>": f"'{data_args_str}'"}
+
+        return replace_dict, partition_cols
+
+    def _validate_equality_of_partition_values(self, fit_values, trans_values):
+        """
+        Internal function to compare the partition values in fit() and predict() are same.
+        """
+        if len(fit_values) != len(trans_values):
+            return False
+
+        for val in fit_values:
+            if not all([val in trans_values]):
+                return False
+
+        return True
+
+    def _get_non_data_related_args_from_kwargs(self, kwargs):
+        """
+        Get all non-data related arguments from kwargs.
+        """
+        non_data_related_args = {}
+        for k, v in kwargs.items():
+            if not isinstance(v, DataFrame):
+                non_data_related_args[k] = v
+        non_data_related_args.pop("partition_columns", None)
+        return non_data_related_args
+
+    def _read_from_template_and_write_dict_to_file(self, template_file, replace_dict,
+                                                   output_script_file_name=None):
+        """
+        Read template file, replace the keys with values and write to new file.
+        """
+        with open(os.path.join(self._scripts_path, template_file)) as fp:
+            script_data = fp.read()
+
+        for old, new in replace_dict.items():
+            script_data = script_data.replace(old, new)
+
+        if output_script_file_name is None:
+            output_script_file_name = self._script_file_name
+        file_path = os.path.join(self._tdml_tmp_dir, output_script_file_name)
+        with open(file_path, "w") as fp:
+            fp.write(script_data)
+
+    def _generate_script_file_from_template_file(self, kwargs, template_file, func_name,
+                                                 output_script_file_name=None):
+        """
+        Internal function to generate script file from template file. It just adds the non-data
+        related arguments to the template file and writes the contents to new file, so that these
+        arguments are available in the script file for running this function "func_name".
+        """
+        # Take out all non-data related arguments to write to template file.
+        non_data_related_args = self._get_non_data_related_args_from_kwargs(kwargs)
+
+        # Read template file and write the contents to new file with non-data related arguments.
+        template_f = os.path.join(self._scripts_path, template_file)
+        with open(template_f, "r") as f:
+            template = f.read()
+
+        if output_script_file_name is None:
+            output_script_file_name = self._script_file_name
+        file_path = os.path.join(self._tdml_tmp_dir, output_script_file_name)
+        with open(file_path, "w") as f:
+            f.write("import json\n")
+            f.write(f"params = json.loads('{json.dumps(non_data_related_args)}')\n")
+            f.write(template)
+
+        kwargs["file_name"] = output_script_file_name
+        kwargs["name"] = func_name
+
+    def _remove_data_related_args_from_kwargs(self, kwargs):
+        """
+        Internal function to remove data related arguments from kwargs.
+        """
+        kwargs.pop("data", None)
+        kwargs.pop("feature_columns", None)
+        kwargs.pop("group_columns", None)
+        kwargs.pop("partition_columns", None)
+        kwargs.pop("label_columns", None)
+
+    def _convert_pos_args_to_kwargs_for_function(self, pos_args, kwargs, func_name):
+        """
+        Internal function to convert positional arguments to keyword arguments.
+        """
+        fn = getattr(getattr(import_module(self.module_name), self.class_name), func_name)
+        kwargs.update(zip(fn.__code__.co_varnames[1:], pos_args))
+
+    def _install_model_and_script_files(self, file_name, file_location):
+        """
+        Internal function to install model and script files to Vantage.
+        """
+        self._install_initial_model_file()
+        self._install_script_file(file_identifier=file_name.split(".")[0],
+                                  file_name=file_name,
+                                  is_binary=False,
+                                  file_location=file_location)
+
+    def _assign_fit_variables_after_execution(self, data, partition_columns, label_columns):
+        """
+        Internal function to assign fit related variables.
+        """
+        # Extract sklearn object(s) from the depending on the number of unique partitioning values.
+        self._extract_model_objs(n_unique_partitions=len(self._fit_partition_unique_values),
+                                 n_partition_cols=len(partition_columns))
+
+        # Need this label columns types in prediction.
+        self._fit_label_columns_types = []
+        self._fit_label_columns_python_types = []
+
+        for l_c in label_columns:
+            column_data = data._td_column_names_and_sqlalchemy_types[l_c.lower()]
+            self._fit_label_columns_types.append(column_data)
+            self._fit_label_columns_python_types.append(column_data.python_type.__name__)
+
+        # If the model is trained a second time after the object creation,
+        # or if set_params() is called after the first model training,
+        # this flag will reset to False. So that for subsequent predict/score
+        # operations, the newly trained model will be installed.
+        if self._is_trained_model_installed:
+            self._is_trained_model_installed = False
+

 class _OpenSourceObjectWrapper(_GenericObjectWrapper):
     # This has to be set for every package which subclasses this class.
     OPENSOURCE_PACKAGE_NAME = None

     def __init__(self, model=None, module_name=None, class_name=None, pos_args=None, kwargs=None):
-        if
+        if model is None and not module_name and not class_name:
             raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, "model",
                                                            "module_name and class_name"),
                                       MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
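
Several of the new `_GenericObjectWrapper` helpers share one template-substitution pattern: read a script template, replace each placeholder with a runtime value, write the result out, then install it in Vantage. A standalone sketch of just that pattern, minus the Vantage installation step; the file names and placeholder values below are illustrative, not the shipped templates:

```python
# Placeholder substitution as done by _prepare_and_install_file /
# _read_from_template_and_write_dict_to_file: plain string replacement,
# one pass per placeholder.
import os
import tempfile

def render_template(template_path, replace_dict, output_path):
    with open(template_path) as fp:
        script_data = fp.read()
    for old, new in replace_dict.items():
        script_data = script_data.replace(old, new)
    with open(output_path, "w") as fp:
        fp.write(script_data)

tmp_dir = tempfile.mkdtemp()
template = os.path.join(tmp_dir, "demo.template")
with open(template, "w") as fp:
    fp.write("indices = '<partition_cols_indices>'\ntypes = '<types_of_data_cols>'\n")

render_template(template,
                {"<partition_cols_indices>": "0,1",
                 "<types_of_data_cols>": "int--float"},
                os.path.join(tmp_dir, "demo_script.py"))
```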
@@ -319,24 +564,224 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
         self.pos_args = pos_args if pos_args is not None else tuple()

         self._fit_label_columns_types = None
+        self._fit_label_columns_python_types = None
         self._table_name_prefix = None

         self._is_default_partition_value_fit = True # False when the user provides partition columns.
         self._fit_partition_colums_non_default = None
         self._is_default_partition_value_predict = True # False when the user provides partition columns.

-    def
+    def __repr__(self):
+        if self._is_default_partition_value_fit:
+            # Single model use case.
+            return self.modelObj.__repr__()
+
+        pd.set_option("display.expand_frame_repr", None)
+        pd.set_option("display.max_colwidth", None)
+        opt = self.modelObj.__repr__()
+        pd.reset_option("display.expand_frame_repr")
+        pd.reset_option("display.max_colwidth")
+        return opt
+
+    def _initialize_object(self):
         """
-        Internal function to
+        Internal function to initialize sklearn object from module name and class name.
         """
-
-
+        # Needed when writing imported modules to generated file. TODO: Remove later.
+        imported_args = {}
+        # If there are any objects of class `_SkLearnObjectWrapper`, it is modified to
+        # corresponding sklearn object.
+        _partition_column_names = None
+        if "partition_columns" in self.kwargs:
+            self._fit_partition_colums_non_default = self.kwargs["partition_columns"]
+            self._is_default_partition_value_fit = False
+            _partition_column_names = self._fit_partition_colums_non_default

-        for val in fit_values:
-            if not all([val in trans_values]):
-                return False

-
+        new_sklearn_pos_args = self.modify_args(None, self.pos_args, imported_args)
+        new_sklearn_kwargs = self.modify_args(None, self.kwargs, imported_args)
+
+        # Create model object from new positional and keyword arguments.
+        class_obj = getattr(import_module(self.module_name), self.class_name)
+        if new_sklearn_pos_args:
+            self.modelObj = class_obj(*new_sklearn_pos_args, **new_sklearn_kwargs)
+        else:
+            self.modelObj = class_obj(**new_sklearn_kwargs)
+
+        # All arguments are moved to kwargs and kept pos_args empty.
+        # Might help in set_params() bug fix.
+        self.pos_args = tuple()
+        _arguments = self.modelObj.__dict__
+
+        if hasattr(self.modelObj, "get_params"):
+            # Update kwargs that are both in modelObj and get_params() as there are
+            # some classes which return other internals variables also.
+            # Hence, filtering them using get_params().
+            for k, v in _arguments.items():
+                if type(v).__name__ in ["function", "generator"]:
+                    # TODO: ELE-6351: Skipping adding functions and generators to kwargs as these
+                    # are not supported yet due to pickling issue.
+                    continue
+                if self.get_params():
+                    if k in self.get_params():
+                        self.kwargs[k] = v
+                else:
+                    _model_init_arguments = None
+                    try:
+                        _model_init_arguments = self.modelObj.__init__.__code__.co_varnames
+                    except AttributeError:
+                        pass
+                    if _model_init_arguments:
+                        self.kwargs = dict((k, v) for k, v in _arguments.items() if k in _model_init_arguments)
+                    else:
+                        self.kwargs = _arguments
+        else:
+            # Model selection classes will not have `get_params`, in which case modelObj's __dict__
+            # is saved as kwargs.
+            self.kwargs = _arguments
+
+        if _partition_column_names:
+            self.kwargs["partition_columns"] = _partition_column_names
+
+    def _initialize_variables(self, table_name_prefix):
+        """
+        Internal function to initialize variables used in this class.
+        """
+        self.feature_names_in_ = None
+        self._table_name_prefix = table_name_prefix
+        self._model_file_name_prefix = _generate_new_name(type="file")
+        self.model_file_paths_local = set()
+
+        self._fit_execution_time = None
+        self._fit_predict_execution_time = None
+        self._partial_fit_execution_time = None
+        self._predict_execution_time = None
+        self._transform_execution_time = None
+        self._score_execution_time = None
+
+        # Set to partition columns when training is done with partition columns.
+        self._fit_partition_colums_non_default = None
+
+        self._is_model_installed = False
+        self._fit_partition_unique_values = [[self._default_data_partition_value]]
+
+    def _get_returning_df(self, script_df, partition_column, returns):
+        """
+        Internal function to return the teradataml Dataframe except
+        partition_column.
+        """
+        if self._is_default_partition_value_fit:
+            # For single model case, partition column is internally generated
+            # and no point in returning it to the user.
+
+            # Extract columns from return types.
+            returning_cols = [col[0] for col in returns[len(partition_column):]]
+            return script_df.select(returning_cols)
+        return script_df
+
+    def modify_args(self, fp1, arg, imported_args):
+        """
+        Internal function to recursively (if "arg" is list/tuple/dict) check if any sklearn object
+        of opensourceML is present in the argument "arg" and modify it to corresponding sklearn
+        object.
+        This function can also be used to write import statements to file (if "fp1" is not
+        None). Update "imported_args" dictionary with imported module and class name to avoid
+        importing same module and class again when writing to file. This is useful when we want to
+        generate script from template file.
+        Pass None to "fp1" if we don't want to write to file and just modify opensourceML sklearn
+        object to corresponding sklearn object.
+        """
+        if isinstance(arg, type(self)):
+            imported_tuple = (arg.module_name, arg.class_name)
+            already_imported = imported_args.get(imported_tuple, False)
+            if not already_imported:
+                imported_args[imported_tuple] = True
+                if fp1:
+                    fp1.write(f"from {arg.module_name} import {arg.class_name}\n")
+            self.modify_args(fp1, arg.pos_args, imported_args)
+            self.modify_args(fp1, arg.kwargs, imported_args)
+            return arg.modelObj
+        elif isinstance(arg, list):
+            return [self.modify_args(fp1, val, imported_args) for val in arg]
+        elif isinstance(arg, tuple):
+            return tuple([self.modify_args(fp1, val, imported_args) for val in arg])
+        elif type(arg).__name__ == "generator":
+            # Raising exception as generator object can't be pickled.
+            # TODO: ELE-6351 - Find ways to pickle generator object later.
+            raise ValueError("Generator type/iterator is not supported for any argument. "\
+                             "Support will be added later.")
+        elif type(arg).__name__ == "function":
+            # Raising exception as functions/lambda functions can't be pickled.
+            # TODO: ELE-6351 - Find ways to pickle functions later.
+            raise ValueError("Functions are not supported for any argument. "\
+                             "Support will be added later.")
+        elif isinstance(arg, dict):
+            return dict(
+                (
+                    self.modify_args(fp1, k, imported_args),
+                    self.modify_args(fp1, v, imported_args),
+                )
+                for k, v in arg.items() if k != "partition_columns"
+            )
+        # elif arg == "partition_columns":
+
+        else:
+            return arg
+
+    def _install_initial_model_file(self, use_dummy_initial_file=False):
+        """
+        If model file(s) is/are not installed in Vantage, then install it/them.
+        """
+        if isinstance(self.modelObj, pd.DataFrame):
+            # Get list of unique partition values and corresponding model object as dict.
+            partition_values_model_dict = {}
+            obj_list = self.modelObj.values.tolist()
+            for lst in obj_list:
+                partition_values_model_dict[tuple(lst[:len(self._fit_partition_colums_non_default)])] = \
+                    lst[len(self._fit_partition_colums_non_default)]
+
+        for partition in self._fit_partition_unique_values:
+            # Create a new file with file name with partition values and
+            # dump sklearn object into it. Finally install the file to Vantage.
+            partition_join = "_".join([str(x) for x in partition])
+            file_name = f"{self._model_file_name_prefix}_{partition_join}"
+            # Replace '-' with '_' as '-' can't be present in file identifier.
+            # Needed this replace because partition_columns can be negative.
+            file_name = file_name.replace("-", "_")
+            full_file_name = os.path.join(self._tdml_tmp_dir, file_name)
+            with open(full_file_name, "wb+") as fp:
+                # Write sklearn object to file.
+                if isinstance(self.modelObj, pd.DataFrame):
+                    # If multiple models, then write the model corresponding to the partition value.
+                    fp.write(pickle.dumps(partition_values_model_dict[tuple(partition)]))
+                else:
+                    if use_dummy_initial_file:
+                        fp.write(pickle.dumps("abc"))
+                    else:
+                        fp.write(pickle.dumps(self.modelObj))
+            self.model_file_paths_local.add(file_name)
+
+            self._install_script_file(file_identifier=file_name,
+                                      file_name=file_name,
+                                      is_binary=True,
+                                      file_location=self._tdml_tmp_dir)
+
+            if self._is_lake_system:
+                # Need to pass env_name along with file_name for cleaning up the files in env.
+                obj = f"{self._env.env_name}::{file_name}"
+                if installed_model_files[obj] == 0:
+                    # Add to GC for the first time the model file (along with env name) is encountered.
+                    installed_model_files[obj] = 1
+                    GarbageCollector._add_to_garbagecollector(object_name=obj,
+                                                              object_type=TeradataConstants.TERADATA_APPLY)
+            else:
+                if installed_model_files[file_name] == 0:
+                    # Add to GC for the first time the model file is encountered.
+                    installed_model_files[file_name] = 1
+                    GarbageCollector._add_to_garbagecollector(object_name=file_name,
+                                                              object_type=TeradataConstants.TERADATA_SCRIPT)
+
+        self._is_model_installed = True

     def _validate_unique_partition_values(self, data, partition_columns):
         """
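
The reworked `_initialize_object` filters `modelObj.__dict__` through `get_params()` before reusing it as constructor keyword arguments. The reason is visible with plain scikit-learn (assumed to be installed): after `fit()`, `__dict__` also holds fitted state that `__init__` would reject:

```python
# Sketch of the get_params() filtering idea used by _initialize_object.
from sklearn.linear_model import LinearRegression

est = LinearRegression()
est.fit([[0.0], [1.0]], [0.0, 1.0])  # fitting adds coef_, intercept_, ... to __dict__

# Keep only keys that are real constructor parameters.
ctor_kwargs = {k: v for k, v in est.__dict__.items() if k in est.get_params()}
rebuilt = LinearRegression(**ctor_kwargs)  # LinearRegression(**est.__dict__) would raise TypeError
print(sorted(ctor_kwargs))
```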
@@ -361,25 +806,61 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):

         if not self._validate_equality_of_partition_values(fit_unique_values, trans_unique_values):
             raise TeradataMlException(
-                Messages.get_message(MessageCodes.PARTITION_VALUES_NOT_MATCHING),
+                Messages.get_message(MessageCodes.PARTITION_VALUES_NOT_MATCHING, "training", "test"),
                 MessageCodes.PARTITION_VALUES_NOT_MATCHING
             )

     def fit(self, **kwargs):
         pass

+    def _convert_arguments_to_modelObj(self, args, idx_multi_model=None):
+        """
+        Internal function to convert all OpensourceML related objects in arguments to
+        underlying model objects.
+        """
+        if isinstance(args, dict):
+            new_args = args.copy() # To avoid updating
+            for k, v in new_args.items():
+                if isinstance(v, type(self)):
+                    if idx_multi_model is not None:
+                        # single model. This argument is set only when modelObj is single model.
+                        new_args[k] = v.modelObj
+                    else:
+                        # multi-model. Get appropriate model from modelObj.
+                        new_args[k] = v.modelObj.iloc[idx_multi_model]["model"]
+                else:
+                    new_args[k] = v
+            return new_args
+
+        # If args is tuple, convert all elements to underlying model object.
+        elif isinstance(args, tuple):
+            new_args = tuple()
+            for arg in args:
+                if isinstance(arg, type(self)):
+                    if idx_multi_model is None:
+                        # single model. This argument is set only when modelObj is single model.
+                        new_args += (arg.modelObj,)
+                    else:
+                        # multi-model. Get appropriate model from modelObj.
+                        new_args += (arg.modelObj.iloc[idx_multi_model]["model"],)
+                else:
+                    new_args += (arg,)
+            return new_args
+        return args
+
     def __get_obj_attributes_multi_model(self, name):
         """
         Internal function to get attributes of all sklearn model objects when multiple models are
         generated by fit.
         """

-        def __generate_model_object(model_obj_value):
+        def __generate_model_object(model_obj_value, init_model_obj):
             """
             Internal function to generate _SkLearnWrapperObject model object from model_obj_value.
             """
             # Create _SkLearnObjectWrapper object from opensource model object.
-            model_obj = self.__class__(model=
+            model_obj = self.__class__(model=init_model_obj)
+
             model_obj.modelObj = model_obj_value
             model_obj._is_model_installed = True

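
The new `_convert_arguments_to_modelObj` unwraps any opensourceML wrapper found in positional or keyword arguments before the underlying package function is invoked. A stripped-down sketch of the same idea, with a stand-in `Wrapper` class rather than the real teradataml classes:

```python
# Wrappers found in tuple/dict arguments are replaced by the estimator they
# hold before the real call is made. `Wrapper` is illustrative only.
class Wrapper:
    def __init__(self, model):
        self.modelObj = model

def unwrap(args):
    if isinstance(args, dict):
        return {k: (v.modelObj if isinstance(v, Wrapper) else v)
                for k, v in args.items()}
    if isinstance(args, tuple):
        return tuple(a.modelObj if isinstance(a, Wrapper) else a for a in args)
    return args

w = Wrapper("raw-estimator")
print(unwrap((w, 1)))             # ('raw-estimator', 1)
print(unwrap({"estimator": w}))   # {'estimator': 'raw-estimator'}
```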
@@ -396,13 +877,34 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
         multi_models = self.modelObj.copy()
         for i in range(multi_models.shape[0]):
             curr_model = multi_models.iloc[i]["model"]
-            multi_models.
+            partition_values = multi_models.iloc[i][0:len(self._fit_partition_colums_non_default)].to_list()
+            partition_values = "_".join([str(x) for x in partition_values])
+            if self.module_name == "lightgbm.basic" and self.class_name == "Booster" and name == "save_model":
+                # filename is first argument.
+                kwargs1 = kwargs.copy()
+                c1 = c
+
+                if len(c) > 0:
+                    c1 = list(c1)
+                    c1[0] = f"{c1[0]}_{partition_values}"
+                    c1 = tuple(c1)
+                if len(kwargs) > 0 and kwargs.get("filename", None):
+                    kwargs1["filename"] = f"{kwargs1['filename']}_{partition_values}"
+
+                multi_models.at[i, "model"] = getattr(curr_model, name)(*self._convert_arguments_to_modelObj(c1, i),
+                                                                        **self._convert_arguments_to_modelObj(kwargs1, i))
+            else:
+                multi_models.at[i, "model"] = getattr(curr_model, name)(*self._convert_arguments_to_modelObj(c, i),
+                                                                        **self._convert_arguments_to_modelObj(kwargs, i))

-
-        if self.__class__._validate_model_supportability(
-            return __generate_model_object(multi_models)
+        first_function_value = multi_models.at[0, "model"]
+        if self.__class__._validate_model_supportability(first_function_value):
+            return __generate_model_object(multi_models, init_model_obj=first_function_value)

-
+        multi_models = multi_models.rename(columns={"model": name})
+
+        # Select only partition columns and the attribute column.
+        return multi_models[self._fit_partition_colums_non_default + [name]]

         # Assuming that self.modelObj will have at least 1 row.

@@ -420,15 +922,15 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
             output_attributes.at[i, "model"] = getattr(model, name)

         if self.__class__._validate_model_supportability(first_atrribute_instance):
-            return __generate_model_object(output_attributes)
+            return __generate_model_object(output_attributes, init_model_obj=first_atrribute_instance)

         return output_attributes.rename(columns={"model": name})

     def __getattr__(self, name):
-        # This just run attributes (functions and properties) from sklearn
+        # This just run attributes (functions and properties) from opensource (sklearn/lightgbm) objects.
         def __sklearn_method_invoker(*c, **kwargs):
-            #
-            model_obj = attribute_instance(*c, **kwargs)
+            # Opensource model is returned from the function call. Create _OpensourceObjectWrapper object.
+            model_obj = attribute_instance(*self._convert_arguments_to_modelObj(c), **self._convert_arguments_to_modelObj(kwargs))
             if self.__class__._validate_model_supportability(model_obj):
                 model_obj = self.__class__(model=model_obj)
                 model_obj._is_model_installed = True # Trained model is returned by function call.
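
The `__getattr__` change routes every delegated call through `_convert_arguments_to_modelObj` first. The delegation pattern itself, reduced to a minimal sketch; names are illustrative and scikit-learn is assumed installed:

```python
# Attributes missing on the wrapper are forwarded to the wrapped estimator,
# and callables are intercepted so their arguments can be unwrapped first.
from sklearn.linear_model import LinearRegression

class DelegatingWrapper:
    def __init__(self, model):
        self.modelObj = model

    def __getattr__(self, name):
        # Only invoked when `name` is not found on the wrapper itself.
        attribute_instance = getattr(self.modelObj, name)
        if callable(attribute_instance):
            def _method_invoker(*c, **kwargs):
                # The real code unwraps c/kwargs here before the call.
                return attribute_instance(*c, **kwargs)
            return _method_invoker
        return attribute_instance

w = DelegatingWrapper(LinearRegression())
w.fit([[0.0], [1.0]], [0.0, 1.0])   # method call, forwarded to the estimator
print(w.coef_)                       # plain attribute, also forwarded
```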
@@ -636,234 +1138,63 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
         EXAMPLES:
             >>> from teradataml import td_sklearn
             >>> model = td_sklearn.LinearRegression(normalize=True)
-            >>> model
-            LinearRegression(normalize=True)
-
-            # Example 1: Deploy the model held by interface object to Vantage.
-            >>> lin_reg = model.deploy("linreg_model_ver_2")
-            Model is saved.
-            >>> lin_reg
-            LinearRegression(normalize=True)
-
-            # Example 2: Deploy the model held by interface object to Vantage with the name same
-            # as that of model that already existed in Vantage.
-            >>> lin_reg = model.deploy("linreg_model_ver_2", replace_if_exists=True)
-            Model is deleted.
-            Model is saved.
-            >>> lin_reg
-            LinearRegression(normalize=True)
-        """
-
-        # Install model file into Vantage, if not installed.
-        self._install_initial_model_file()
-
-        self._save_model(model_name, replace_if_exists)
-        return self
-
-
-class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
-
-    OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.SKLEARN
-
-    def __init__(self, model=None, module_name=None, class_name=None, pos_args=None, kwargs=None):
-        super().__init__(model=model, module_name=module_name, class_name=class_name,
-                         pos_args=pos_args, kwargs=kwargs)
-
-        self._initialize_variables()
-        if model:
-            self.modelObj = model
-            self.module_name = model.__module__.split("._")[0]
-            self.class_name = model.__class__.__name__
-            # __dict__ gets all the arguments as dictionary including default ones and positional
-            # args.
-            self.kwargs = model.__dict__
-            self.pos_args = tuple() # Kept empty as all are moved to kwargs.
-        else:
-            self._initialize_object()
-
-    def __repr__(self):
-        if self._is_default_partition_value_fit:
-            # Single model use case.
-            return self.modelObj.__repr__()
-
-        pd.set_option("display.expand_frame_repr", None)
-        pd.set_option("display.max_colwidth", None)
-        opt = self.modelObj.__repr__()
-        pd.reset_option("display.expand_frame_repr")
-        pd.reset_option("display.max_colwidth")
-        return opt
-
-    def _validate_args_and_get_data(self, X=None, y=None, groups=None, kwargs={},
-                                    skip_either_or_that=False):
-        """
-        Internal function to validate arguments passed to exposed opensource APIs and return
-        parent DataFrame, feature columns, label columns, group columns, data partition columns.
-        """
-        _validate_opensource_func_args(X=X, y=y, groups=groups,
-                                       fit_partition_cols=self._fit_partition_colums_non_default,
-                                       kwargs=kwargs,
-                                       skip_either_or_that=skip_either_or_that)
-        return _derive_df_and_required_columns(X=X, y=y, groups=groups, kwargs=kwargs,
-                                               fit_partition_cols=self._fit_partition_colums_non_default)
-
-    def _initialize_object(self):
-        """
-        Internal function to initialize sklearn object from module name and class name.
-        """
-        # Needed when writing imported modules to generated file. TODO: Remove later.
-        imported_args = {}
-        # If there are any objects of class `_SkLearnObjectWrapper`, it is modified to
-        # corresponding sklearn object.
-        new_sklearn_pos_args = self.modify_args(None, self.pos_args, imported_args)
-        new_sklearn_kwargs = self.modify_args(None, self.kwargs, imported_args)
-
-        # Create model object from new positional and keyword arguments.
-        class_obj = getattr(import_module(self.module_name), self.class_name)
-        if new_sklearn_pos_args:
-            self.modelObj = class_obj(*new_sklearn_pos_args, **new_sklearn_kwargs)
-        else:
-            self.modelObj = class_obj(**new_sklearn_kwargs)
-
-        # All arguments are moved to kwargs and kept pos_args empty.
-        # Might help in set_params() bug fix.
-        self.pos_args = tuple()
-        _arguments = self.modelObj.__dict__
-
-        if hasattr(self.modelObj, "get_params"):
-            # Update kwargs that are both in modelObj and get_params() as there are
-            # some classes which return other internals variables also.
-            # Hence, filtering them using get_params().
-            for k, v in _arguments.items():
-                if type(v).__name__ in ["function", "generator"]:
-                    # TODO: ELE-6351: Skipping adding functions and generators to kwargs as these
-                    # are not supported yet due to pickling issue.
-                    continue
-                if k in self.get_params():
-                    self.kwargs[k] = v
-        else:
-            # Model selection classes will not have `get_params`, in which case modelObj's __dict__
-            # is saved as kwargs.
-            self.kwargs = _arguments
-
-    def _initialize_variables(self):
-        """
-        Internal function to initialize variables used in this class.
-        """
-        self.feature_names_in_ = None
-        self._table_name_prefix = "td_sklearn_"
-        self._model_file_name_prefix = _generate_new_name(type="file")
-        self.model_file_paths_local = set()
-
-        self._fit_execution_time = None
-        self._fit_predict_execution_time = None
-        self._partial_fit_execution_time = None
-        self._predict_execution_time = None
-        self._transform_execution_time = None
-        self._score_execution_time = None
-
-        # Set to partition columns when training is done with partition columns.
-        self._fit_partition_colums_non_default = None
-
-        self._is_model_installed = False
-        self._fit_partition_unique_values = [[self._default_data_partition_value]]
-
-    def modify_args(self, fp1, arg, imported_args):
-        """
-        Internal function to recursively (if "arg" is list/tuple/dict) check if any sklearn object
-        of opensourceML is present in the argument "arg" and modify it to corresponding sklearn
-        object.
-        This function can also be used to write import statements to file (if "fp1" is not
-        None). Update "imported_args" dictionary with imported module and class name to avoid
-        importing same module and class again when writing to file. This is useful when we want to
-        generate script from template file.
-        Pass None to "fp1" if we don't want to write to file and just modify opensourceML sklearn
-        object to corresponding sklearn object.
-        """
-        if isinstance(arg, type(self)):
-            imported_tuple = (arg.module_name, arg.class_name)
-            already_imported = imported_args.get(imported_tuple, False)
-            if not already_imported:
-                imported_args[imported_tuple] = True
-                if fp1:
-                    fp1.write(f"from {arg.module_name} import {arg.class_name}\n")
-            self.modify_args(fp1, arg.pos_args, imported_args)
-            self.modify_args(fp1, arg.kwargs, imported_args)
-            return arg.modelObj
-        elif isinstance(arg, list):
-            return [self.modify_args(fp1, val, imported_args) for val in arg]
-        elif isinstance(arg, tuple):
-            return tuple([self.modify_args(fp1, val, imported_args) for val in arg])
-        elif type(arg).__name__ == "generator":
-            # Raising exception as generator object can't be pickled.
-            # TODO: ELE-6351 - Find ways to pickle generator object later.
-            raise ValueError("Generator type/iterator is not supported for any argument. "\
-                             "Support will be added later.")
-        elif type(arg).__name__ == "function":
-            # Raising exception as functions/lambda functions can't be pickled.
-            # TODO: ELE-6351 - Find ways to pickle functions later.
-            raise ValueError("Functions are not supported for any argument. "\
-                             "Support will be added later.")
-        elif isinstance(arg, dict):
-            return dict(
-                (
-                    self.modify_args(fp1, k, imported_args),
-                    self.modify_args(fp1, v, imported_args),
-                )
-                for k, v in arg.items()
-            )
-        else:
-            return arg
+            >>> model
+            LinearRegression(normalize=True)

-
-
-
+            # Example 1: Deploy the model held by interface object to Vantage.
+            >>> lin_reg = model.deploy("linreg_model_ver_2")
+            Model is saved.
+            >>> lin_reg
+            LinearRegression(normalize=True)
+
+            # Example 2: Deploy the model held by interface object to Vantage with the name same
+            # as that of model that already existed in Vantage.
+            >>> lin_reg = model.deploy("linreg_model_ver_2", replace_if_exists=True)
+            Model is deleted.
+            Model is saved.
+            >>> lin_reg
+            LinearRegression(normalize=True)
         """
-        if isinstance(self.modelObj, pd.DataFrame):
-            # Get list of unique partition values and corresponding model object as dict.
-            partition_values_model_dict = {}
-            obj_list = self.modelObj.values.tolist()
-            for lst in obj_list:
-                partition_values_model_dict[tuple(lst[:len(lst)-1])] = lst[len(lst)-1]

-
-
-            # dump sklearn object into it. Finally install the file to Vantage.
-            partition_join = "_".join([str(x) for x in partition])
-            file_name = f"{self._model_file_name_prefix}_{partition_join}"
-            # Replace '-' with '_' as '-' can't be present in file identifier.
-            # Needed this replace because partition_columns can be negative.
-            file_name = file_name.replace("-", "_")
-            full_file_name = os.path.join(self._tdml_tmp_dir, file_name)
-            with open(full_file_name, "wb+") as fp:
-                # Write sklearn object to file.
-                if isinstance(self.modelObj, pd.DataFrame):
-                    # If multiple models, then write the model corresponding to the partition value.
-                    fp.write(pickle.dumps(partition_values_model_dict[tuple(partition)]))
-                else:
-                    fp.write(pickle.dumps(self.modelObj))
-            self.model_file_paths_local.add(file_name)
+        # Install model file into Vantage, if not installed.
+        self._install_initial_model_file()

-
-
-                                      is_binary=True,
-                                      file_location=self._tdml_tmp_dir)
+        self._save_model(model_name, replace_if_exists)
+        return self

-        if self._is_lake_system:
-            # Need to pass env_name along with file_name for cleaning up the files in env.
-            obj = f"{self._env.env_name}::{file_name}"
-            if installed_model_files[obj] == 0:
-                # Add to GC for the first time the model file (along with env name) is encountered.
-                installed_model_files[obj] = 1
-                GarbageCollector._add_to_garbagecollector(object_name=obj,
-                                                          object_type=TeradataConstants.TERADATA_APPLY)
-        else:
-            if installed_model_files[file_name] == 0:
-                # Add to GC for the first time the model file is encountered.
-                installed_model_files[file_name] = 1
-                GarbageCollector._add_to_garbagecollector(object_name=file_name,
-                                                          object_type=TeradataConstants.TERADATA_SCRIPT)

-
+class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
+
+    OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.SKLEARN
+
+    def __init__(self, model=None, module_name=None, class_name=None, pos_args=None, kwargs=None):
+        super().__init__(model=model, module_name=module_name, class_name=class_name,
+                         pos_args=pos_args, kwargs=kwargs)
+
+        self._initialize_variables(table_name_prefix="td_sklearn_")
+        if model is not None:
+            self.modelObj = model
+            self.module_name = model.__module__.split("._")[0]
+            self.class_name = model.__class__.__name__
+            # __dict__ gets all the arguments as dictionary including default ones and positional
+            # args.
+            self.kwargs = model.__dict__
+            self.pos_args = tuple() # Kept empty as all are moved to kwargs.
+        else:
+            self._initialize_object()
+
+    def _validate_args_and_get_data(self, X=None, y=None, groups=None, kwargs={},
+                                    skip_either_or_that=False):
+        """
+        Internal function to validate arguments passed to exposed opensource APIs and return
+        parent DataFrame, feature columns, label columns, group columns, data partition columns.
+        """
+        _validate_opensource_func_args(X=X, y=y, groups=groups,
+                                       fit_partition_cols=self._fit_partition_colums_non_default,
+                                       kwargs=kwargs,
+                                       skip_either_or_that=skip_either_or_that)
+        return _derive_df_and_required_columns(X=X, y=y, groups=groups, kwargs=kwargs,
+                                               fit_partition_cols=self._fit_partition_colums_non_default)

     def _run_fit_related_functions(self,
                                    data,
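
With `_initialize_variables` now taking a `table_name_prefix` and `__repr__`, `_initialize_object`, and `modify_args` hoisted into `_OpenSourceObjectWrapper`, a package-specific wrapper shrinks to little more than a prefix and a package name. A hypothetical sketch of such a subclass inside this module; this is not the shipped `teradataml/opensource/_lightgbm.py` code, and the `OpenSourcePackage.LIGHTGBM` member is an assumption:

```python
# Hypothetical subclass sketch, illustrating what the refactor enables.
class _LightgbmObjectWrapperSketch(_OpenSourceObjectWrapper):

    OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.LIGHTGBM  # assumed enum member

    def __init__(self, model=None, module_name=None, class_name=None,
                 pos_args=None, kwargs=None):
        super().__init__(model=model, module_name=module_name,
                         class_name=class_name, pos_args=pos_args, kwargs=kwargs)
        # Package-specific bits reduce to a table-name prefix...
        self._initialize_variables(table_name_prefix="td_lightgbm_")
        # ...while object setup is inherited from _OpenSourceObjectWrapper.
        if model is not None:
            self.modelObj = model
        else:
            self._initialize_object()
```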
@@ -871,7 +1202,8 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                    label_columns,
                                    partition_columns,
                                    func,
-                                   classes=None
+                                   classes=None,
+                                   file_name="sklearn_fit.py"):
         """
         Internal function to run fit() and partial_fit() functions.
         """
@@ -886,8 +1218,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
                         for col in new_partition_columns] + [("model", model_type)]

-        file_name = "sklearn_fit.py"
-
         if classes:
             class_type = type(classes[0]).__name__
             classes = "--".join([str(x) for x in classes])
@@ -913,20 +1243,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         self._model_data = self._run_script(data, script_command, new_partition_columns,
                                             return_types)

-
-        self.extract_sklearn_obj(n_unique_partitions=len(self._fit_partition_unique_values),
-                                 n_partition_cols=len(new_partition_columns))
-
-        # Need this label columns types in prediction.
-        self._fit_label_columns_types = [data._td_column_names_and_sqlalchemy_types[l_c.lower()]
-                                         for l_c in label_columns]
-
-        # If the model is trained a second time after the object creation,
-        # or if set_params() is called after the first model training,
-        # this flag will reset to False. So that for subsequent predict/score
-        # operations, the newly trained model will be installed.
-        if self._is_trained_model_installed:
-            self._is_trained_model_installed = False
+        self._assign_fit_variables_after_execution(data, new_partition_columns, label_columns)

     def partial_fit(self, X=None, y=None, classes=None, **kwargs):
         """
@@ -974,11 +1291,19 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         self._is_default_partition_value_fit = False
         self._fit_partition_colums_non_default = partition_columns

-
-
-
-
-
+        file_name = kwargs.pop("file_name", None)
+        func_name = kwargs.pop("name", "fit")
+
+        args = {"data": data,
+                "feature_columns": feature_columns,
+                "label_columns": label_columns,
+                "partition_columns": partition_columns,
+                "func": func_name}
+
+        if file_name is not None:
+            args["file_name"] = file_name
+
+        self._run_fit_related_functions(**args)

         self._fit_execution_time = time.time() - st_time

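
The rewritten `fit()` forwards optional internal knobs through kwargs: `file_name` and `name` are popped with defaults, so other entry points (such as `partial_fit()`) can reuse the same plumbing with a different script file and function name. A self-contained sketch of that dispatch; the real method also passes the data and column arguments:

```python
# Optional knobs ride along in kwargs and are popped with defaults.
def dispatch_fit(**kwargs):
    file_name = kwargs.pop("file_name", None)  # alternate run script, if any
    func_name = kwargs.pop("name", "fit")      # e.g. "partial_fit"

    args = {"func": func_name}
    if file_name is not None:
        args["file_name"] = file_name
    return args

print(dispatch_fit())                                             # {'func': 'fit'}
print(dispatch_fit(file_name="sklearn_fit.py", name="partial_fit"))
```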
@@ -1043,10 +1368,130 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
 
         return super().__getattr__(name)
 
+    def _special_handling_multimodel_(self, data, feature_columns, label_columns, partition_columns,
+                                      func_name, **kwargs):
+        """
+        Internal function to handle multi model case for transform function for functions
+        ["SelectFpr", "SelectFdr", "SelectFwe", "SelectFromModel", "RFECV"] of feature_selection module
+        and "Birch" of cluster module.
+        These functions generate multiple models and when transform is applied to each model, it generates
+        output with different number of columns.
+        """
+        skl_objs_dict = {}
+        no_of_unique_partitions = len(self._fit_partition_unique_values)
+        no_of_partitioning_cols = len(self._fit_partition_unique_values[0])
+
+        # Run on 10 rows of data individually using corresponding scikit-learn objects based on paritition value
+        # and get the maximum number of columns and their types.
+        for i in range(no_of_unique_partitions):
+            skl_objs_dict[tuple(self.modelObj.iloc[i, :no_of_partitioning_cols])] = self.modelObj.iloc[i]["model"]
+
+
+        data = data.select(feature_columns + label_columns + partition_columns)
+        ten_row_data = data.head(10).get_values()
+        X = numpy.array(ten_row_data)
+
+        # For multi-model case, model in one AMP can give more number of columns than other AMPs.
+        # Returns clause can't contain different number of columns in different AMPs. Hence, taking
+        # maximum number of columns and their types from all models.
+        max_no_of_columns = 0
+        max_col_names = []
+        max_col_types = []
+
+        def _get_input_row_without_nans(row):
+            """
+            `inverse_transform` should not contain NaNs. Hence, removing NaNs from the row.
+            """
+            X1 = []
+            for _, v in enumerate(row):
+                if isinstance(v, type(None)) or isinstance(v, str) or not math.isnan(v) or self.module_name == "sklearn.impute":
+                    # Add to list when:
+                    # - v is None or
+                    # - v is string or
+                    # - v is not nan or
+                    # - if module is impute (which transforms nan values) even though v is nan.
+                    X1.append(v)
+                else:
+                    # skip nan values.
+                    pass
+            return X1
+
+        for i in range(X.shape[0]):
+            # Run `transform` or `inverse_transform` on each row with corresponding scikit-learn model object.
+            partition_values = tuple(X[i, -no_of_partitioning_cols:])
+            skl_obj = skl_objs_dict[partition_values]
+
+            X1 = X[i, :-no_of_partitioning_cols]
+            # Since Nans/NULLs are added in transform for last columns where some models generated
+            # less number of columns, removing Nans/NULLs from the input row for inverse_transform
+            # using function _get_input_row_without_nans().
+            X1 = numpy.array([_get_input_row_without_nans(X1)])
+
+            trans_opt = getattr(skl_obj, func_name)(X1, **kwargs)
+
+            no_of_columns = 1
+
+            if trans_opt.shape == (X1.shape[0],):
+                trans_opt = trans_opt.reshape(X1.shape[0], 1)
+
+            if isinstance(trans_opt[0], numpy.ndarray) \
+                    or isinstance(trans_opt[0], list) \
+                    or isinstance(trans_opt[0], tuple):
+                no_of_columns = len(trans_opt[0])
+
+            col_names = [f"{self.class_name.lower()}_{func_name}_{(i + 1)}" for i in range(no_of_columns)]
+
+            # Get new column sqlalchemy types for pandas df columns of transform output.
+            opt_pd = pd.DataFrame(trans_opt)
+
+            # Get output column types for each column in pandas df from the output of transform
+            # type functions.
+            types = {}
+            for idx in range(no_of_columns):
+                col = list(opt_pd.columns)[idx]
+
+                # Only one row in trans_opt.
+                if isinstance(trans_opt[0], numpy.ndarray) or isinstance(trans_opt[0], tuple) or isinstance(trans_opt[0], list):
+                    type_ = type(trans_opt[0][idx])
+                else:
+                    # only one value in the output.
+                    type_ = type(trans_opt[0])
+
+                # If type of the output value (trans_opt) is None, then use `str` as type since
+                # pandas astype() does not accept None type.
+                if type_ is type(None):
+                    type_ = str
+
+                # numpy integer columns with nan values can't be typecasted using pd.astype() to int64.
+                # It raises error like "Cannot convert non-finite values (NA or inf) to integer:
+                # Error while type casting for column '2'"
+                # Hence, using pd.Int64Dtype() for integer columns with nan values.
+                types[col] = type_ if type_ not in [int, numpy.int64] else pd.Int64Dtype()
+
+            # Without this, all columns will be of object type and gets converted to VARCHAR in Vantage.
+            opt_pd = opt_pd.astype(types)
+
+            # If the datatype is not specified then check if the datatype is datetime64 and timezone is present then map it to
+            # TIMESTAMP(timezone=True) else map it according to default value.
+            col_types = [TIMESTAMP(timezone=True)
+                         if pt.is_datetime64_ns_dtype(opt_pd.dtypes[key]) and (opt_pd[col_name].dt.tz is not None)
+                         else _get_sqlalchemy_mapping(str(opt_pd.dtypes[key]))
+                         for key, col_name in enumerate(list(opt_pd.columns))]
+
+            # Different models in multi model case can generate different number of output columns for example in
+            # SelectFpr. Hence, taking the model which generates maximum number of columns.
+            if no_of_columns > max_no_of_columns:
+                max_no_of_columns = no_of_columns
+                max_col_names = col_names
+                max_col_types = col_types
+
+        return [(c_name, c_type) for c_name, c_type in zip(max_col_names, max_col_types)]
+
     def _get_return_columns_for_function_(self,
                                           data,
                                           feature_columns,
                                           label_columns,
+                                          partition_columns,
                                           func_name,
                                           kwargs):
         """
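One subtlety above deserves a standalone check: plain astype(int64) fails on columns that carry NaN padding, which is why the new code falls back to pandas' nullable Int64 dtype. This snippet (independent of teradataml) reproduces both behaviors:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1.0, np.nan, 3.0]})

    try:
        df.astype({"a": np.int64})           # raises: cannot convert non-finite values
    except ValueError as err:
        print(err)

    # pandas' nullable integer dtype keeps the missing value as <NA> instead of failing.
    print(df.astype({"a": pd.Int64Dtype()}))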
@@ -1060,7 +1505,8 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
             return [(f"{self.class_name.lower()}_{func_name}_{(i + 1)}",
                      data._td_column_names_and_sqlalchemy_types[col.lower()])
                     for i, col in enumerate(label_columns)]
-
+
+        if func_name == "predict" and self.OPENSOURCE_PACKAGE_NAME == OpenSourcePackage.SKLEARN:
             """
             Return predict columns using either label_columns (if provided) or
             self._fit_label_columns_types (if the function is trained using label columns).
@@ -1075,8 +1521,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
             return [(f"{self.class_name.lower()}_{func_name}_{(i + 1)}", col_type)
                     for i, col_type in enumerate(self._fit_label_columns_types)]
 
-        data = data.select(feature_columns + label_columns)
-
         ## If function is not `fit_predict`:
         # then take one row of transform/other functions to execute in client
         # to get number of columns in return clause and their Vantage types.
@@ -1090,8 +1534,20 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
             skl_obj = self.modelObj
         else:
             # Multi model case.
+            if (func_name in ["transform", "inverse_transform"] and \
+                    self.class_name in ["SelectFpr", "SelectFdr", "SelectFwe", "SelectFromModel", "RFECV", "Birch"]) or \
+                    (self.module_name == "lightgbm.sklearn" and self.class_name == "LGBMClassifier"):
+                # Special handling for multi model case for transform function as these classes
+                # generate transform output with different number of columns for each model.
+                # Hence, need to add Nulls/Nans to columns which are not present in the transform output of
+                # some models.
+                return self._special_handling_multimodel_(data, feature_columns, label_columns,
+                                                          partition_columns, func_name, **kwargs)
+
             skl_obj = self.modelObj.iloc[0]["model"]
 
+        data = data.select(feature_columns + label_columns)
+
         ten_row_data = data.head(10).get_values()
         X = numpy.array(ten_row_data)
         if label_columns:
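Why pad at all? Every AMP must emit rows with the identical RETURNS shape, so per-model outputs narrower than the widest one get filled with NULL/NaN. A self-contained illustration of that padding step (not teradataml code, just the idea):

    import numpy as np

    def pad_to_width(rows, width):
        # Fill each model's output row with NaN up to the widest model's column
        # count, so every row matches the shared RETURNS clause.
        return [row + [np.nan] * (width - len(row)) for row in rows]

    outputs = [[0.1, 0.9], [0.7]]            # two models, different column counts
    width = max(len(r) for r in outputs)
    print(pad_to_width(outputs, width))      # [[0.1, 0.9], [0.7, nan]]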
@@ -1200,7 +1656,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return [(c_name, c_type) for c_name, c_type in zip(col_names, col_types)]
 
     @_validate_fit_run
-    def _run_function_needing_all_rows(self, X=None, y=None, **kwargs):
+    def _run_function_needing_all_rows(self, X=None, y=None, file_name="sklearn_score.py", **kwargs):
         """
         Internal function to run functions like score, aic, bic which needs all rows and return
         one floating number as result.
@@ -1223,8 +1679,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                                               label_columns,
                                                               partition_columns)
 
-        file_name = "sklearn_score.py"
-
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
 
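The hard-coded script name becomes a keyword default, so alternate wrappers can inject a different runner script without touching the body. A toy before/after of that refactor (function and file names here are illustrative, not teradataml API):

    # Before: the script name was fixed inside the function body.
    def run_before(**kwargs):
        file_name = "sklearn_score.py"
        return file_name

    # After: callers may override it; the default behavior is unchanged.
    def run_after(file_name="sklearn_score.py", **kwargs):
        return file_name

    assert run_before() == run_after()
    print(run_after(file_name="lightgbm_score.py"))   # hypothetical override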
@@ -1260,7 +1714,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return opt
 
     @_validate_fit_run
-    def _transform(self, X=None, y=None, **kwargs):
+    def _transform(self, X=None, y=None, file_name="sklearn_transform.py", **kwargs):
         """
         Internal function to run predict/transform and similar functions, which returns
         multiple columns. This function will return data row along with the generated
@@ -1283,18 +1737,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                                               partition_columns)
 
         # Since kwargs are passed to transform, removing additional unrelated arguments from kwargs.
-
-        kwargs.pop("data")
-        if "feature_columns" in kwargs:
-            kwargs.pop("feature_columns")
-        if "group_columns" in kwargs:
-            kwargs.pop("group_columns")
-        if "partition_columns" in kwargs:
-            kwargs.pop("partition_columns")
-        if "label_columns" in kwargs:
-            kwargs.pop("label_columns")
-
-        file_name = "sklearn_transform.py"
+        self._remove_data_related_args_from_kwargs(kwargs)
 
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
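The repeated pop-if-present chains collapse into one helper. A plausible shape for it, inferred from the removed lines above (the actual helper lives elsewhere in the module and may differ):

    def _remove_data_related_args_from_kwargs(self, kwargs):
        # Strip teradataml-specific data arguments so the remaining kwargs can be
        # forwarded untouched to the underlying scikit-learn call.
        kwargs.pop("data", None)
        for arg in ("feature_columns", "group_columns", "partition_columns", "label_columns"):
            kwargs.pop(arg, None)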
@@ -1304,24 +1747,36 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
 
         self._validate_unique_partition_values(data, new_partition_columns)
 
-
-
-
-                         f"{self._model_file_name_prefix} {self._is_lake_system}"
+        return_columns_python_types = None
+        if self._fit_label_columns_python_types:
+            return_columns_python_types = '--'.join(self._fit_label_columns_python_types)
 
         # Returning feature columns also along with transformed columns because we don't know the
         # mapping of feature columns to the transformed columns.
-
-
+        ## 'correct_covariance()' returns the (n_features, n_features)
+        if func_name == "correct_covariance":
+            return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                            for col in new_partition_columns]
+        else:
+            return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                            for col in (new_partition_columns + feature_columns)]
         if func_name in ["predict", "decision_function"] and label_columns:
             return_types += [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
                              for col in label_columns]
 
-
-
-
-
-
+        output_cols_types = self._get_return_columns_for_function_(data,
+                                                                   feature_columns,
+                                                                   label_columns,
+                                                                   new_partition_columns,
+                                                                   func_name,
+                                                                   kwargs)
+        return_types += output_cols_types
+
+        py_exc = UtilFuncs._get_python_execution_path()
+        script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
+                         f"{self._model_file_name_prefix} {len(output_cols_types)} {self._is_lake_system} " \
+                         f"{return_columns_python_types}"
 
         # Checking the trained model installation. If not installed,
         # install it and set flag to True.
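For orientation, the assembled command is a plain space-separated argv string consumed by the installed script. With illustrative values (all of them made up), it comes out roughly like this:

    py_exc = "python3"                          # illustrative values only
    script_file_path = "./mydb/sklearn_transform.py"
    func_name, n_feat, n_label = "transform", 3, 1
    partition_indices_str, data_column_types_str = "4", "float--float--float--int--int"
    model_prefix, n_out, is_lake, ret_py_types = "model_abc", 3, False, None

    print(f"{py_exc} {script_file_path} {func_name} {n_feat} {n_label} "
          f"{partition_indices_str} {data_column_types_str} {model_prefix} "
          f"{n_out} {is_lake} {ret_py_types}")
    # python3 ./mydb/sklearn_transform.py transform 3 1 4 float--float--float--int--int model_abc 3 False None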
@@ -1363,6 +1818,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
             return_types += self._get_return_columns_for_function_(data,
                                                                    feature_columns,
                                                                    label_columns,
+                                                                   new_partition_columns,
                                                                    func_name,
                                                                    {})
         else:
@@ -1448,14 +1904,10 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                   skip_either_or_that=True)
 
         # Remove the kwargs data.
-
-        partition_cols = kwargs.pop("partition_columns", None)
-        feature_cols = kwargs.pop("feature_columns", None)
-        label_cols = kwargs.pop("label_columns", None)
+        self._remove_data_related_args_from_kwargs(kwargs)
 
         if partition_columns:
             # kwargs are passed to kneighbors function. So, removing them from kwargs.
-            kwargs.pop("partition_columns")
             self._is_default_partition_value_fit = False
 
         # Generating new partition column name.
@@ -1640,161 +2092,69 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
 
         return opt
 
-    def _get_returning_df(self, script_df, partition_column, returns):
-        """
-        Internal function to return the teradataml Dataframe except
-        partition_column.
-        """
-        if self._is_default_partition_value_fit:
-            # For single model case, partition column is internally generated
-            # and no point in returning it to the user.
-
-            # Extract columns from return types.
-            returning_cols = [col[0] for col in returns[len(partition_column):]]
-            return script_df.select(returning_cols)
-        return script_df
 
-
-
-    def __init__(self, module_name, func_name):
+class _FunctionWrapper(_GenericObjectWrapper):
+    def __init__(self, module_name, func_name, file_type, template_file):
         super().__init__()
-        self.
-        self.
-        self.
-        self.
-        self.
+        self._module_name = module_name
+        self._func_name = func_name
+        self._params = None
+        self._data_args = OrderedDict()
+        self._template_file = template_file
+        self._script_file_name = _generate_new_name(type=file_type, extension="py")
 
     def __call__(self, **kwargs):
         """
         Run the function with all the arguments passed from `td_sklearn.<function_name>` function.
         """
-
-
-        partition_cols = self._get_columns_as_list(kwargs.get("partition_columns", None))
-        if partition_cols:
-            kwargs.pop("partition_columns")
-
-        # Separate dataframe related arguments and their column names from actual kwargs.
-        for k, v in kwargs.items():
-            if isinstance(v, DataFrame):
-                # All dataframes should be select of parent dataframe.
-                _validate_df_query_type(v, "select", k)
-
-                # Save all columns in dataframe related arguments.
-                __data_columns.extend(v.columns)
-
-                self.__data_args[k] = v
-
-
-        # Get common parent dataframe from all dataframes.
-        self.__tdml_df = DataFrameUtils()._get_common_parent_df_from_dataframes(list(self.__data_args.values()))
+        replace_dict, partition_cols = self._process_data_for_funcs_returning_objects(kwargs)
 
-        self.
-
-        self.__tdml_df = self.__tdml_df.select(__data_columns + partition_cols)
-
-        self.__tdml_df, partition_cols = self._get_data_and_data_partition_columns(self.__tdml_df,
-                                                                                   __data_columns,
-                                                                                   [],
-                                                                                   partition_cols
-                                                                                   )
-
-        # Prepare string of data arguments with name, indices where columns of that argument resides
-        # and types of each of the column.
-        data_args_str = self._prepare_data_args_string(kwargs)
-
-        self.__params = kwargs
-
-        # Get indices of partition_columns and types of all columns.
-        data_column_types_str, partition_indices_str, _, partition_cols = \
-            self._get_data_col_types_and_partition_col_indices_and_types(self.__tdml_df, partition_cols)
-
-        script_file_path = f"{self._model_file_name}" if self._is_lake_system \
-            else f"./{self._db_name}/{self._model_file_name}"
+        script_file_path = f"{self._script_file_name}" if self._is_lake_system \
+            else f"./{self._db_name}/{self._script_file_name}"
 
         model_file_prefix = None
         if self._is_lake_system:
-            model_file_prefix = self.
+            model_file_prefix = self._script_file_name.replace(".py", "")
 
         py_exc = UtilFuncs._get_python_execution_path()
-        script_command =
-                         f"{data_column_types_str} {data_args_str} {self._is_lake_system}"\
-                         f" {model_file_prefix}")
+        script_command = f"{py_exc} {script_file_path} {model_file_prefix} {self._is_lake_system}"
 
         model_type = BLOB() if self._is_lake_system else CLOB()
-        return_types = [(col, self.__tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
-                        for col in partition_cols] + [(self.__func_name, model_type)]
-
-        # Generate new file in .teradataml directory and install it to Vantage.
-        self._prepare_and_install_file()
-
-        self._model_data = self._run_script(self.__tdml_df, script_command, partition_cols, return_types)
-        self._model_data._index_label = None
 
-
+        return_types = [(col, self._tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for col in partition_cols] + [(self._func_name, model_type)]
 
-        self.
-
-
-        # File cleanup after processing.
-        os.remove(self._model_file_local)
-        self._remove_script_file(self._model_file_name)
+        replace_dict.update({"<module_name>": self._module_name,
+                             "<func_name>": self._func_name,
+                             "<params>": json.dumps(kwargs)})
 
-
+        # Generate new file in .teradataml directory and install it to Vantage.
+        self._prepare_and_install_file(replace_dict=replace_dict)
 
-
-
-
-        "{<arg_name>-<comma separated indices>-<comma separated types>}--
-         {<arg_name>-<comma separated indices>-<comma separated types>}"
-        """
-        data_args_str = []
-        for arg_name in list(self.__data_args.keys()):
-            # Remove DataFrame arguments from kwargs, which will be passed to Script.
-            kwargs.pop(arg_name)
+        try:
+            self._model_data = self._run_script(self._tdml_df, script_command, partition_cols, return_types)
+            self._model_data._index_label = None
 
-
-            _, partition_indices_str, partition_types_str, _ = \
-                self._get_data_col_types_and_partition_col_indices_and_types(self.__tdml_df,
-                                                                             self.__data_args[arg_name].columns,
-                                                                             idx_delim=",",
-                                                                             types_delim=",")
-
-            # Format "<arg_name>-<comma separated indices>-<comma separated types>"
-            data_args_str.append(f"{arg_name}-{partition_indices_str}-{partition_types_str}")
-
-        # Format "{<arg_name>-<comma separated indices>-<comma separated types>}--
-        # {<arg_name>-<comma separated indices>-<comma separated types>}"
-        return "--".join(data_args_str)
+            fit_partition_unique_values = self._tdml_df.drop_duplicate(partition_cols).get_values()
 
-
-
-        Validate if columns in "partition_columns" argument are present in any of the given
-        dataframes.
-        """
-        invalid_part_cols = [c for c in partition_columns if c not in all_columns]
+            self._extract_model_objs(n_unique_partitions=len(fit_partition_unique_values),
+                                     n_partition_cols=len(partition_cols))
 
-
-
-
-
-
+        except Exception as ex:
+            # File cleanup if script execution fails or unable to fetch modelObj.
+            os.remove(self._script_file_local)
+            self._remove_script_file(self._script_file_name)
+            raise
 
-
-
-
-        """
-        with open(os.path.join(self._scripts_path, "sklearn_function.template")) as fp:
-            script_data = fp.read()
-        script_data = script_data.replace("<module_name>",self.__module_name).\
-            replace("<func_name>",self.__func_name).replace("<params>", json.dumps(self.__params))
+        # File cleanup after processing.
+        os.remove(self._script_file_local)
+        self._remove_script_file(self._script_file_name)
 
-
+        return self.modelObj
 
-        with open(self._model_file_local, "w") as fp:
-            fp.write(script_data)
-
-        self._install_script_file(file_identifier=self._model_file_name.split(".")[0],
-                                  file_name=self._model_file_name,
-                                  file_location=self._tdml_tmp_dir)
 
+class _SKLearnFunctionWrapper(_FunctionWrapper):
+    def __init__(self, module_name, func_name):
+        file_type = "file_fn_sklearn"
+        template_file = "sklearn_function.template"
+        super().__init__(module_name, func_name, file_type=file_type, template_file=template_file)