teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
|
@@ -41,6 +41,8 @@ from teradataml.dataframe.sql_functions import case
|
|
|
41
41
|
from teradataml.hyperparameter_tuner.utils import _ProgressBar
|
|
42
42
|
from teradataml.utils.validators import _Validators
|
|
43
43
|
from teradataml.common.utils import UtilFuncs
|
|
44
|
+
from teradataml.common.constants import TeradataConstants
|
|
45
|
+
from teradataml.options.configure import configure
|
|
44
46
|
|
|
45
47
|
|
|
46
48
|
class _FeatureEngineering:
|
|
@@ -132,8 +134,12 @@ class _FeatureEngineering:
|
|
|
132
134
|
self.data_transform_dict = {}
|
|
133
135
|
self.one_hot_obj_count = 0
|
|
134
136
|
self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
|
|
135
|
-
self.volatile = kwargs.get('volatile', False)
|
|
136
137
|
self.persist = kwargs.get('persist', False)
|
|
138
|
+
self.volatile = kwargs.get('volatile', False) or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and self.persist is False)
|
|
139
|
+
|
|
140
|
+
self.data_mapping = {}
|
|
141
|
+
self.progress_prefix = kwargs.get('progress_prefix', None)
|
|
142
|
+
self.aml_phases = kwargs.get('automl_phases', None)
|
|
137
143
|
|
|
138
144
|
# Method for doing feature engineering on data -> adding id, removing futile col, imputation, encoding(one hot)
|
|
139
145
|
def feature_engineering(self,
|
|
@@ -159,7 +165,7 @@ class _FeatureEngineering:
|
|
|
159
165
|
second element represents list of columns which are not participating in outlier tranformation.
|
|
160
166
|
"""
|
|
161
167
|
# Assigning number of base jobs for progress bar.
|
|
162
|
-
base_jobs =
|
|
168
|
+
base_jobs = 12 if auto else 17
|
|
163
169
|
|
|
164
170
|
# Updating model list based on distinct value of target column for classification type
|
|
165
171
|
if self.is_classification_type():
|
|
@@ -169,10 +175,14 @@ class _FeatureEngineering:
|
|
|
169
175
|
|
|
170
176
|
# Updating number of jobs for progress bar based on number of models.
|
|
171
177
|
jobs = base_jobs + len(self.model_list)
|
|
172
|
-
self.progress_bar = _ProgressBar(jobs=jobs,
|
|
178
|
+
self.progress_bar = _ProgressBar(jobs=jobs,
|
|
179
|
+
verbose=2,
|
|
180
|
+
prefix=self.progress_prefix)
|
|
173
181
|
|
|
174
182
|
self._display_heading(phase=1,
|
|
175
|
-
progress_bar=self.progress_bar
|
|
183
|
+
progress_bar=self.progress_bar,
|
|
184
|
+
automl_phases=self.aml_phases)
|
|
185
|
+
|
|
176
186
|
self._display_msg(msg='Feature Engineering started ...',
|
|
177
187
|
progress_bar=self.progress_bar)
|
|
178
188
|
|
|
@@ -236,7 +246,7 @@ class _FeatureEngineering:
|
|
|
236
246
|
self._non_linear_transformation()
|
|
237
247
|
self.progress_bar.update()
|
|
238
248
|
|
|
239
|
-
return self.data, self.excluded_cols, self.target_label, self.data_transform_dict
|
|
249
|
+
return self.data, self.excluded_cols, self.target_label, self.data_transform_dict, self.data_mapping
|
|
240
250
|
|
|
241
251
|
def _extract_list(self,
|
|
242
252
|
list1,
|
|
@@ -260,6 +270,11 @@ class _FeatureEngineering:
|
|
|
260
270
|
Returns extracted elements in form of list.
|
|
261
271
|
|
|
262
272
|
"""
|
|
273
|
+
# Ensure list1 and list2 are lists, default to empty list if None
|
|
274
|
+
if list1 is None:
|
|
275
|
+
list1 = []
|
|
276
|
+
if list2 is None:
|
|
277
|
+
list2 = []
|
|
263
278
|
new_lst = list(set(list1) - set(list2))
|
|
264
279
|
return new_lst
|
|
265
280
|
|
|
@@ -348,12 +363,10 @@ class _FeatureEngineering:
|
|
|
348
363
|
|
|
349
364
|
# Detecting and removing futile columns, if categorical_column exists
|
|
350
365
|
if len(categorical_columns) != 0:
|
|
351
|
-
|
|
352
366
|
obj = CategoricalSummary(data=self.data,
|
|
353
367
|
target_columns=categorical_columns,
|
|
354
368
|
volatile=self.volatile,
|
|
355
369
|
persist=self.persist)
|
|
356
|
-
|
|
357
370
|
gfc_out = GetFutileColumns(data=self.data,
|
|
358
371
|
object=obj,
|
|
359
372
|
category_summary_column="ColumnName",
|
|
@@ -363,6 +376,9 @@ class _FeatureEngineering:
|
|
|
363
376
|
|
|
364
377
|
# Extracting Futile columns
|
|
365
378
|
f_cols = [row[0] for row in gfc_out.result.itertuples()]
|
|
379
|
+
|
|
380
|
+
self.data_mapping['categorical_summary'] = obj.result._table_name
|
|
381
|
+
self.data_mapping['futile_columns'] = gfc_out.result._table_name
|
|
366
382
|
|
|
367
383
|
if len(f_cols) == 0:
|
|
368
384
|
self._display_msg(inline_msg="Analysis indicates all categorical columns are significant. No action Needed.",
|
|
@@ -372,6 +388,15 @@ class _FeatureEngineering:
|
|
|
372
388
|
self.data = self.data.drop(f_cols, axis=1)
|
|
373
389
|
# Storing futile column list in data transform dictionary
|
|
374
390
|
self.data_transform_dict['futile_columns'] = f_cols
|
|
391
|
+
|
|
392
|
+
if self.persist:
|
|
393
|
+
table_name = UtilFuncs._generate_temp_table_name(table_type=TeradataConstants.TERADATA_TABLE,
|
|
394
|
+
gc_on_quit=False)
|
|
395
|
+
self.data.to_sql(table_name)
|
|
396
|
+
else:
|
|
397
|
+
self.data.materialize()
|
|
398
|
+
|
|
399
|
+
self.data_mapping['data_without_futile_columns'] = self.data._table_name
|
|
375
400
|
self._display_msg(msg='Removing Futile columns:',
|
|
376
401
|
col_lst=f_cols,
|
|
377
402
|
progress_bar=self.progress_bar)
|
|
@@ -547,6 +572,13 @@ class _FeatureEngineering:
|
|
|
547
572
|
# Storing date column list in data transform dictionary
|
|
548
573
|
self.data_transform_dict['date_columns'] = self.date_column_list
|
|
549
574
|
self._handle_date_columns_helper()
|
|
575
|
+
if self.persist:
|
|
576
|
+
table_name = UtilFuncs._generate_temp_table_name(table_type=TeradataConstants.TERADATA_TABLE,
|
|
577
|
+
gc_on_quit=False)
|
|
578
|
+
self.data.to_sql(table_name)
|
|
579
|
+
else:
|
|
580
|
+
self.data.materialize()
|
|
581
|
+
self.data_mapping['data_after_date_handling'] = self.data._table_name
|
|
550
582
|
|
|
551
583
|
end_time = time.time()
|
|
552
584
|
self._display_msg(msg="Total time to handle date features: {:.2f} sec\n".format(end_time-start_time),
|
|
@@ -760,6 +792,9 @@ class _FeatureEngineering:
|
|
|
760
792
|
persist=self.persist)
|
|
761
793
|
|
|
762
794
|
self.data = sm.result
|
|
795
|
+
self.data_mapping['fit_simpleimpute_output'] = fit_obj.output_data._table_name
|
|
796
|
+
self.data_mapping['fit_simpleimpute_result'] = fit_obj.output._table_name
|
|
797
|
+
self.data_mapping['data_without_missing_values'] = self.data._table_name
|
|
763
798
|
self._display_msg(msg="Sample of dataset after Imputation:",
|
|
764
799
|
data=self.data,
|
|
765
800
|
progress_bar=self.progress_bar)
|
|
@@ -872,6 +907,11 @@ class _FeatureEngineering:
|
|
|
872
907
|
transform_param["persist"] = False
|
|
873
908
|
# Updating dataset with transform result
|
|
874
909
|
self.data = SimpleImputeTransform(**transform_param).result
|
|
910
|
+
|
|
911
|
+
self.data_mapping['fit_simpleimpute_output'] = fit_obj.output_data._table_name
|
|
912
|
+
self.data_mapping['fit_simpleimpute_result'] = fit_obj.output._table_name
|
|
913
|
+
self.data_mapping['data_without_missing_values'] = self.data._table_name
|
|
914
|
+
|
|
875
915
|
if not volatile and not persist:
|
|
876
916
|
# Adding transformed data containing table to garbage collector
|
|
877
917
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
@@ -978,6 +1018,10 @@ class _FeatureEngineering:
|
|
|
978
1018
|
if not volatile and not persist:
|
|
979
1019
|
# Adding transformed data containing table to garbage collector
|
|
980
1020
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
1021
|
+
|
|
1022
|
+
self.data_mapping['fit_eql_width'] = eql_bin_code_fit.output._table_name
|
|
1023
|
+
self.data_mapping['eql_width_bincoded_data'] = self.data._table_name
|
|
1024
|
+
|
|
981
1025
|
self._display_msg(msg="\nUpdated dataset sample after performing Equal-Width binning :-",
|
|
982
1026
|
data=self.data,
|
|
983
1027
|
progress_bar=self.progress_bar)
|
|
@@ -1026,6 +1070,8 @@ class _FeatureEngineering:
|
|
|
1026
1070
|
var_transform_params["volatile"] = True
|
|
1027
1071
|
var_transform_params["persist"] = False
|
|
1028
1072
|
self.data = BincodeTransform(**var_transform_params).result
|
|
1073
|
+
self.data_mapping['fit_var_width'] = var_bin_code_fit.output._table_name
|
|
1074
|
+
self.data_mapping['var_width_bincoded_data'] = self.data._table_name
|
|
1029
1075
|
if not volatile and not persist:
|
|
1030
1076
|
# Adding transformed data containing table to garbage collector
|
|
1031
1077
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
@@ -1147,6 +1193,7 @@ class _FeatureEngineering:
|
|
|
1147
1193
|
if not volatile and not persist:
|
|
1148
1194
|
# Adding transformed data containing table to garbage collector
|
|
1149
1195
|
GarbageCollector._add_to_garbagecollector(transform_output._table_name)
|
|
1196
|
+
self.data_mapping['string_manipulated_data'] = transform_output._table_name
|
|
1150
1197
|
return transform_output
|
|
1151
1198
|
|
|
1152
1199
|
def _one_hot_encoding(self,
|
|
@@ -1216,6 +1263,9 @@ class _FeatureEngineering:
|
|
|
1216
1263
|
# Adding transformed data containing table to garbage collector
|
|
1217
1264
|
GarbageCollector._add_to_garbagecollector(transform_output._table_name)
|
|
1218
1265
|
self.data = transform_output.drop(drop_lst, axis=1)
|
|
1266
|
+
self.data.materialize()
|
|
1267
|
+
self.data_mapping['one_hot_encoded_data'] = transform_output._table_name
|
|
1268
|
+
self.data_mapping['fit_ohe_result'] = fit_obj.result._table_name
|
|
1219
1269
|
|
|
1220
1270
|
def _ordinal_encoding(self,
|
|
1221
1271
|
ordinal_columns):
|
|
@@ -1273,6 +1323,10 @@ class _FeatureEngineering:
|
|
|
1273
1323
|
# Adding transformed data containing table to garbage collector
|
|
1274
1324
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
1275
1325
|
|
|
1326
|
+
self.data_mapping['fit_ordinal_output'] = ord_fit_obj.output_data._table_name
|
|
1327
|
+
self.data_mapping['fit_ordinal_result'] = ord_fit_obj.result._table_name
|
|
1328
|
+
self.data_mapping['ordinal_encoded_data'] = self.data._table_name
|
|
1329
|
+
|
|
1276
1330
|
if len(ordinal_columns) == 1 and ordinal_columns[0] == self.target_column:
|
|
1277
1331
|
self.target_label = ord_fit_obj
|
|
1278
1332
|
|
|
@@ -1319,6 +1373,7 @@ class _FeatureEngineering:
|
|
|
1319
1373
|
"encoder_method" : encoder_method,
|
|
1320
1374
|
"target_columns" : col,
|
|
1321
1375
|
"response_column" : response_column,
|
|
1376
|
+
"default_values": -1,
|
|
1322
1377
|
"volatile" : volatile,
|
|
1323
1378
|
"persist" : persist
|
|
1324
1379
|
}
|
|
@@ -1352,6 +1407,9 @@ class _FeatureEngineering:
|
|
|
1352
1407
|
if not volatile and not persist:
|
|
1353
1408
|
# Adding transformed data containing table to garbage collector
|
|
1354
1409
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
1410
|
+
self.data_mapping[f'fit_{col}_target_output'] = tar_fit_obj.output_data._table_name
|
|
1411
|
+
self.data_mapping[f'fit_{col}_target_result'] = tar_fit_obj.result._table_name
|
|
1412
|
+
self.data_mapping[f'{col}_target_encoded_data'] = self.data._table_name
|
|
1355
1413
|
|
|
1356
1414
|
def _encoding_categorical_columns(self):
|
|
1357
1415
|
"""
|
|
@@ -1584,6 +1642,9 @@ class _FeatureEngineering:
|
|
|
1584
1642
|
if not volatile and not persist:
|
|
1585
1643
|
# Adding transformed data containing table to garbage collector
|
|
1586
1644
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
1645
|
+
|
|
1646
|
+
self.data_mapping['fit_numerical_result'] = num_fit_obj.result._table_name
|
|
1647
|
+
self.data_mapping['numerical_transformed_data'] = self.data._table_name
|
|
1587
1648
|
self._display_msg(msg="Updated dataset sample after applying numerical transformation:",
|
|
1588
1649
|
data=self.data,
|
|
1589
1650
|
progress_bar=self.progress_bar)
|
|
@@ -1624,6 +1685,7 @@ class _FeatureEngineering:
|
|
|
1624
1685
|
if apply_method in (["sininv","sigmoid"]):
|
|
1625
1686
|
# Applying numapply transformation
|
|
1626
1687
|
self.data = self._numapply_transformation(col,transform_val)
|
|
1688
|
+
self.data_mapping[f'{apply_method}_transformed_data'] = self.data._table_name
|
|
1627
1689
|
self._display_msg(msg="Updated dataset sample after applying numapply transformation:",
|
|
1628
1690
|
data=self.data,
|
|
1629
1691
|
progress_bar=self.progress_bar)
|
|
@@ -1728,6 +1790,10 @@ class _FeatureEngineering:
|
|
|
1728
1790
|
transform_params["persist"] = False
|
|
1729
1791
|
self.data = NonLinearCombineTransform(**transform_params).result
|
|
1730
1792
|
|
|
1793
|
+
self.data_mapping[f'fit_nonlinear_{comb}_output'] = fit_obj.output_data._table_name
|
|
1794
|
+
self.data_mapping[f'fit_nonlinear_{comb}_result'] = fit_obj.result._table_name
|
|
1795
|
+
self.data_mapping['non_linear_transformed_data'] = self.data._table_name
|
|
1796
|
+
|
|
1731
1797
|
if not volatile and not persist:
|
|
1732
1798
|
# Adding transformed data containing table to garbage collector
|
|
1733
1799
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
@@ -1810,10 +1876,11 @@ class _FeatureEngineering:
|
|
|
1810
1876
|
RETURNS:
|
|
1811
1877
|
Tuple containing volatile and persist parameters.
|
|
1812
1878
|
"""
|
|
1813
|
-
|
|
1879
|
+
# Prioritizing persist argument and then volatile
|
|
1814
1880
|
persist = self.persist
|
|
1881
|
+
volatile = self.volatile or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and persist is False)
|
|
1815
1882
|
if self.custom_data is not None and self.custom_data.get(func_indicator, False):
|
|
1816
1883
|
volatile = self.custom_data[param_name].get("volatile", False)
|
|
1817
1884
|
persist = self.custom_data[param_name].get("persist", False)
|
|
1818
1885
|
|
|
1819
|
-
return (volatile, persist)
|
|
1886
|
+
return (volatile, persist)
|